Merge branch 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm fixes from Dan Williams:

 "1/ Fixes to the libnvdimm 'pfn' device that establishes a reserved
     area for storing a struct page array.

  2/ Fixes for dax operations on a raw block device to prevent
     pagecache collisions with dax mappings.

  3/ A fix for pfn_t usage in vm_insert_mixed that led to a NULL
     pointer dereference.

  These have received a build success notification from the kbuild
  robot across 153 configs and pass the latest ndctl tests."

* 'libnvdimm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
  phys_to_pfn_t: use phys_addr_t
  mm: fix pfn_t to page conversion in vm_insert_mixed
  block: use DAX for partition table reads
  block: revert runtime dax control of the raw block device
  fs, block: force direct-I/O for dax-enabled block devices
  devm_memremap_pages: fix vmem_altmap lifetime + alignment handling
  libnvdimm, pfn: fix restoring memmap location
  libnvdimm: fix mode determination for e820 devices
commit 29a8ea4fbe
block/ioctl.c
@@ -434,42 +434,6 @@ bool blkdev_dax_capable(struct block_device *bdev)
 	return true;
 }
 
-static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
-{
-	unsigned long arg;
-	int rc = 0;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-
-	if (get_user(arg, (int __user *)(argp)))
-		return -EFAULT;
-	arg = !!arg;
-	if (arg == !!(bdev->bd_inode->i_flags & S_DAX))
-		return 0;
-
-	if (arg)
-		arg = S_DAX;
-
-	if (arg && !blkdev_dax_capable(bdev))
-		return -ENOTTY;
-
-	inode_lock(bdev->bd_inode);
-	if (bdev->bd_map_count == 0)
-		inode_set_flags(bdev->bd_inode, arg, S_DAX);
-	else
-		rc = -EBUSY;
-	inode_unlock(bdev->bd_inode);
-	return rc;
-}
-#else
-static int blkdev_daxset(struct block_device *bdev, int arg)
-{
-	if (arg)
-		return -ENOTTY;
-	return 0;
-}
-#endif
-
 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
@@ -634,8 +598,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKTRACESETUP:
 	case BLKTRACETEARDOWN:
 		return blk_trace_ioctl(bdev, cmd, argp);
-	case BLKDAXSET:
-		return blkdev_daxset(bdev, arg);
 	case BLKDAXGET:
 		return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
 		break;
block/partition-generic.c
@@ -16,6 +16,7 @@
 #include <linux/kmod.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
+#include <linux/dax.h>
 #include <linux/blktrace_api.h>
 
 #include "partitions/check.h"
@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
 	return 0;
 }
 
-unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
 {
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+	return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
+				 NULL);
+}
+
+unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
+{
 	struct page *page;
 
-	page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
-				 NULL);
+	/* don't populate page cache for dax capable devices */
+	if (IS_DAX(bdev->bd_inode))
+		page = read_dax_sector(bdev, n);
+	else
+		page = read_pagecache_sector(bdev, n);
+
 	if (!IS_ERR(page)) {
 		if (PageError(page))
 			goto fail;
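Partition parsers do not call either helper directly; they go through read_dev_sector()/put_dev_sector(), so the dax-vs-pagecache dispatch above is transparent to them. A minimal sketch of that calling pattern (hypothetical parser code, not part of this series):

    /* Hypothetical example of the read_dev_sector() calling convention. */
    static int example_scan_sector0(struct parsed_partitions *state)
    {
            Sector sect;
            unsigned char *data;

            /* Returns a mapped pointer into a page backed either by the
             * page cache or, on DAX-capable devices, by a private copy of
             * the persistent-memory contents. */
            data = read_dev_sector(state->bdev, 0, &sect);
            if (!data)
                    return -EIO;

            /* ... inspect the 512-byte sector via 'data' ... */

            put_dev_sector(sect);   /* drop the page reference */
            return 0;
    }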
drivers/nvdimm/namespace_devs.c
@@ -1277,10 +1277,12 @@ static ssize_t mode_show(struct device *dev,
 
 	device_lock(dev);
 	claim = ndns->claim;
-	if (pmem_should_map_pages(dev) || (claim && is_nd_pfn(claim)))
-		mode = "memory";
-	else if (claim && is_nd_btt(claim))
+	if (claim && is_nd_btt(claim))
 		mode = "safe";
+	else if (claim && is_nd_pfn(claim))
+		mode = "memory";
+	else if (!claim && pmem_should_map_pages(dev))
+		mode = "memory";
 	else
 		mode = "raw";
 	rc = sprintf(buf, "%s\n", mode);
drivers/nvdimm/pfn_devs.c
@@ -301,10 +301,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
 
 	switch (le32_to_cpu(pfn_sb->mode)) {
 	case PFN_MODE_RAM:
-		break;
 	case PFN_MODE_PMEM:
-		/* TODO: allocate from PMEM support */
-		return -ENOTTY;
+		break;
 	default:
 		return -ENXIO;
 	}
fs/block_dev.c
@@ -1736,37 +1736,13 @@ static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 	return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
 }
 
-static void blkdev_vm_open(struct vm_area_struct *vma)
-{
-	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-	struct block_device *bdev = I_BDEV(bd_inode);
-
-	inode_lock(bd_inode);
-	bdev->bd_map_count++;
-	inode_unlock(bd_inode);
-}
-
-static void blkdev_vm_close(struct vm_area_struct *vma)
-{
-	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-	struct block_device *bdev = I_BDEV(bd_inode);
-
-	inode_lock(bd_inode);
-	bdev->bd_map_count--;
-	inode_unlock(bd_inode);
-}
-
 static const struct vm_operations_struct blkdev_dax_vm_ops = {
-	.open		= blkdev_vm_open,
-	.close		= blkdev_vm_close,
 	.fault		= blkdev_dax_fault,
 	.pmd_fault	= blkdev_dax_pmd_fault,
 	.pfn_mkwrite	= blkdev_dax_fault,
 };
 
 static const struct vm_operations_struct blkdev_default_vm_ops = {
-	.open		= blkdev_vm_open,
-	.close		= blkdev_vm_close,
 	.fault		= filemap_fault,
 	.map_pages	= filemap_map_pages,
 };
@@ -1774,18 +1750,14 @@ static const struct vm_operations_struct blkdev_default_vm_ops = {
 static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *bd_inode = bdev_file_inode(file);
-	struct block_device *bdev = I_BDEV(bd_inode);
 
 	file_accessed(file);
-	inode_lock(bd_inode);
-	bdev->bd_map_count++;
 	if (IS_DAX(bd_inode)) {
 		vma->vm_ops = &blkdev_dax_vm_ops;
 		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
 	} else {
 		vma->vm_ops = &blkdev_default_vm_ops;
 	}
-	inode_unlock(bd_inode);
 
 	return 0;
 }
fs/dax.c
@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
 	blk_queue_exit(bdev->bd_queue);
 }
 
+struct page *read_dax_sector(struct block_device *bdev, sector_t n)
+{
+	struct page *page = alloc_pages(GFP_KERNEL, 0);
+	struct blk_dax_ctl dax = {
+		.size = PAGE_SIZE,
+		.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
+	};
+	long rc;
+
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	rc = dax_map_atomic(bdev, &dax);
+	if (rc < 0)
+		return ERR_PTR(rc);
+	memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
+	dax_unmap_atomic(bdev, &dax);
+	return page;
+}
+
 /*
  * dax_clear_blocks() is called from within transaction context from XFS,
  * and hence this means the stack from this point must follow GFP_NOFS
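Note the .sector initializer: dax_map_atomic() is handed a whole page, so the requested sector is masked down to a PAGE_SIZE boundary and the caller later indexes into the copied page. A small sketch of that arithmetic, assuming 4K pages (illustrative helper, not from the patch):

    /* With 4K pages there are PAGE_SIZE / 512 == 8 sectors per page, so
     * the mask is ~7: sector 11 maps to aligned sector 8, at byte offset
     * 1536 within the copied page. */
    static sector_t example_dax_aligned_sector(sector_t n)
    {
            return n & ~((sector_t)(PAGE_SIZE / 512) - 1);
    }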
include/linux/dax.h
@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
 int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
 		dax_iodone_t);
+
+#ifdef CONFIG_FS_DAX
+struct page *read_dax_sector(struct block_device *bdev, sector_t n);
+#else
+static inline struct page *read_dax_sector(struct block_device *bdev,
+		sector_t n)
+{
+	return ERR_PTR(-ENXIO);
+}
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *,
 		unsigned int flags, get_block_t, dax_iodone_t);
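The #else stub is the usual pattern for keeping callers free of preprocessor guards: with CONFIG_FS_DAX disabled, read_dax_sector() still compiles as a static inline that reports an error, and callers such as read_dev_sector() never reach it because IS_DAX() is false on such builds. Generic form of the idiom, with hypothetical CONFIG_FOO/foo_read names:

    #ifdef CONFIG_FOO
    struct page *foo_read(struct block_device *bdev, sector_t n);
    #else
    /* Stub compiles away; callers need no #ifdef of their own. */
    static inline struct page *foo_read(struct block_device *bdev, sector_t n)
    {
            return ERR_PTR(-ENXIO);
    }
    #endif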
include/linux/fs.h
@@ -484,9 +484,6 @@ struct block_device {
 	int			bd_fsfreeze_count;
 	/* Mutex for freeze */
 	struct mutex		bd_fsfreeze_mutex;
-#ifdef CONFIG_FS_DAX
-	int			bd_map_count;
-#endif
 };
 
 /*
@@ -2907,7 +2904,7 @@ extern void replace_mount_options(struct super_block *sb, char *options);
 
 static inline bool io_is_direct(struct file *filp)
 {
-	return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp));
+	return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host);
 }
 
 static inline int iocb_flags(struct file *file)
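The subtlety is which inode carries S_DAX for a raw block device: file_inode() returns the /dev device-node inode on the filesystem holding the node, while filp->f_mapping->host is the bdev inode shared by all openers, which is where the block layer maintains S_DAX. Checking the latter is what lets io_is_direct() force direct-I/O semantics for dax-capable devices. Roughly (illustrative helper, not from the patch):

    /* For a file obtained from open("/dev/pmem0", ...):
     *   file_inode(filp)       -> the device-node inode (S_DAX never set here)
     *   filp->f_mapping->host  -> the bdev inode, where S_DAX is tracked
     */
    static inline bool example_bdev_wants_dax_io(struct file *filp)
    {
            return IS_DAX(filp->f_mapping->host);
    }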
include/linux/pfn_t.h
@@ -29,7 +29,7 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
 	return __pfn_to_pfn_t(pfn, 0);
 }
 
-extern pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags);
+extern pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags);
 
 static inline bool pfn_t_has_page(pfn_t pfn)
 {
@@ -48,7 +48,7 @@ static inline struct page *pfn_t_to_page(pfn_t pfn)
 	return NULL;
 }
 
-static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
+static inline phys_addr_t pfn_t_to_phys(pfn_t pfn)
 {
 	return PFN_PHYS(pfn_t_to_pfn(pfn));
 }
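The point of the type change: dma_addr_t is sized for device/bus addresses, and on some 32-bit configurations (for example x86 PAE builds without a 64-bit dma_addr_t) it is narrower than phys_addr_t, so a physical address above 4GiB would be silently truncated before the pfn is computed. A standalone sketch of that failure mode, under those width assumptions:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustration only: 32-bit dma_addr_t vs 64-bit phys_addr_t. */
    typedef uint32_t example_dma_addr_t;
    typedef uint64_t example_phys_addr_t;

    int main(void)
    {
            example_phys_addr_t phys = 0x180000000ULL;            /* 6 GiB */
            example_dma_addr_t trunc = (example_dma_addr_t)phys;  /* wraps to 2 GiB */

            printf("pfn via phys_addr_t: 0x%llx\n",
                   (unsigned long long)(phys >> 12));             /* 0x180000 */
            printf("pfn via dma_addr_t:  0x%llx\n",
                   (unsigned long long)(trunc >> 12));            /* 0x80000, wrong */
            return 0;
    }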
include/uapi/linux/fs.h
@@ -222,7 +222,6 @@ struct fsxattr {
 #define BLKSECDISCARD _IO(0x12,125)
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
-#define BLKDAXSET _IO(0x12,128)
 #define BLKDAXGET _IO(0x12,129)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
kernel/memremap.c
@@ -150,7 +150,7 @@ void devm_memunmap(struct device *dev, void *addr)
 }
 EXPORT_SYMBOL(devm_memunmap);
 
-pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
 	return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
 }
@@ -183,7 +183,11 @@ EXPORT_SYMBOL(put_zone_device_page);
 
 static void pgmap_radix_release(struct resource *res)
 {
-	resource_size_t key;
+	resource_size_t key, align_start, align_size, align_end;
+
+	align_start = res->start & ~(SECTION_SIZE - 1);
+	align_size = ALIGN(resource_size(res), SECTION_SIZE);
+	align_end = align_start + align_size - 1;
 
 	mutex_lock(&pgmap_lock);
-	for (key = res->start; key <= res->end; key += SECTION_SIZE)
+	for (key = align_start; key <= align_end; key += SECTION_SIZE)
@@ -226,12 +230,11 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
 		percpu_ref_put(pgmap->ref);
 	}
 
-	pgmap_radix_release(res);
-
 	/* pages are dead and unused, undo the arch mapping */
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 	arch_remove_memory(align_start, align_size);
+	pgmap_radix_release(res);
 	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
 			"%s: failed to free all reserved pages\n", __func__);
 }
@@ -267,7 +270,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 {
 	int is_ram = region_intersects(res->start, resource_size(res),
 			"System RAM");
-	resource_size_t key, align_start, align_size;
+	resource_size_t key, align_start, align_size, align_end;
 	struct dev_pagemap *pgmap;
 	struct page_map *page_map;
 	unsigned long pfn;
@@ -309,7 +312,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 
 	mutex_lock(&pgmap_lock);
 	error = 0;
-	for (key = res->start; key <= res->end; key += SECTION_SIZE) {
+	align_start = res->start & ~(SECTION_SIZE - 1);
+	align_size = ALIGN(resource_size(res), SECTION_SIZE);
+	align_end = align_start + align_size - 1;
+	for (key = align_start; key <= align_end; key += SECTION_SIZE) {
 		struct dev_pagemap *dup;
 
 		rcu_read_lock();
@@ -336,8 +342,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
 	if (nid < 0)
 		nid = numa_mem_id();
 
-	align_start = res->start & ~(SECTION_SIZE - 1);
-	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 	error = arch_add_memory(nid, align_start, align_size, true);
 	if (error)
 		goto err_add_memory;
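Taken together, these hunks make the radix-tree keys, arch_add_memory() and arch_remove_memory() all operate on the same section-aligned span instead of the raw resource bounds. A worked example of the arithmetic, assuming a 128MiB section (SECTION_SIZE = 1 << 27) and a hypothetical unaligned pmem resource:

    /*
     * res->start         = 0x100200000   (2 MiB into a 128 MiB section)
     * resource_size(res) = 0x004000000   (64 MiB)
     *
     * align_start = 0x100200000 & ~(SECTION_SIZE - 1) = 0x100000000
     * align_size  = ALIGN(0x4000000, SECTION_SIZE)    = 0x008000000
     * align_end   = align_start + align_size - 1      = 0x107ffffff
     *
     * The insertion loop now stores the radix key 0x100000000, the same
     * section-aligned address that later lookups and the release path
     * use, instead of keying off the unaligned 0x100200000.
     */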
mm/memory.c
@@ -1591,10 +1591,15 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 	 * than insert_pfn). If a zero_pfn were inserted into a VM_MIXEDMAP
 	 * without pte special, it would there be refcounted as a normal page.
 	 */
-	if (!HAVE_PTE_SPECIAL && pfn_t_valid(pfn)) {
+	if (!HAVE_PTE_SPECIAL && !pfn_t_devmap(pfn) && pfn_t_valid(pfn)) {
 		struct page *page;
 
-		page = pfn_t_to_page(pfn);
+		/*
+		 * At this point we are committed to insert_page()
+		 * regardless of whether the caller specified flags that
+		 * result in pfn_t_has_page() == false.
+		 */
+		page = pfn_to_page(pfn_t_to_pfn(pfn));
 		return insert_page(vma, addr, page, vma->vm_page_prot);
 	}
 	return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
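The crash fixed here comes from pfn_t_to_page(), which consults the pfn_t flags and returns NULL for a PFN_DEV pfn that is not also PFN_MAP; on configurations without pte_special the old code then handed that NULL to insert_page(). The new test skips devmap pfns up front and converts the remainder unconditionally. A condensed paraphrase of the pfn_t.h helpers involved (sketch, names prefixed to mark it as such):

    /* Paraphrased from include/linux/pfn_t.h as of this series. */
    static inline bool sketch_pfn_t_has_page(pfn_t pfn)
    {
            /* a struct page exists unless this is PFN_DEV without PFN_MAP */
            return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0;
    }

    static inline struct page *sketch_pfn_t_to_page(pfn_t pfn)
    {
            if (sketch_pfn_t_has_page(pfn))
                    return pfn_to_page(pfn_t_to_pfn(pfn));
            return NULL;    /* what the old vm_insert_mixed() then dereferenced */
    }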
tools/testing/nvdimm/test/iomap.c
@@ -113,7 +113,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
 
-pfn_t __wrap_phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
 {
 	struct nfit_test_resource *nfit_res = get_nfit_res(addr);
 