Merge branch 'for-4.7/dax' into libnvdimm-for-next
Commit 36092ee8ba
@@ -407,35 +407,6 @@ static inline int is_unrecognized_ioctl(int ret)
 		ret == -ENOIOCTLCMD;
 }
 
-#ifdef CONFIG_FS_DAX
-bool blkdev_dax_capable(struct block_device *bdev)
-{
-	struct gendisk *disk = bdev->bd_disk;
-
-	if (!disk->fops->direct_access)
-		return false;
-
-	/*
-	 * If the partition is not aligned on a page boundary, we can't
-	 * do dax I/O to it.
-	 */
-	if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
-			|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
-		return false;
-
-	/*
-	 * If the device has known bad blocks, force all I/O through the
-	 * driver / page cache.
-	 *
-	 * TODO: support finer grained dax error handling
-	 */
-	if (disk->bb && disk->bb->count)
-		return false;
-
-	return true;
-}
-#endif
-
 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
 		unsigned cmd, unsigned long arg)
 {
@@ -598,9 +569,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 	case BLKTRACESETUP:
 	case BLKTRACETEARDOWN:
 		return blk_trace_ioctl(bdev, cmd, argp);
-	case BLKDAXGET:
-		return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
-		break;
 	case IOC_PR_REGISTER:
 		return blkdev_pr_register(bdev, argp);
 	case IOC_PR_RESERVE:
@@ -190,6 +190,8 @@ source "drivers/android/Kconfig"
 
 source "drivers/nvdimm/Kconfig"
 
+source "drivers/dax/Kconfig"
+
 source "drivers/nvmem/Kconfig"
 
 source "drivers/hwtracing/stm/Kconfig"
@@ -66,6 +66,7 @@ obj-$(CONFIG_PARPORT)		+= parport/
 obj-$(CONFIG_NVM)		+= lightnvm/
 obj-y				+= base/ block/ misc/ mfd/ nfc/
 obj-$(CONFIG_LIBNVDIMM)		+= nvdimm/
+obj-$(CONFIG_DEV_DAX)		+= dax/
 obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
 obj-$(CONFIG_NUBUS)		+= nubus/
 obj-y				+= macintosh/
@@ -0,0 +1,26 @@
+menuconfig DEV_DAX
+	tristate "DAX: direct access to differentiated memory"
+	default m if NVDIMM_DAX
+	depends on TRANSPARENT_HUGEPAGE
+	help
+	  Support raw access to differentiated (persistence, bandwidth,
+	  latency...) memory via an mmap(2) capable character
+	  device.  Platform firmware or a device driver may identify a
+	  platform memory resource that is differentiated from the
+	  baseline memory pool.  Mappings of a /dev/daxX.Y device impose
+	  restrictions that make the mapping behavior deterministic.
+
+if DEV_DAX
+
+config DEV_DAX_PMEM
+	tristate "PMEM DAX: direct access to persistent memory"
+	depends on NVDIMM_DAX
+	default DEV_DAX
+	help
+	  Support raw access to persistent memory.  Note that this
+	  driver consumes memory ranges allocated and exported by the
+	  libnvdimm sub-system.
+
+	  Say Y if unsure
+
+endif
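The help text above promises an mmap(2)-capable character device. As a rough userspace sketch of that contract (the /dev/dax0.0 path and the 2MB length are illustrative assumptions; the length must be a multiple of the region's alignment):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 2UL << 20;			/* illustrative 2MB mapping */
	int fd = open("/dev/dax0.0", O_RDWR);	/* hypothetical dax%d.%d node */
	void *addr;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/*
	 * check_vma() in drivers/dax/dax.c rejects private writable
	 * mappings, so write access requires MAP_SHARED.
	 */
	addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(addr, 0, len);	/* stores target the device memory directly */
	munmap(addr, len);
	close(fd);
	return 0;
}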
@@ -0,0 +1,4 @@
+obj-$(CONFIG_DEV_DAX) += dax.o
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
+
+dax_pmem-y := pmem.o
@@ -0,0 +1,575 @@
+/*
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/pagemap.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pfn_t.h>
+#include <linux/slab.h>
+#include <linux/dax.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+static int dax_major;
+static struct class *dax_class;
+static DEFINE_IDA(dax_minor_ida);
+
+/**
+ * struct dax_region - mapping infrastructure for dax devices
+ * @id: kernel-wide unique region for a memory range
+ * @base: linear address corresponding to @res
+ * @kref: to pin while other agents have a need to do lookups
+ * @dev: parent device backing this region
+ * @align: allocation and mapping alignment for child dax devices
+ * @res: physical address range of the region
+ * @pfn_flags: identify whether the pfns are paged back or not
+ */
+struct dax_region {
+	int id;
+	struct ida ida;
+	void *base;
+	struct kref kref;
+	struct device *dev;
+	unsigned int align;
+	struct resource res;
+	unsigned long pfn_flags;
+};
+
+/**
+ * struct dax_dev - subdivision of a dax region
+ * @region - parent region
+ * @dev - device backing the character device
+ * @kref - enable this data to be tracked in filp->private_data
+ * @alive - !alive + rcu grace period == no new mappings can be established
+ * @id - child id in the region
+ * @num_resources - number of physical address extents in this device
+ * @res - array of physical address ranges
+ */
+struct dax_dev {
+	struct dax_region *region;
+	struct device *dev;
+	struct kref kref;
+	bool alive;
+	int id;
+	int num_resources;
+	struct resource res[0];
+};
+
+static void dax_region_free(struct kref *kref)
+{
+	struct dax_region *dax_region;
+
+	dax_region = container_of(kref, struct dax_region, kref);
+	kfree(dax_region);
+}
+
+void dax_region_put(struct dax_region *dax_region)
+{
+	kref_put(&dax_region->kref, dax_region_free);
+}
+EXPORT_SYMBOL_GPL(dax_region_put);
+
+static void dax_dev_free(struct kref *kref)
+{
+	struct dax_dev *dax_dev;
+
+	dax_dev = container_of(kref, struct dax_dev, kref);
+	dax_region_put(dax_dev->region);
+	kfree(dax_dev);
+}
+
+static void dax_dev_put(struct dax_dev *dax_dev)
+{
+	kref_put(&dax_dev->kref, dax_dev_free);
+}
+
+struct dax_region *alloc_dax_region(struct device *parent, int region_id,
+		struct resource *res, unsigned int align, void *addr,
+		unsigned long pfn_flags)
+{
+	struct dax_region *dax_region;
+
+	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
+
+	if (!dax_region)
+		return NULL;
+
+	memcpy(&dax_region->res, res, sizeof(*res));
+	dax_region->pfn_flags = pfn_flags;
+	kref_init(&dax_region->kref);
+	dax_region->id = region_id;
+	ida_init(&dax_region->ida);
+	dax_region->align = align;
+	dax_region->dev = parent;
+	dax_region->base = addr;
+
+	return dax_region;
+}
+EXPORT_SYMBOL_GPL(alloc_dax_region);
+
+static ssize_t size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct dax_dev *dax_dev = dev_get_drvdata(dev);
+	unsigned long long size = 0;
+	int i;
+
+	for (i = 0; i < dax_dev->num_resources; i++)
+		size += resource_size(&dax_dev->res[i]);
+
+	return sprintf(buf, "%llu\n", size);
+}
+static DEVICE_ATTR_RO(size);
+
+static struct attribute *dax_device_attributes[] = {
+	&dev_attr_size.attr,
+	NULL,
+};
+
+static const struct attribute_group dax_device_attribute_group = {
+	.attrs = dax_device_attributes,
+};
+
+static const struct attribute_group *dax_attribute_groups[] = {
+	&dax_device_attribute_group,
+	NULL,
+};
+
+static void unregister_dax_dev(void *_dev)
+{
+	struct device *dev = _dev;
+	struct dax_dev *dax_dev = dev_get_drvdata(dev);
+	struct dax_region *dax_region = dax_dev->region;
+
+	dev_dbg(dev, "%s\n", __func__);
+
+	/*
+	 * Note, rcu is not protecting the liveness of dax_dev, rcu is
+	 * ensuring that any fault handlers that might have seen
+	 * dax_dev->alive == true, have completed.  Any fault handlers
+	 * that start after synchronize_rcu() has started will abort
+	 * upon seeing dax_dev->alive == false.
+	 */
+	dax_dev->alive = false;
+	synchronize_rcu();
+
+	get_device(dev);
+	device_unregister(dev);
+	ida_simple_remove(&dax_region->ida, dax_dev->id);
+	ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
+	put_device(dev);
+	dax_dev_put(dax_dev);
+}
+
+int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
+		int count)
+{
+	struct device *parent = dax_region->dev;
+	struct dax_dev *dax_dev;
+	struct device *dev;
+	int rc, minor;
+	dev_t dev_t;
+
+	dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL);
+	if (!dax_dev)
+		return -ENOMEM;
+	memcpy(dax_dev->res, res, sizeof(*res) * count);
+	dax_dev->num_resources = count;
+	kref_init(&dax_dev->kref);
+	dax_dev->alive = true;
+	dax_dev->region = dax_region;
+	kref_get(&dax_region->kref);
+
+	dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
+	if (dax_dev->id < 0) {
+		rc = dax_dev->id;
+		goto err_id;
+	}
+
+	minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL);
+	if (minor < 0) {
+		rc = minor;
+		goto err_minor;
+	}
+
+	dev_t = MKDEV(dax_major, minor);
+	dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev,
+			dax_attribute_groups, "dax%d.%d", dax_region->id,
+			dax_dev->id);
+	if (IS_ERR(dev)) {
+		rc = PTR_ERR(dev);
+		goto err_create;
+	}
+	dax_dev->dev = dev;
+
+	rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
+	if (rc) {
+		unregister_dax_dev(dev);
+		return rc;
+	}
+
+	return 0;
+
+ err_create:
+	ida_simple_remove(&dax_minor_ida, minor);
+ err_minor:
+	ida_simple_remove(&dax_region->ida, dax_dev->id);
+ err_id:
+	dax_dev_put(dax_dev);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(devm_create_dax_dev);
+
+/* return an unmapped area aligned to the dax region specified alignment */
+static unsigned long dax_dev_get_unmapped_area(struct file *filp,
+		unsigned long addr, unsigned long len, unsigned long pgoff,
+		unsigned long flags)
+{
+	unsigned long off, off_end, off_align, len_align, addr_align, align;
+	struct dax_dev *dax_dev = filp ? filp->private_data : NULL;
+	struct dax_region *dax_region;
+
+	if (!dax_dev || addr)
+		goto out;
+
+	dax_region = dax_dev->region;
+	align = dax_region->align;
+	off = pgoff << PAGE_SHIFT;
+	off_end = off + len;
+	off_align = round_up(off, align);
+
+	if ((off_end <= off_align) || ((off_end - off_align) < align))
+		goto out;
+
+	len_align = len + align;
+	if ((off + len_align) < off)
+		goto out;
+
+	addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
+			pgoff, flags);
+	if (!IS_ERR_VALUE(addr_align)) {
+		addr_align += (off - addr_align) & (align - 1);
+		return addr_align;
+	}
+ out:
+	return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+}
+
+static int __match_devt(struct device *dev, const void *data)
+{
+	const dev_t *devt = data;
+
+	return dev->devt == *devt;
+}
+
+static struct device *dax_dev_find(dev_t dev_t)
+{
+	return class_find_device(dax_class, NULL, &dev_t, __match_devt);
+}
+
+static int dax_dev_open(struct inode *inode, struct file *filp)
+{
+	struct dax_dev *dax_dev = NULL;
+	struct device *dev;
+
+	dev = dax_dev_find(inode->i_rdev);
+	if (!dev)
+		return -ENXIO;
+
+	device_lock(dev);
+	dax_dev = dev_get_drvdata(dev);
+	if (dax_dev) {
+		dev_dbg(dev, "%s\n", __func__);
+		filp->private_data = dax_dev;
+		kref_get(&dax_dev->kref);
+		inode->i_flags = S_DAX;
+	}
+	device_unlock(dev);
+
+	if (!dax_dev) {
+		put_device(dev);
+		return -ENXIO;
+	}
+	return 0;
+}
+
+static int dax_dev_release(struct inode *inode, struct file *filp)
+{
+	struct dax_dev *dax_dev = filp->private_data;
+	struct device *dev = dax_dev->dev;
+
+	dev_dbg(dax_dev->dev, "%s\n", __func__);
+	dax_dev_put(dax_dev);
+	put_device(dev);
+
+	return 0;
+}
+
+static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma,
+		const char *func)
+{
+	struct dax_region *dax_region = dax_dev->region;
+	struct device *dev = dax_dev->dev;
+	unsigned long mask;
+
+	if (!dax_dev->alive)
+		return -ENXIO;
+
+	/* prevent private / writable mappings from being established */
+	if ((vma->vm_flags & (VM_NORESERVE|VM_SHARED|VM_WRITE)) == VM_WRITE) {
+		dev_info(dev, "%s: %s: fail, attempted private mapping\n",
+				current->comm, func);
+		return -EINVAL;
+	}
+
+	mask = dax_region->align - 1;
+	if (vma->vm_start & mask || vma->vm_end & mask) {
+		dev_info(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
+				current->comm, func, vma->vm_start, vma->vm_end,
+				mask);
+		return -EINVAL;
+	}
+
+	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
+			&& (vma->vm_flags & VM_DONTCOPY) == 0) {
+		dev_info(dev, "%s: %s: fail, dax range requires MADV_DONTFORK\n",
+				current->comm, func);
+		return -EINVAL;
+	}
+
+	if (!vma_is_dax(vma)) {
+		dev_info(dev, "%s: %s: fail, vma is not DAX capable\n",
+				current->comm, func);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
+		unsigned long size)
+{
+	struct resource *res;
+	phys_addr_t phys;
+	int i;
+
+	for (i = 0; i < dax_dev->num_resources; i++) {
+		res = &dax_dev->res[i];
+		phys = pgoff * PAGE_SIZE + res->start;
+		if (phys >= res->start && phys <= res->end)
+			break;
+		pgoff -= PHYS_PFN(resource_size(res));
+	}
+
+	if (i < dax_dev->num_resources) {
+		res = &dax_dev->res[i];
+		if (phys + size - 1 <= res->end)
+			return phys;
+	}
+
+	return -1;
+}
+
+static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
+		struct vm_fault *vmf)
+{
+	unsigned long vaddr = (unsigned long) vmf->virtual_address;
+	struct device *dev = dax_dev->dev;
+	struct dax_region *dax_region;
+	int rc = VM_FAULT_SIGBUS;
+	phys_addr_t phys;
+	pfn_t pfn;
+
+	if (check_vma(dax_dev, vma, __func__))
+		return VM_FAULT_SIGBUS;
+
+	dax_region = dax_dev->region;
+	if (dax_region->align > PAGE_SIZE) {
+		dev_dbg(dev, "%s: alignment > fault size\n", __func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE);
+	if (phys == -1) {
+		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+				vmf->pgoff);
+		return VM_FAULT_SIGBUS;
+	}
+
+	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+
+	rc = vm_insert_mixed(vma, vaddr, pfn);
+
+	if (rc == -ENOMEM)
+		return VM_FAULT_OOM;
+	if (rc < 0 && rc != -EBUSY)
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
+}
+
+static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	int rc;
+	struct file *filp = vma->vm_file;
+	struct dax_dev *dax_dev = filp->private_data;
+
+	dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
+			current->comm, (vmf->flags & FAULT_FLAG_WRITE)
+			? "write" : "read", vma->vm_start, vma->vm_end);
+	rcu_read_lock();
+	rc = __dax_dev_fault(dax_dev, vma, vmf);
+	rcu_read_unlock();
+
+	return rc;
+}
+
+static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
+		struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd,
+		unsigned int flags)
+{
+	unsigned long pmd_addr = addr & PMD_MASK;
+	struct device *dev = dax_dev->dev;
+	struct dax_region *dax_region;
+	phys_addr_t phys;
+	pgoff_t pgoff;
+	pfn_t pfn;
+
+	if (check_vma(dax_dev, vma, __func__))
+		return VM_FAULT_SIGBUS;
+
+	dax_region = dax_dev->region;
+	if (dax_region->align > PMD_SIZE) {
+		dev_dbg(dev, "%s: alignment > fault size\n", __func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	/* dax pmd mappings require pfn_t_devmap() */
+	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
+		dev_dbg(dev, "%s: alignment > fault size\n", __func__);
+		return VM_FAULT_SIGBUS;
+	}
+
+	pgoff = linear_page_index(vma, pmd_addr);
+	phys = pgoff_to_phys(dax_dev, pgoff, PAGE_SIZE);
+	if (phys == -1) {
+		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
+				pgoff);
+		return VM_FAULT_SIGBUS;
+	}
+
+	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+
+	return vmf_insert_pfn_pmd(vma, addr, pmd, pfn,
+			flags & FAULT_FLAG_WRITE);
+}
+
+static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd, unsigned int flags)
+{
+	int rc;
+	struct file *filp = vma->vm_file;
+	struct dax_dev *dax_dev = filp->private_data;
+
+	dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
+			current->comm, (flags & FAULT_FLAG_WRITE)
+			? "write" : "read", vma->vm_start, vma->vm_end);
+
+	rcu_read_lock();
+	rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags);
+	rcu_read_unlock();
+
+	return rc;
+}
+
+static void dax_dev_vm_open(struct vm_area_struct *vma)
+{
+	struct file *filp = vma->vm_file;
+	struct dax_dev *dax_dev = filp->private_data;
+
+	dev_dbg(dax_dev->dev, "%s\n", __func__);
+	kref_get(&dax_dev->kref);
+}
+
+static void dax_dev_vm_close(struct vm_area_struct *vma)
+{
+	struct file *filp = vma->vm_file;
+	struct dax_dev *dax_dev = filp->private_data;
+
+	dev_dbg(dax_dev->dev, "%s\n", __func__);
+	dax_dev_put(dax_dev);
+}
+
+static const struct vm_operations_struct dax_dev_vm_ops = {
+	.fault = dax_dev_fault,
+	.pmd_fault = dax_dev_pmd_fault,
+	.open = dax_dev_vm_open,
+	.close = dax_dev_vm_close,
+};
+
+static int dax_dev_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct dax_dev *dax_dev = filp->private_data;
+	int rc;
+
+	dev_dbg(dax_dev->dev, "%s\n", __func__);
+
+	rc = check_vma(dax_dev, vma, __func__);
+	if (rc)
+		return rc;
+
+	kref_get(&dax_dev->kref);
+	vma->vm_ops = &dax_dev_vm_ops;
+	vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+	return 0;
+
+}
+
+static const struct file_operations dax_fops = {
+	.llseek = noop_llseek,
+	.owner = THIS_MODULE,
+	.open = dax_dev_open,
+	.release = dax_dev_release,
+	.get_unmapped_area = dax_dev_get_unmapped_area,
+	.mmap = dax_dev_mmap,
+};
+
+static int __init dax_init(void)
+{
+	int rc;
+
+	rc = register_chrdev(0, "dax", &dax_fops);
+	if (rc < 0)
+		return rc;
+	dax_major = rc;
+
+	dax_class = class_create(THIS_MODULE, "dax");
+	if (IS_ERR(dax_class)) {
+		unregister_chrdev(dax_major, "dax");
+		return PTR_ERR(dax_class);
+	}
+
+	return 0;
+}
+
+static void __exit dax_exit(void)
+{
+	class_destroy(dax_class);
+	unregister_chrdev(dax_major, "dax");
+	ida_destroy(&dax_minor_ida);
+}
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
+subsys_initcall(dax_init);
+module_exit(dax_exit);
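One detail worth calling out from dax_dev_get_unmapped_area() above: it requests len + align bytes from the core VM and then advances the returned address to the next address congruent to the file offset modulo the region alignment, so faults can be served with naturally aligned (e.g. PMD-sized) mappings. A standalone sketch of just that fixup arithmetic (illustrative values, not kernel code):

#include <stdio.h>

/* mirror the fixup: addr += (off - addr) & (align - 1) */
static unsigned long align_addr(unsigned long addr, unsigned long off,
		unsigned long align)
{
	return addr + ((off - addr) & (align - 1));
}

int main(void)
{
	unsigned long align = 1UL << 21;	/* 2MB region alignment */
	unsigned long addr = 0x7f0000001000UL;	/* candidate from the core VM */
	unsigned long off = 0;			/* pgoff << PAGE_SHIFT */

	/* prints 0x7f0000200000: aligned, and still within the padded request */
	printf("%#lx\n", align_addr(addr, off, align));
	return 0;
}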
@@ -0,0 +1,24 @@
+/*
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __DAX_H__
+#define __DAX_H__
+struct device;
+struct resource;
+struct dax_region;
+void dax_region_put(struct dax_region *dax_region);
+struct dax_region *alloc_dax_region(struct device *parent,
+		int region_id, struct resource *res, unsigned int align,
+		void *addr, unsigned long flags);
+int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
+		int count);
+#endif /* __DAX_H__ */
@@ -0,0 +1,158 @@
+/*
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/percpu-refcount.h>
+#include <linux/memremap.h>
+#include <linux/module.h>
+#include <linux/pfn_t.h>
+#include "../nvdimm/pfn.h"
+#include "../nvdimm/nd.h"
+#include "dax.h"
+
+struct dax_pmem {
+	struct device *dev;
+	struct percpu_ref ref;
+	struct completion cmp;
+};
+
+struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
+{
+	return container_of(ref, struct dax_pmem, ref);
+}
+
+static void dax_pmem_percpu_release(struct percpu_ref *ref)
+{
+	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
+
+	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	complete(&dax_pmem->cmp);
+}
+
+static void dax_pmem_percpu_exit(void *data)
+{
+	struct percpu_ref *ref = data;
+	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
+
+	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	percpu_ref_exit(ref);
+	wait_for_completion(&dax_pmem->cmp);
+}
+
+static void dax_pmem_percpu_kill(void *data)
+{
+	struct percpu_ref *ref = data;
+	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
+
+	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	percpu_ref_kill(ref);
+}
+
+static int dax_pmem_probe(struct device *dev)
+{
+	int rc;
+	void *addr;
+	struct resource res;
+	struct nd_pfn_sb *pfn_sb;
+	struct dax_pmem *dax_pmem;
+	struct nd_region *nd_region;
+	struct nd_namespace_io *nsio;
+	struct dax_region *dax_region;
+	struct nd_namespace_common *ndns;
+	struct nd_dax *nd_dax = to_nd_dax(dev);
+	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
+	struct vmem_altmap __altmap, *altmap = NULL;
+
+	ndns = nvdimm_namespace_common_probe(dev);
+	if (IS_ERR(ndns))
+		return PTR_ERR(ndns);
+	nsio = to_nd_namespace_io(&ndns->dev);
+
+	/* parse the 'pfn' info block via ->rw_bytes */
+	devm_nsio_enable(dev, nsio);
+	altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
+	if (IS_ERR(altmap))
+		return PTR_ERR(altmap);
+	devm_nsio_disable(dev, nsio);
+
+	pfn_sb = nd_pfn->pfn_sb;
+
+	if (!devm_request_mem_region(dev, nsio->res.start,
+				resource_size(&nsio->res), dev_name(dev))) {
+		dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
+		return -EBUSY;
+	}
+
+	dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
+	if (!dax_pmem)
+		return -ENOMEM;
+
+	dax_pmem->dev = dev;
+	init_completion(&dax_pmem->cmp);
+	rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
+			GFP_KERNEL);
+	if (rc)
+		return rc;
+
+	rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
+	if (rc) {
+		dax_pmem_percpu_exit(&dax_pmem->ref);
+		return rc;
+	}
+
+	addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
+	if (IS_ERR(addr))
+		return PTR_ERR(addr);
+
+	rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
+	if (rc) {
+		dax_pmem_percpu_kill(&dax_pmem->ref);
+		return rc;
+	}
+
+	nd_region = to_nd_region(dev->parent);
+	dax_region = alloc_dax_region(dev, nd_region->id, &res,
+			le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
+	if (!dax_region)
+		return -ENOMEM;
+
+	/* TODO: support for subdividing a dax region... */
+	rc = devm_create_dax_dev(dax_region, &res, 1);
+
+	/* child dax_dev instances now own the lifetime of the dax_region */
+	dax_region_put(dax_region);
+
+	return rc;
+}
+
+static struct nd_device_driver dax_pmem_driver = {
+	.probe = dax_pmem_probe,
+	.drv = {
+		.name = "dax_pmem",
+	},
+	.type = ND_DRIVER_DAX_PMEM,
+};
+
+static int __init dax_pmem_init(void)
+{
+	return nd_driver_register(&dax_pmem_driver);
+}
+module_init(dax_pmem_init);
+
+static void __exit dax_pmem_exit(void)
+{
+	driver_unregister(&dax_pmem_driver.drv);
+}
+module_exit(dax_pmem_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM);
@@ -124,9 +124,10 @@ static int nvdimm_bus_remove(struct device *dev)
 	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
 	struct module *provider = to_bus_provider(dev);
 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
-	int rc;
+	int rc = 0;
 
-	rc = nd_drv->remove(dev);
+	if (nd_drv->remove)
+		rc = nd_drv->remove(dev);
 	nd_region_disable(nvdimm_bus, dev);
 
 	dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
@@ -296,8 +297,8 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
 		return -EINVAL;
 	}
 
-	if (!nd_drv->probe || !nd_drv->remove) {
-		pr_debug("->probe() and ->remove() must be specified\n");
+	if (!nd_drv->probe) {
+		pr_debug("%s ->probe() must be specified\n", mod_name);
 		return -EINVAL;
 	}
 
@@ -93,6 +93,25 @@ static bool is_idle(struct device *dev, struct nd_namespace_common *ndns)
 	return true;
 }
 
+struct nd_pfn *to_nd_pfn_safe(struct device *dev)
+{
+	/*
+	 * pfn device attributes are re-used by dax device instances, so we
+	 * need to be careful to correct device-to-nd_pfn conversion.
+	 */
+	if (is_nd_pfn(dev))
+		return to_nd_pfn(dev);
+
+	if (is_nd_dax(dev)) {
+		struct nd_dax *nd_dax = to_nd_dax(dev);
+
+		return &nd_dax->nd_pfn;
+	}
+
+	WARN_ON(1);
+	return NULL;
+}
+
 static void nd_detach_and_reset(struct device *dev,
 		struct nd_namespace_common **_ndns)
 {
@@ -106,8 +125,8 @@ static void nd_detach_and_reset(struct device *dev,
 		nd_btt->lbasize = 0;
 		kfree(nd_btt->uuid);
 		nd_btt->uuid = NULL;
-	} else if (is_nd_pfn(dev)) {
-		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	} else if (is_nd_pfn(dev) || is_nd_dax(dev)) {
+		struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 
 		kfree(nd_pfn->uuid);
 		nd_pfn->uuid = NULL;
@@ -648,6 +648,9 @@ static __exit void libnvdimm_exit(void)
 	nd_region_exit();
 	nvdimm_exit();
 	nvdimm_bus_exit();
+	nd_region_devs_exit();
+	nvdimm_devs_exit();
+	ida_destroy(&nd_ida);
 }
 
 MODULE_LICENSE("GPL v2");
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include "nd-core.h"
+#include "pfn.h"
 #include "nd.h"
 
 static void nd_dax_release(struct device *dev)
@@ -97,3 +98,37 @@ struct device *nd_dax_create(struct nd_region *nd_region)
 	__nd_device_register(dev);
 	return dev;
 }
+
+int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
+{
+	int rc;
+	struct nd_dax *nd_dax;
+	struct device *dax_dev;
+	struct nd_pfn *nd_pfn;
+	struct nd_pfn_sb *pfn_sb;
+	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
+
+	if (ndns->force_raw)
+		return -ENODEV;
+
+	nvdimm_bus_lock(&ndns->dev);
+	nd_dax = nd_dax_alloc(nd_region);
+	nd_pfn = &nd_dax->nd_pfn;
+	dax_dev = nd_pfn_devinit(nd_pfn, ndns);
+	nvdimm_bus_unlock(&ndns->dev);
+	if (!dax_dev)
+		return -ENOMEM;
+	pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
+	nd_pfn->pfn_sb = pfn_sb;
+	rc = nd_pfn_validate(nd_pfn, DAX_SIG);
+	dev_dbg(dev, "%s: dax: %s\n", __func__,
+			rc == 0 ? dev_name(dax_dev) : "<none>");
+	if (rc < 0) {
+		__nd_detach_ndns(dax_dev, &nd_pfn->ndns);
+		put_device(dax_dev);
+	} else
+		__nd_device_register(dax_dev);
+
+	return rc;
+}
+EXPORT_SYMBOL(nd_dax_probe);
@@ -552,3 +552,8 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count);
+
+void __exit nvdimm_devs_exit(void)
+{
+	ida_destroy(&dimm_ida);
+}
@@ -49,6 +49,8 @@ bool is_nd_blk(struct device *dev);
 struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
 int __init nvdimm_bus_init(void);
 void nvdimm_bus_exit(void);
+void nvdimm_devs_exit(void);
+void nd_region_devs_exit(void);
 void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
 struct nd_region;
 void nd_region_create_blk_seed(struct nd_region *nd_region);
@@ -92,4 +94,5 @@ bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
 ssize_t nd_namespace_store(struct device *dev,
 		struct nd_namespace_common **_ndns, const char *buf,
 		size_t len);
+struct nd_pfn *to_nd_pfn_safe(struct device *dev);
 #endif /* __ND_CORE_H__ */
@@ -232,7 +232,7 @@ bool is_nd_pfn(struct device *dev);
 struct device *nd_pfn_create(struct nd_region *nd_region);
 struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
 		struct nd_namespace_common *ndns);
-int nd_pfn_validate(struct nd_pfn *nd_pfn);
+int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig);
 extern struct attribute_group nd_pfn_attribute_group;
 #else
 static inline int nd_pfn_probe(struct device *dev,
@@ -251,7 +251,7 @@ static inline struct device *nd_pfn_create(struct nd_region *nd_region)
 	return NULL;
 }
 
-static inline int nd_pfn_validate(struct nd_pfn *nd_pfn)
+static inline int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	return -ENODEV;
 }
@@ -259,9 +259,16 @@ static inline int nd_pfn_validate(struct nd_pfn *nd_pfn)
 
 struct nd_dax *to_nd_dax(struct device *dev);
 #if IS_ENABLED(CONFIG_NVDIMM_DAX)
+int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns);
 bool is_nd_dax(struct device *dev);
 struct device *nd_dax_create(struct nd_region *nd_region);
 #else
+static inline int nd_dax_probe(struct device *dev,
+		struct nd_namespace_common *ndns)
+{
+	return -ENODEV;
+}
+
 static inline bool is_nd_dax(struct device *dev)
 {
 	return false;
@@ -19,6 +19,7 @@
 
 #define PFN_SIG_LEN 16
 #define PFN_SIG "NVDIMM_PFN_INFO\0"
+#define DAX_SIG "NVDIMM_DAX_INFO\0"
 
 struct nd_pfn_sb {
 	u8 signature[PFN_SIG_LEN];
@@ -54,25 +54,6 @@ struct nd_pfn *to_nd_pfn(struct device *dev)
 }
 EXPORT_SYMBOL(to_nd_pfn);
 
-static struct nd_pfn *to_nd_pfn_safe(struct device *dev)
-{
-	/*
-	 * pfn device attributes are re-used by dax device instances, so we
-	 * need to be careful to correct device-to-nd_pfn conversion.
-	 */
-	if (is_nd_pfn(dev))
-		return to_nd_pfn(dev);
-
-	if (is_nd_dax(dev)) {
-		struct nd_dax *nd_dax = to_nd_dax(dev);
-
-		return &nd_dax->nd_pfn;
-	}
-
-	WARN_ON(1);
-	return NULL;
-}
-
 static ssize_t mode_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -360,7 +341,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 	return dev;
 }
 
-int nd_pfn_validate(struct nd_pfn *nd_pfn)
+int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
 	u64 checksum, offset;
 	struct nd_namespace_io *nsio;
@@ -377,7 +358,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
 	if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)))
 		return -ENXIO;
 
-	if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN) != 0)
+	if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0)
 		return -ENODEV;
 
 	checksum = le64_to_cpu(pfn_sb->checksum);
@@ -416,6 +397,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
 		return -ENODEV;
 	}
 
+	if (nd_pfn->align == 0)
+		nd_pfn->align = le32_to_cpu(pfn_sb->align);
 	if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) {
 		dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
 				nd_pfn->align, nvdimm_namespace_capacity(ndns));
@@ -436,8 +419,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
 		return -EBUSY;
 	}
 
-	nd_pfn->align = le32_to_cpu(pfn_sb->align);
-	if (!is_power_of_2(offset) || offset < PAGE_SIZE) {
+	if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align))
+			|| !IS_ALIGNED(offset, PAGE_SIZE)) {
 		dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n",
 				offset);
 		return -ENXIO;
@@ -467,7 +450,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
 	pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
 	nd_pfn = to_nd_pfn(pfn_dev);
 	nd_pfn->pfn_sb = pfn_sb;
-	rc = nd_pfn_validate(nd_pfn);
+	rc = nd_pfn_validate(nd_pfn, PFN_SIG);
 	dev_dbg(dev, "%s: pfn: %s\n", __func__,
 			rc == 0 ? dev_name(pfn_dev) : "<none>");
 	if (rc < 0) {
@@ -552,6 +535,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 	struct nd_pfn_sb *pfn_sb;
 	unsigned long npfns;
 	phys_addr_t offset;
+	const char *sig;
 	u64 checksum;
 	int rc;
 
@@ -560,7 +544,11 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 		return -ENOMEM;
 
 	nd_pfn->pfn_sb = pfn_sb;
-	rc = nd_pfn_validate(nd_pfn);
+	if (is_nd_dax(&nd_pfn->dev))
+		sig = DAX_SIG;
+	else
+		sig = PFN_SIG;
+	rc = nd_pfn_validate(nd_pfn, sig);
 	if (rc != -ENODEV)
 		return rc;
 
@@ -635,7 +623,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 	pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
 	pfn_sb->dataoff = cpu_to_le64(offset);
 	pfn_sb->npfns = cpu_to_le64(npfns);
-	memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
+	memcpy(pfn_sb->signature, sig, PFN_SIG_LEN);
 	memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
 	memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
 	pfn_sb->version_major = cpu_to_le16(1);
@@ -320,7 +320,8 @@ static int nd_pmem_probe(struct device *dev)
 		return pmem_attach_disk(dev, ndns);
 
 	/* if we find a valid info-block we'll come back as that personality */
-	if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0)
+	if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0
+			|| nd_dax_probe(dev, ndns) == 0)
 		return -ENXIO;
 
 	/* ...otherwise we're just a raw pmem device */
@@ -793,3 +793,8 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
 			__func__);
 }
 EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
+
+void __exit nd_region_devs_exit(void)
+{
+	ida_destroy(&region_ida);
+}
@@ -29,6 +29,7 @@
 #include <linux/log2.h>
 #include <linux/cleancache.h>
 #include <linux/dax.h>
+#include <linux/badblocks.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -1159,6 +1160,33 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
 
+static bool blkdev_dax_capable(struct block_device *bdev)
+{
+	struct gendisk *disk = bdev->bd_disk;
+
+	if (!disk->fops->direct_access || !IS_ENABLED(CONFIG_FS_DAX))
+		return false;
+
+	/*
+	 * If the partition is not aligned on a page boundary, we can't
+	 * do dax I/O to it.
+	 */
+	if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
+			|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+		return false;
+
+	/*
+	 * If the device has known bad blocks, force all I/O through the
+	 * driver / page cache.
+	 *
+	 * TODO: support finer grained dax error handling
+	 */
+	if (disk->bb && disk->bb->count)
+		return false;
+
+	return true;
+}
+
 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
 
 /*
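The alignment test above works in 512-byte sectors: with 4K pages, PAGE_SIZE / 512 is 8, so a partition is DAX-capable only if it starts on and spans a multiple of 8 sectors. A standalone illustration of the same check (the sector values are made up):

#include <stdbool.h>
#include <stdio.h>

#define SECTORS_PER_PAGE (4096 / 512)	/* assumes 4K pages */

static bool part_page_aligned(unsigned long long start_sect,
		unsigned long long nr_sects)
{
	return !(start_sect % SECTORS_PER_PAGE) && !(nr_sects % SECTORS_PER_PAGE);
}

int main(void)
{
	printf("%d\n", part_page_aligned(34, 1048576));		/* 0: start misaligned */
	printf("%d\n", part_page_aligned(2048, 1048576));	/* 1: page aligned */
	return 0;
}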
@@ -1724,79 +1752,13 @@ static const struct address_space_operations def_blk_aops = {
 	.is_dirty_writeback = buffer_check_dirty_writeback,
 };
 
-#ifdef CONFIG_FS_DAX
-/*
- * In the raw block case we do not need to contend with truncation nor
- * unwritten file extents.  Without those concerns there is no need for
- * additional locking beyond the mmap_sem context that these routines
- * are already executing under.
- *
- * Note, there is no protection if the block device is dynamically
- * resized (partition grow/shrink) during a fault. A stable block device
- * size is already not enforced in the blkdev_direct_IO path.
- *
- * For DAX, it is the responsibility of the block device driver to
- * ensure the whole-disk device size is stable while requests are in
- * flight.
- *
- * Finally, unlike the filemap_page_mkwrite() case there is no
- * filesystem superblock to sync against freezing.  We still include a
- * pfn_mkwrite callback for dax drivers to receive write fault
- * notifications.
- */
-static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	return __dax_fault(vma, vmf, blkdev_get_block, NULL);
-}
-
-static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
-		struct vm_fault *vmf)
-{
-	return dax_pfn_mkwrite(vma, vmf);
-}
-
-static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, unsigned int flags)
-{
-	return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
-}
-
-static const struct vm_operations_struct blkdev_dax_vm_ops = {
-	.fault		= blkdev_dax_fault,
-	.pmd_fault	= blkdev_dax_pmd_fault,
-	.pfn_mkwrite	= blkdev_dax_pfn_mkwrite,
-};
-
-static const struct vm_operations_struct blkdev_default_vm_ops = {
-	.fault		= filemap_fault,
-	.map_pages	= filemap_map_pages,
-};
-
-static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct inode *bd_inode = bdev_file_inode(file);
-
-	file_accessed(file);
-	if (IS_DAX(bd_inode)) {
-		vma->vm_ops = &blkdev_dax_vm_ops;
-		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
-	} else {
-		vma->vm_ops = &blkdev_default_vm_ops;
-	}
-
-	return 0;
-}
-#else
-#define blkdev_mmap generic_file_mmap
-#endif
-
 const struct file_operations def_blk_fops = {
 	.open		= blkdev_open,
 	.release	= blkdev_close,
 	.llseek		= block_llseek,
 	.read_iter	= blkdev_read_iter,
 	.write_iter	= blkdev_write_iter,
-	.mmap		= blkdev_mmap,
+	.mmap		= generic_file_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
@@ -2320,14 +2320,6 @@ extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
-#ifdef CONFIG_FS_DAX
-extern bool blkdev_dax_capable(struct block_device *bdev);
-#else
-static inline bool blkdev_dax_capable(struct block_device *bdev)
-{
-	return false;
-}
-#endif
 
 extern struct super_block *blockdev_superblock;
 
|
@ -222,7 +222,6 @@ struct fsxattr {
|
|||
#define BLKSECDISCARD _IO(0x12,125)
|
||||
#define BLKROTATIONAL _IO(0x12,126)
|
||||
#define BLKZEROOUT _IO(0x12,127)
|
||||
#define BLKDAXGET _IO(0x12,129)
|
||||
|
||||
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
|
||||
#define FIBMAP _IO(0x00,1) /* bmap access */
|
||||
|
|
|
@ -1013,6 +1013,7 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
|
|||
insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
|
||||
return VM_FAULT_NOPAGE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
|
||||
|
||||
static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
|
||||
pmd_t *pmd)
|
||||
|
|
|
@ -624,6 +624,7 @@ pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
|
|||
{
|
||||
return vma_hugecache_offset(hstate_vma(vma), vma, address);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(linear_hugepage_index);
|
||||
|
||||
/*
|
||||
* Return the size of the pages allocated when backing a VMA. In the majority
|
||||
|
|
|
@@ -16,6 +16,7 @@ ldflags-y += --wrap=phys_to_pfn_t
 DRIVERS := ../../../drivers
 NVDIMM_SRC := $(DRIVERS)/nvdimm
 ACPI_SRC := $(DRIVERS)/acpi
+DAX_SRC := $(DRIVERS)/dax
 
 obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
@@ -23,6 +24,8 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
 obj-$(CONFIG_ACPI_NFIT) += nfit.o
+obj-$(CONFIG_DEV_DAX) += dax.o
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 
 nfit-y := $(ACPI_SRC)/nfit.o
 nfit-y += config_check.o
@@ -39,6 +42,12 @@ nd_blk-y += config_check.o
 nd_e820-y := $(NVDIMM_SRC)/e820.o
 nd_e820-y += config_check.o
 
+dax-y := $(DAX_SRC)/dax.o
+dax-y += config_check.o
+
+dax_pmem-y := $(DAX_SRC)/pmem.o
+dax_pmem-y += config_check.o
+
 libnvdimm-y := $(NVDIMM_SRC)/core.o
 libnvdimm-y += $(NVDIMM_SRC)/bus.o
 libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
@@ -12,4 +12,6 @@ void check(void)
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
 	BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM));
 }