nd_blk: change aperture mapping from WC to WB

This should result in a pretty sizeable performance gain for reads.  For
rough comparison I did some simple read testing using PMEM to compare
reads of write combining (WC) mappings vs write-back (WB).  This was
done on a random lab machine.

PMEM reads from a write combining mapping:
	# dd of=/dev/null if=/dev/pmem0 bs=4096 count=100000
	100000+0 records in
	100000+0 records out
	409600000 bytes (410 MB) copied, 9.2855 s, 44.1 MB/s

PMEM reads from a write-back mapping:
	# dd of=/dev/null if=/dev/pmem0 bs=4096 count=1000000
	1000000+0 records in
	1000000+0 records out
	4096000000 bytes (4.1 GB) copied, 3.44034 s, 1.2 GB/s

To be able to safely support a write-back aperture I needed to add
support for the "read flush" _DSM flag, as outlined in the DSM spec:

http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf

This flag tells the ND BLK driver that it needs to flush the cache lines
associated with the aperture after the aperture is moved but before any
new data is read.  This ensures that any stale cache lines from the
previous contents of the aperture will be discarded from the processor
cache, and the new data will be read properly from the DIMM.  We know
that the cache lines are clean and will be discarded without any
writeback because either a) the previous aperture operation was a read,
and we never modified the contents of the aperture, or b) the previous
aperture operation was a write and we must have written back the dirtied
contents of the aperture to the DIMM before the I/O was completed.

In order to add support for the "read flush" flag I needed to add a
generic routine to invalidate cache lines, mmio_flush_range().  This is
protected by the ARCH_HAS_MMIO_FLUSH Kconfig variable, and is currently
only supported on x86.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
This commit is contained in:
Ross Zwisler 2015-08-27 13:14:20 -06:00 committed by Dan Williams
Parent e2e05394e4
Commit 67a3e8fe90
11 changed files with 88 additions and 36 deletions

View file

@@ -28,6 +28,7 @@ config X86
select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_PMEM_API select ARCH_HAS_PMEM_API
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_SG_CHAIN select ARCH_HAS_SG_CHAIN
select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI

View file

@@ -89,6 +89,8 @@ int set_pages_rw(struct page *page, int numpages);
void clflush_cache_range(void *addr, unsigned int size); void clflush_cache_range(void *addr, unsigned int size);
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
#ifdef CONFIG_DEBUG_RODATA #ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void); void mark_rodata_ro(void);
extern const int rodata_test_data; extern const int rodata_test_data;

View file

@@ -248,8 +248,6 @@ static inline void flush_write_buffers(void)
#endif #endif
} }
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
extern void native_io_delay(void); extern void native_io_delay(void);

View file

@@ -18,6 +18,8 @@
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/special_insns.h> #include <asm/special_insns.h>
#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
#ifdef CONFIG_ARCH_HAS_PMEM_API #ifdef CONFIG_ARCH_HAS_PMEM_API
/** /**
* arch_memcpy_to_pmem - copy data to persistent memory * arch_memcpy_to_pmem - copy data to persistent memory

View file

@@ -410,6 +410,7 @@ config ACPI_NFIT
tristate "ACPI NVDIMM Firmware Interface Table (NFIT)" tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
depends on PHYS_ADDR_T_64BIT depends on PHYS_ADDR_T_64BIT
depends on BLK_DEV depends on BLK_DEV
depends on ARCH_HAS_MMIO_FLUSH
select LIBNVDIMM select LIBNVDIMM
help help
Infrastructure to probe ACPI 6 compliant platforms for Infrastructure to probe ACPI 6 compliant platforms for

View file

@@ -1017,7 +1017,7 @@ static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
if (mmio->num_lines) if (mmio->num_lines)
offset = to_interleave_offset(offset, mmio); offset = to_interleave_offset(offset, mmio);
return readq(mmio->base + offset); return readq(mmio->addr.base + offset);
} }
static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
@@ -1042,11 +1042,11 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
if (mmio->num_lines) if (mmio->num_lines)
offset = to_interleave_offset(offset, mmio); offset = to_interleave_offset(offset, mmio);
writeq(cmd, mmio->base + offset); writeq(cmd, mmio->addr.base + offset);
wmb_blk(nfit_blk); wmb_blk(nfit_blk);
if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH)
readq(mmio->base + offset); readq(mmio->addr.base + offset);
} }
static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
@@ -1078,11 +1078,16 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
} }
if (rw) if (rw)
memcpy_to_pmem(mmio->aperture + offset, memcpy_to_pmem(mmio->addr.aperture + offset,
iobuf + copied, c); iobuf + copied, c);
else else {
if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH)
mmio_flush_range((void __force *)
mmio->addr.aperture + offset, c);
memcpy_from_pmem(iobuf + copied, memcpy_from_pmem(iobuf + copied,
mmio->aperture + offset, c); mmio->addr.aperture + offset, c);
}
copied += c; copied += c;
len -= c; len -= c;
@@ -1129,7 +1134,10 @@ static void nfit_spa_mapping_release(struct kref *kref)
WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex)); WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index); dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
iounmap(spa_map->iomem); if (spa_map->type == SPA_MAP_APERTURE)
memunmap((void __force *)spa_map->addr.aperture);
else
iounmap(spa_map->addr.base);
release_mem_region(spa->address, spa->length); release_mem_region(spa->address, spa->length);
list_del(&spa_map->list); list_del(&spa_map->list);
kfree(spa_map); kfree(spa_map);
@@ -1175,7 +1183,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
spa_map = find_spa_mapping(acpi_desc, spa); spa_map = find_spa_mapping(acpi_desc, spa);
if (spa_map) { if (spa_map) {
kref_get(&spa_map->kref); kref_get(&spa_map->kref);
return spa_map->iomem; return spa_map->addr.base;
} }
spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL); spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
@@ -1191,20 +1199,19 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
if (!res) if (!res)
goto err_mem; goto err_mem;
if (type == SPA_MAP_APERTURE) { spa_map->type = type;
/* if (type == SPA_MAP_APERTURE)
* TODO: memremap_pmem() support, but that requires cache spa_map->addr.aperture = (void __pmem *)memremap(start, n,
* flushing when the aperture is moved. ARCH_MEMREMAP_PMEM);
*/ else
spa_map->iomem = ioremap_wc(start, n); spa_map->addr.base = ioremap_nocache(start, n);
} else
spa_map->iomem = ioremap_nocache(start, n);
if (!spa_map->iomem)
if (!spa_map->addr.base)
goto err_map; goto err_map;
list_add_tail(&spa_map->list, &acpi_desc->spa_maps); list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
return spa_map->iomem; return spa_map->addr.base;
err_map: err_map:
release_mem_region(start, n); release_mem_region(start, n);
@@ -1267,7 +1274,7 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
nfit_blk->dimm_flags = flags.flags; nfit_blk->dimm_flags = flags.flags;
else if (rc == -ENOTTY) { else if (rc == -ENOTTY) {
/* fall back to a conservative default */ /* fall back to a conservative default */
nfit_blk->dimm_flags = ND_BLK_DCR_LATCH; nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH;
rc = 0; rc = 0;
} else } else
rc = -ENXIO; rc = -ENXIO;
@@ -1307,9 +1314,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
/* map block aperture memory */ /* map block aperture memory */
nfit_blk->bdw_offset = nfit_mem->bdw->offset; nfit_blk->bdw_offset = nfit_mem->bdw->offset;
mmio = &nfit_blk->mmio[BDW]; mmio = &nfit_blk->mmio[BDW];
mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw, mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
SPA_MAP_APERTURE); SPA_MAP_APERTURE);
if (!mmio->base) { if (!mmio->addr.base) {
dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
nvdimm_name(nvdimm)); nvdimm_name(nvdimm));
return -ENOMEM; return -ENOMEM;
@@ -1330,9 +1337,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
nfit_blk->stat_offset = nfit_mem->dcr->status_offset; nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
mmio = &nfit_blk->mmio[DCR]; mmio = &nfit_blk->mmio[DCR];
mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr, mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
SPA_MAP_CONTROL); SPA_MAP_CONTROL);
if (!mmio->base) { if (!mmio->addr.base) {
dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
nvdimm_name(nvdimm)); nvdimm_name(nvdimm));
return -ENOMEM; return -ENOMEM;
@@ -1399,7 +1406,7 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
for (i = 0; i < 2; i++) { for (i = 0; i < 2; i++) {
struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i]; struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
if (mmio->base) if (mmio->addr.base)
nfit_spa_unmap(acpi_desc, mmio->spa); nfit_spa_unmap(acpi_desc, mmio->spa);
} }
nd_blk_region_set_provider_data(ndbr, NULL); nd_blk_region_set_provider_data(ndbr, NULL);

View file

@@ -41,6 +41,7 @@ enum nfit_uuids {
}; };
enum { enum {
ND_BLK_READ_FLUSH = 1,
ND_BLK_DCR_LATCH = 2, ND_BLK_DCR_LATCH = 2,
}; };
@@ -117,12 +118,16 @@ enum nd_blk_mmio_selector {
DCR, DCR,
}; };
struct nfit_blk { struct nd_blk_addr {
struct nfit_blk_mmio {
union { union {
void __iomem *base; void __iomem *base;
void __pmem *aperture; void __pmem *aperture;
}; };
};
struct nfit_blk {
struct nfit_blk_mmio {
struct nd_blk_addr addr;
u64 size; u64 size;
u64 base_offset; u64 base_offset;
u32 line_size; u32 line_size;
@@ -149,7 +154,8 @@ struct nfit_spa_mapping {
struct acpi_nfit_system_address *spa; struct acpi_nfit_system_address *spa;
struct list_head list; struct list_head list;
struct kref kref; struct kref kref;
void __iomem *iomem; enum spa_map_type type;
struct nd_blk_addr addr;
}; };
static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref) static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)

View file

@@ -531,4 +531,7 @@ config ARCH_HAS_SG_CHAIN
config ARCH_HAS_PMEM_API config ARCH_HAS_PMEM_API
bool bool
config ARCH_HAS_MMIO_FLUSH
bool
endmenu endmenu

View file

@@ -1,8 +1,10 @@
ldflags-y += --wrap=ioremap_wc ldflags-y += --wrap=ioremap_wc
ldflags-y += --wrap=memremap
ldflags-y += --wrap=devm_ioremap_nocache ldflags-y += --wrap=devm_ioremap_nocache
ldflags-y += --wrap=devm_memremap ldflags-y += --wrap=devm_memremap
ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=ioremap_nocache
ldflags-y += --wrap=iounmap ldflags-y += --wrap=iounmap
ldflags-y += --wrap=memunmap
ldflags-y += --wrap=__devm_request_region ldflags-y += --wrap=__devm_request_region
ldflags-y += --wrap=__request_region ldflags-y += --wrap=__request_region
ldflags-y += --wrap=__release_region ldflags-y += --wrap=__release_region

View file

@@ -89,12 +89,25 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
nfit_res = get_nfit_res(offset); nfit_res = get_nfit_res(offset);
rcu_read_unlock(); rcu_read_unlock();
if (nfit_res) if (nfit_res)
return (void __iomem *) nfit_res->buf + offset return nfit_res->buf + offset - nfit_res->res->start;
- nfit_res->res->start;
return devm_memremap(dev, offset, size, flags); return devm_memremap(dev, offset, size, flags);
} }
EXPORT_SYMBOL(__wrap_devm_memremap); EXPORT_SYMBOL(__wrap_devm_memremap);
void *__wrap_memremap(resource_size_t offset, size_t size,
unsigned long flags)
{
struct nfit_test_resource *nfit_res;
rcu_read_lock();
nfit_res = get_nfit_res(offset);
rcu_read_unlock();
if (nfit_res)
return nfit_res->buf + offset - nfit_res->res->start;
return memremap(offset, size, flags);
}
EXPORT_SYMBOL(__wrap_memremap);
void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
{ {
return __nfit_test_ioremap(offset, size, ioremap_nocache); return __nfit_test_ioremap(offset, size, ioremap_nocache);
@@ -120,6 +133,19 @@ void __wrap_iounmap(volatile void __iomem *addr)
} }
EXPORT_SYMBOL(__wrap_iounmap); EXPORT_SYMBOL(__wrap_iounmap);
void __wrap_memunmap(void *addr)
{
struct nfit_test_resource *nfit_res;
rcu_read_lock();
nfit_res = get_nfit_res((unsigned long) addr);
rcu_read_unlock();
if (nfit_res)
return;
return memunmap(addr);
}
EXPORT_SYMBOL(__wrap_memunmap);
static struct resource *nfit_test_request_region(struct device *dev, static struct resource *nfit_test_request_region(struct device *dev,
struct resource *parent, resource_size_t start, struct resource *parent, resource_size_t start,
resource_size_t n, const char *name, int flags) resource_size_t n, const char *name, int flags)

View file

@@ -1029,9 +1029,13 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
lane = nd_region_acquire_lane(nd_region); lane = nd_region_acquire_lane(nd_region);
if (rw) if (rw)
memcpy(mmio->base + dpa, iobuf, len); memcpy(mmio->addr.base + dpa, iobuf, len);
else else {
memcpy(iobuf, mmio->base + dpa, len); memcpy(iobuf, mmio->addr.base + dpa, len);
/* give us some some coverage of the mmio_flush_range() API */
mmio_flush_range(mmio->addr.base + dpa, len);
}
nd_region_release_lane(nd_region, lane); nd_region_release_lane(nd_region, lane);
return 0; return 0;