libnvdimm: namespace indices: read and validate

The on-media label format [1] consists of two index blocks followed by
an array of labels.  None of these structures are ever updated in place.
A sequence number tracks the current active index and the next one to
write, while labels are written to free slots.

    +------------+
    |            |
    |  nsindex0  |
    |            |
    +------------+
    |            |
    |  nsindex1  |
    |            |
    +------------+
    |   label0   |
    +------------+
    |   label1   |
    +------------+
    |            |
     ....nslot...
    |            |
    +------------+
    |   labelN   |
    +------------+

After reading valid labels, store the dpa ranges they claim into
per-dimm resource trees.

[1]: http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf

Cc: Neil Brown <neilb@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
This commit is contained in:
Dan Williams 2015-06-09 16:09:36 -04:00
Родитель eaf961536e
Коммит 4a826c83db
7 изменённых файлов: 520 добавлений и 2 удалений

Просмотреть файл

@ -10,3 +10,4 @@ libnvdimm-y += dimm.o
libnvdimm-y += region_devs.o libnvdimm-y += region_devs.o
libnvdimm-y += region.o libnvdimm-y += region.o
libnvdimm-y += namespace_devs.o libnvdimm-y += namespace_devs.o
libnvdimm-y += label.o

Просмотреть файл

@ -18,6 +18,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/nd.h> #include <linux/nd.h>
#include "label.h"
#include "nd.h" #include "nd.h"
static void free_data(struct nvdimm_drvdata *ndd) static void free_data(struct nvdimm_drvdata *ndd)
@ -42,6 +43,11 @@ static int nvdimm_probe(struct device *dev)
return -ENOMEM; return -ENOMEM;
dev_set_drvdata(dev, ndd); dev_set_drvdata(dev, ndd);
ndd->dpa.name = dev_name(dev);
ndd->ns_current = -1;
ndd->ns_next = -1;
ndd->dpa.start = 0;
ndd->dpa.end = -1;
ndd->dev = dev; ndd->dev = dev;
rc = nvdimm_init_nsarea(ndd); rc = nvdimm_init_nsarea(ndd);
@ -54,6 +60,17 @@ static int nvdimm_probe(struct device *dev)
dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size); dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size);
nvdimm_bus_lock(dev);
ndd->ns_current = nd_label_validate(ndd);
ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
nd_label_copy(ndd, to_next_namespace_index(ndd),
to_current_namespace_index(ndd));
rc = nd_label_reserve_dpa(ndd);
nvdimm_bus_unlock(dev);
if (rc)
goto err;
return 0; return 0;
err: err:
@ -64,7 +81,13 @@ static int nvdimm_probe(struct device *dev)
static int nvdimm_remove(struct device *dev) static int nvdimm_remove(struct device *dev)
{ {
struct nvdimm_drvdata *ndd = dev_get_drvdata(dev); struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
struct resource *res, *_r;
nvdimm_bus_lock(dev);
dev_set_drvdata(dev, NULL);
for_each_dpa_resource_safe(ndd, res, _r)
nvdimm_free_dpa(ndd, res);
nvdimm_bus_unlock(dev);
free_data(ndd); free_data(ndd);
return 0; return 0;

Просмотреть файл

@ -92,8 +92,12 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
if (ndd->data) if (ndd->data)
return 0; return 0;
if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0) if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0
|| ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) {
dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n",
ndd->nsarea.max_xfer, ndd->nsarea.config_size);
return -ENXIO; return -ENXIO;
}
ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL); ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL);
if (!ndd->data) if (!ndd->data)
@ -243,6 +247,30 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
} }
EXPORT_SYMBOL_GPL(nvdimm_create); EXPORT_SYMBOL_GPL(nvdimm_create);
/*
 * nvdimm_free_dpa - drop a dpa reservation from a dimm's resource tree
 * @ndd: dimm driver data owning the 'dpa' root resource
 * @res: reservation to release
 *
 * Frees the name attached to @res and releases the range it covers from
 * ndd->dpa.  Warns once when the nvdimm bus lock is not held.
 */
void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res)
{
	struct resource *dpa_root = &ndd->dpa;

	WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));

	/* the name goes away together with the range it labels */
	kfree(res->name);
	__release_region(dpa_root, res->start, resource_size(res));
}
/*
 * nvdimm_allocate_dpa - reserve a dpa range in a dimm's resource tree
 * @ndd: dimm driver data owning the 'dpa' root resource
 * @label_id: identifier copied and used as the name of the new resource
 * @start: first address of the range
 * @n: length of the range
 *
 * Returns the new resource, or NULL when either the name copy cannot be
 * allocated or the range cannot be requested.  Warns once when the nvdimm
 * bus lock is not held.
 */
struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
		struct nd_label_id *label_id, resource_size_t start,
		resource_size_t n)
{
	struct resource *res;
	char *id_copy;

	id_copy = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL);
	if (id_copy == NULL)
		return NULL;

	WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
	res = __request_region(&ndd->dpa, start, n, id_copy, 0);
	if (res)
		return res;

	/* the request failed, nothing references the copied name */
	kfree(id_copy);
	return NULL;
}
static int count_dimms(struct device *dev, void *c) static int count_dimms(struct device *dev, void *c)
{ {
int *count = c; int *count = c;

290
drivers/nvdimm/label.c Normal file
Просмотреть файл

@ -0,0 +1,290 @@
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/device.h>
#include <linux/ndctl.h>
#include <linux/io.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "label.h"
#include "nd.h"
/*
 * best_seq - choose between the sequence numbers of the two index blocks
 * @a: raw sequence field of the first index
 * @b: raw sequence field of the second index
 *
 * Only the bits covered by NSINDEX_SEQ_MASK take part in the comparison.
 * Returns the masked value of the preferred sequence number.
 */
static u32 best_seq(u32 a, u32 b)
{
	const u32 seq_a = a & NSINDEX_SEQ_MASK;
	const u32 seq_b = b & NSINDEX_SEQ_MASK;

	/* a zero 'a', or a tie, resolves to 'b' */
	if (!seq_a || seq_a == seq_b)
		return seq_b;

	/* a zero 'b' leaves 'a' as the only candidate */
	if (!seq_b)
		return seq_a;

	/* 'b' is preferred only when it directly follows 'a' */
	return nd_inc_seq(seq_a) == seq_b ? seq_b : seq_a;
}
/*
 * sizeof_namespace_index - size in bytes of one namespace index block
 * @ndd: dimm driver data; supplies the config area size and caches the result
 *
 * The value is derived from ndd->nsarea.config_size on the first call and
 * stored in ndd->nsindex_size; later calls return the stored value.
 */
size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
{
	u32 span;

	if (ndd->nsindex_size != 0)
		return ndd->nsindex_size;

	/*
	 * The minimum index space is 512 bytes, with that amount of
	 * index we can describe ~1400 labels which is less than a byte
	 * of overhead per label.  Round up to a byte of overhead per
	 * label and determine the size of the index region.  Yes, this
	 * starts to waste space at larger config_sizes, but it's
	 * unlikely we'll ever see anything but 128K.
	 */
	span = ndd->nsarea.config_size / 129;
	span = span / (NSINDEX_ALIGN * 2);
	ndd->nsindex_size = span * NSINDEX_ALIGN;

	return ndd->nsindex_size;
}
int nd_label_validate(struct nvdimm_drvdata *ndd)
{
/*
* On media label format consists of two index blocks followed
* by an array of labels. None of these structures are ever
* updated in place. A sequence number tracks the current
* active index and the next one to write, while labels are
* written to free slots.
*
* +------------+
* | |
* | nsindex0 |
* | |
* +------------+
* | |
* | nsindex1 |
* | |
* +------------+
* | label0 |
* +------------+
* | label1 |
* +------------+
* | |
* ....nslot...
* | |
* +------------+
* | labelN |
* +------------+
*/
struct nd_namespace_index *nsindex[] = {
to_namespace_index(ndd, 0),
to_namespace_index(ndd, 1),
};
const int num_index = ARRAY_SIZE(nsindex);
struct device *dev = ndd->dev;
bool valid[2] = { 0 };
int i, num_valid = 0;
u32 seq;
for (i = 0; i < num_index; i++) {
u32 nslot;
u8 sig[NSINDEX_SIG_LEN];
u64 sum_save, sum, size;
memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
dev_dbg(dev, "%s: nsindex%d signature invalid\n",
__func__, i);
continue;
}
sum_save = __le64_to_cpu(nsindex[i]->checksum);
nsindex[i]->checksum = __cpu_to_le64(0);
sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
nsindex[i]->checksum = __cpu_to_le64(sum_save);
if (sum != sum_save) {
dev_dbg(dev, "%s: nsindex%d checksum invalid\n",
__func__, i);
continue;
}
seq = __le32_to_cpu(nsindex[i]->seq);
if ((seq & NSINDEX_SEQ_MASK) == 0) {
dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n",
__func__, i, seq);
continue;
}
/* sanity check the index against expected values */
if (__le64_to_cpu(nsindex[i]->myoff)
!= i * sizeof_namespace_index(ndd)) {
dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n",
__func__, i, (unsigned long long)
__le64_to_cpu(nsindex[i]->myoff));
continue;
}
if (__le64_to_cpu(nsindex[i]->otheroff)
!= (!i) * sizeof_namespace_index(ndd)) {
dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n",
__func__, i, (unsigned long long)
__le64_to_cpu(nsindex[i]->otheroff));
continue;
}
size = __le64_to_cpu(nsindex[i]->mysize);
if (size > sizeof_namespace_index(ndd)
|| size < sizeof(struct nd_namespace_index)) {
dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n",
__func__, i, size);
continue;
}
nslot = __le32_to_cpu(nsindex[i]->nslot);
if (nslot * sizeof(struct nd_namespace_label)
+ 2 * sizeof_namespace_index(ndd)
> ndd->nsarea.config_size) {
dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
__func__, i, nslot,
ndd->nsarea.config_size);
continue;
}
valid[i] = true;
num_valid++;
}
switch (num_valid) {
case 0:
break;
case 1:
for (i = 0; i < num_index; i++)
if (valid[i])
return i;
/* can't have num_valid > 0 but valid[] = { false, false } */
WARN_ON(1);
break;
default:
/* pick the best index... */
seq = best_seq(__le32_to_cpu(nsindex[0]->seq),
__le32_to_cpu(nsindex[1]->seq));
if (seq == (__le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK))
return 1;
else
return 0;
break;
}
return -1;
}
/*
 * nd_label_copy - duplicate one namespace index block into another
 * @ndd: dimm driver data, used to size the copy
 * @dst: destination index block, may be NULL
 * @src: source index block, may be NULL
 *
 * Copies sizeof_namespace_index(ndd) bytes from @src to @dst.  Does nothing
 * when either pointer is NULL.
 */
void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
		struct nd_namespace_index *src)
{
	if (!dst || !src)
		return;

	memcpy(dst, src, sizeof_namespace_index(ndd));
}
static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd)
{
void *base = to_namespace_index(ndd, 0);
return base + 2 * sizeof_namespace_index(ndd);
}
/*
 * for_each_clear_bit_le - iterate over every zero bit of a bitmap
 * @bit: lvalue that receives the position of each zero bit in turn
 * @addr: bitmap to scan with find_next_zero_bit_le()
 * @size: number of bits to scan; iteration stops once @bit reaches it
 *
 * Note that @bit, @addr and @size are each evaluated more than once.
 */
#define for_each_clear_bit_le(bit, addr, size) \
for ((bit) = find_next_zero_bit_le((addr), (size), 0); \
(bit) < (size); \
(bit) = find_next_zero_bit_le((addr), (size), (bit) + 1))
/**
 * preamble_current - fetch the working set shared by the nd_label_* routines
 * @ndd: dimm container for the relevant label set
 * @nsindex_out: receives the currently active namespace index
 * @free: receives the free label bitmap stored in that index
 * @nslot: receives the number of slots in the label space
 *
 * Returns false, leaving the outputs untouched, when there is no current
 * index; returns true once all three outputs have been filled in.
 */
static bool preamble_current(struct nvdimm_drvdata *ndd,
		struct nd_namespace_index **nsindex_out,
		unsigned long **free, u32 *nslot)
{
	struct nd_namespace_index *cur = to_current_namespace_index(ndd);

	if (!cur)
		return false;

	*free = (unsigned long *) cur->free;
	*nslot = __le32_to_cpu(cur->nslot);
	*nsindex_out = cur;

	return true;
}
/*
 * nd_label_gen_id - format the identifier string for a label
 * @label_id: buffer that receives the identifier
 * @uuid: uuid of the label
 * @flags: label flags; NSLABEL_FLAG_LOCAL selects the "blk" prefix,
 *         otherwise the prefix is "pmem"
 *
 * Returns label_id->id, or NULL when @label_id or @uuid is NULL.
 */
static char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags)
{
	const char *prefix;

	if (label_id == NULL || uuid == NULL)
		return NULL;

	prefix = (flags & NSLABEL_FLAG_LOCAL) ? "blk" : "pmem";
	snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb", prefix, uuid);

	return label_id->id;
}
/*
 * slot_valid - basic consistency checks for a label read from a slot
 * @nd_label: label to check
 * @slot: slot number the label was read from
 *
 * Returns true when the label records @slot as its own slot and both its
 * dpa and rawsize are multiples of SZ_4K.
 */
static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot)
{
	u64 dpa, rawsize;

	/* check that we are written where we expect to be written */
	if (__le32_to_cpu(nd_label->slot) != slot)
		return false;

	/* check that DPA allocations are page aligned */
	dpa = __le64_to_cpu(nd_label->dpa);
	rawsize = __le64_to_cpu(nd_label->rawsize);

	return ((dpa | rawsize) % SZ_4K) == 0;
}
/*
 * nd_label_reserve_dpa - reserve the dpa ranges claimed by the active labels
 * @ndd: dimm driver data
 *
 * Walks every slot whose bit is clear in the current index's free bitmap,
 * skips labels that fail slot_valid(), and reserves each remaining label's
 * dpa range in the dimm's resource tree.
 *
 * Returns 0 on success or when there is no current index, -EBUSY as soon as
 * one range cannot be reserved.
 */
int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
{
	struct nd_namespace_index *nsindex;
	unsigned long *free_map;
	u32 nslot, slot;

	/* no label, nothing to reserve */
	if (!preamble_current(ndd, &nsindex, &free_map, &nslot))
		return 0;

	for_each_clear_bit_le(slot, free_map, nslot) {
		struct nd_namespace_label *nd_label = nd_label_base(ndd) + slot;
		struct nd_region *nd_region = NULL;
		u8 label_uuid[NSLABEL_UUID_LEN];
		struct nd_label_id label_id;
		struct resource *res;
		u64 dpa, rawsize;
		u32 flags;

		if (!slot_valid(nd_label, slot))
			continue;

		memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
		flags = __le32_to_cpu(nd_label->flags);
		nd_label_gen_id(&label_id, label_uuid, flags);

		dpa = __le64_to_cpu(nd_label->dpa);
		rawsize = __le64_to_cpu(nd_label->rawsize);
		res = nvdimm_allocate_dpa(ndd, &label_id, dpa, rawsize);

		/* log the attempt before acting on a failure */
		nd_dbg_dpa(nd_region, ndd, res, "reserve\n");
		if (!res)
			return -EBUSY;
	}

	return 0;
}

128
drivers/nvdimm/label.h Normal file
Просмотреть файл

@ -0,0 +1,128 @@
/*
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef __LABEL_H__
#define __LABEL_H__
#include <linux/ndctl.h>
#include <linux/sizes.h>
#include <linux/io.h>
/*
 * Constants describing the on-media label format (index blocks and
 * labels) and BTT info blocks.
 */
enum {
/* size in bytes of nd_namespace_index.sig */
NSINDEX_SIG_LEN = 16,
/* the size of an index block is a multiple of this many bytes */
NSINDEX_ALIGN = 256,
/* bits of nd_namespace_index.seq that hold the sequence number */
NSINDEX_SEQ_MASK = 0x3,
/* size in bytes of nd_namespace_label.uuid */
NSLABEL_UUID_LEN = 16,
/* size in bytes of nd_namespace_label.name */
NSLABEL_NAME_LEN = 64,
NSLABEL_FLAG_ROLABEL = 0x1, /* read-only label */
NSLABEL_FLAG_LOCAL = 0x2, /* DIMM-local namespace */
NSLABEL_FLAG_BTT = 0x4, /* namespace contains a BTT */
NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */
BTT_ALIGN = 4096, /* all btt structures */
BTTINFO_SIG_LEN = 16,
BTTINFO_UUID_LEN = 16,
BTTINFO_FLAG_ERROR = 0x1, /* error state (read-only) */
BTTINFO_MAJOR_VERSION = 1,
/* config areas smaller than this are rejected by nvdimm_init_config_data() */
ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */
/* size in bytes of nd_label_id.id */
ND_LABEL_ID_SIZE = 50,
};
/*
 * Signature expected in nd_namespace_index.sig; the first NSINDEX_SIG_LEN
 * bytes (text plus the explicit trailing NUL) are compared.
 */
static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
/**
 * struct nd_namespace_index - label set superblock
 * @sig: NAMESPACE_INDEX\0
 * @flags: placeholder
 * @seq: sequence number for this index
 * @myoff: offset of this index in label area
 * @mysize: size of this index struct
 * @otheroff: offset of other index
 * @labeloff: offset of first label slot
 * @nslot: total number of label slots
 * @major: label area major version
 * @minor: label area minor version
 * @checksum: fletcher64 of all fields
 * @free: bitmap, nslot bits; a clear bit marks a slot that is walked as a
 *        label when dpa is reserved
 *
 * All multi-byte fields are stored little-endian.
 *
 * The size of free[] is rounded up so the total struct size is a
 * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond
 * nslot bits must be zero.
 */
struct nd_namespace_index {
u8 sig[NSINDEX_SIG_LEN];
__le32 flags;
__le32 seq;
__le64 myoff;
__le64 mysize;
__le64 otheroff;
__le64 labeloff;
__le32 nslot;
__le16 major;
__le16 minor;
__le64 checksum;
u8 free[0];
};
/**
 * struct nd_namespace_label - namespace superblock
 * @uuid: UUID per RFC 4122
 * @name: optional name (NUL-terminated)
 * @flags: see NSLABEL_FLAG_*
 * @nlabel: num labels to describe this ns
 * @position: labels position in set
 * @isetcookie: interleave set cookie
 * @lbasize: LBA size in bytes or 0 for pmem
 * @dpa: DPA of NVM range on this DIMM
 * @rawsize: size of namespace
 * @slot: slot of this label in label area
 * @unused: must be zero
 *
 * All multi-byte fields are stored little-endian.
 */
struct nd_namespace_label {
u8 uuid[NSLABEL_UUID_LEN];
u8 name[NSLABEL_NAME_LEN];
__le32 flags;
__le16 nlabel;
__le16 position;
__le64 isetcookie;
__le64 lbasize;
__le64 dpa;
__le64 rawsize;
__le32 slot;
__le32 unused;
};
/**
 * struct nd_label_id - identifier string for dpa allocation
 * @id: "{blk|pmem}-<namespace uuid>", held in a fixed buffer of
 *      ND_LABEL_ID_SIZE bytes
 */
struct nd_label_id {
char id[ND_LABEL_ID_SIZE];
};
/*
 * Without a valid 'best' index there is no 'next' index either, so a
 * negative input yields -1.  Otherwise the two index blocks alternate:
 * the next index is MOD(index+1, 2).
 */
static inline int nd_label_next_nsindex(int index)
{
	return index < 0 ? -1 : (index + 1) % 2;
}
/* only pointers to the driver data are needed by the prototypes below */
struct nvdimm_drvdata;
/* returns the number (0 or 1) of the index block to use, or -1 if none is valid */
int nd_label_validate(struct nvdimm_drvdata *ndd);
/* copies one index block over another; does nothing if either pointer is NULL */
void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
struct nd_namespace_index *src);
/* size in bytes of a single index block */
size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd);
#endif /* __LABEL_H__ */

Просмотреть файл

@ -16,11 +16,15 @@
#include <linux/device.h> #include <linux/device.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/ndctl.h> #include <linux/ndctl.h>
#include "label.h"
struct nvdimm_drvdata { struct nvdimm_drvdata {
struct device *dev; struct device *dev;
int nsindex_size;
struct nd_cmd_get_config_size nsarea; struct nd_cmd_get_config_size nsarea;
void *data; void *data;
int ns_current, ns_next;
struct resource dpa;
}; };
struct nd_region_namespaces { struct nd_region_namespaces {
@ -28,6 +32,37 @@ struct nd_region_namespaces {
int active; int active;
}; };
static inline struct nd_namespace_index *to_namespace_index(
struct nvdimm_drvdata *ndd, int i)
{
if (i < 0)
return NULL;
return ndd->data + sizeof_namespace_index(ndd) * i;
}
/*
 * to_current_namespace_index - index block selected by ndd->ns_current
 * @ndd: dimm driver data
 *
 * Returns NULL when ns_current is negative.
 */
static inline struct nd_namespace_index *to_current_namespace_index(
		struct nvdimm_drvdata *ndd)
{
	const int current_idx = ndd->ns_current;

	return to_namespace_index(ndd, current_idx);
}
/*
 * to_next_namespace_index - index block selected by ndd->ns_next
 * @ndd: dimm driver data
 *
 * Returns NULL when ns_next is negative.
 */
static inline struct nd_namespace_index *to_next_namespace_index(
		struct nvdimm_drvdata *ndd)
{
	const int next_idx = ndd->ns_next;

	return to_namespace_index(ndd, next_idx);
}
/*
 * nd_dbg_dpa - emit a dev_dbg() line describing a dpa resource
 * @r: region to log against, or NULL to log against the dimm device
 * @d: dimm driver data (supplies the dimm device)
 * @res: resource to describe; when NULL, "null" and a zero size/start are
 *       printed instead
 * @fmt: string literal appended to the common "name: size @ start" prefix
 * @arg: optional arguments consumed by @fmt
 *
 * Every use of an argument is parenthesized so that expression arguments
 * expand safely.  Arguments are evaluated more than once, so they must be
 * free of side effects.
 */
#define nd_dbg_dpa(r, d, res, fmt, arg...) \
	dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \
		(r) ? dev_name((d)->dev) : "", (res) ? (res)->name : "null", \
		(unsigned long long) ((res) ? resource_size(res) : 0), \
		(unsigned long long) ((res) ? (res)->start : 0), ##arg)
/*
 * for_each_dpa_resource_safe - walk the children of a dimm's dpa resource
 * @ndd: dimm driver data owning the 'dpa' root resource
 * @res: lvalue that receives each child resource in turn
 * @next: lvalue used to hold the following sibling
 *
 * The sibling pointer is fetched before the loop body runs, so the body
 * does not need @res to remain valid in order to advance.
 */
#define for_each_dpa_resource_safe(ndd, res, next) \
for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \
res; res = next, next = next ? next->sibling : NULL)
struct nd_region { struct nd_region {
struct device dev; struct device dev;
u16 ndr_mappings; u16 ndr_mappings;
@ -39,6 +74,15 @@ struct nd_region {
struct nd_mapping mapping[0]; struct nd_mapping mapping[0];
}; };
/*
 * Advance a sequence number through the repeating cycle 01 -> 10 -> 11 -> 01.
 * Only the low two bits of @seq are considered; a value of 00 maps to 00.
 */
static inline unsigned nd_inc_seq(unsigned seq)
{
	switch (seq & 3) {
	case 1:
		return 2;
	case 2:
		return 3;
	case 3:
		return 1;
	default:
		return 0;
	}
}
enum nd_async_mode { enum nd_async_mode {
ND_SYNC, ND_SYNC,
ND_ASYNC, ND_ASYNC,
@ -58,4 +102,9 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
void nvdimm_bus_lock(struct device *dev); void nvdimm_bus_lock(struct device *dev);
void nvdimm_bus_unlock(struct device *dev); void nvdimm_bus_unlock(struct device *dev);
bool is_nvdimm_bus_locked(struct device *dev); bool is_nvdimm_bus_locked(struct device *dev);
/* reserve the dpa ranges claimed by the labels of the current index */
int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd);
/* release a reservation made with nvdimm_allocate_dpa() and free its name */
void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res);
/* reserve [start, start + n) under ndd->dpa, named after label_id; NULL on failure */
struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
struct nd_label_id *label_id, resource_size_t start,
resource_size_t n);
#endif /* __ND_H__ */ #endif /* __ND_H__ */

Просмотреть файл

@ -175,7 +175,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
#define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\ #define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\
struct nd_cmd_ars_status) struct nd_cmd_ars_status)
#define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ #define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */
#define ND_DEVICE_REGION_PMEM 2 /* nd_region: (parent of PMEM namespaces) */ #define ND_DEVICE_REGION_PMEM 2 /* nd_region: (parent of PMEM namespaces) */
#define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ #define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */