Merge branch 'dm-4.12' into dm-4.12-post-merge
Commit 7e25a76061
@ -11,14 +11,31 @@ Parameters: <cipher> <key> <iv_offset> <device path> \
|
|||
<offset> [<#opt_params> <opt_params>]
|
||||
|
||||
<cipher>
|
||||
Encryption cipher and an optional IV generation mode.
|
||||
(In format cipher[:keycount]-chainmode-ivmode[:ivopts]).
|
||||
Examples:
|
||||
des
|
||||
aes-cbc-essiv:sha256
|
||||
twofish-ecb
|
||||
Encryption cipher, encryption mode and Initial Vector (IV) generator.
|
||||
|
||||
/proc/crypto contains supported crypto modes
|
||||
The cipher specifications format is:
|
||||
cipher[:keycount]-chainmode-ivmode[:ivopts]
|
||||
Examples:
|
||||
aes-cbc-essiv:sha256
|
||||
aes-xts-plain64
|
||||
serpent-xts-plain64
|
||||
|
||||
Cipher format also supports direct specification with the kernel crypto API
format (selected by the capi: prefix). The IV specification is the same
as for the first format type.
This format is mainly used for specifying authenticated modes.
|
||||
|
||||
The crypto API cipher specifications format is:
|
||||
capi:cipher_api_spec-ivmode[:ivopts]
|
||||
Examples:
|
||||
capi:cbc(aes)-essiv:sha256
|
||||
capi:xts(aes)-plain64
|
||||
Examples of authenticated modes:
|
||||
capi:gcm(aes)-random
|
||||
capi:authenc(hmac(sha256),xts(aes))-random
|
||||
capi:rfc7539(chacha20,poly1305)-random
|
||||
|
||||
The /proc/crypto file contains a list of the currently loaded crypto modes.
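
As an illustrative sketch only (the device name, key placeholder and sizes
below are assumptions, not taken from this document), a complete mapping
table using the parameters described above could look like:

    # map 1 GiB (2097152 sectors) of /dev/sdb with AES in XTS mode
    dmsetup create crypt1 --table "0 2097152 crypt aes-xts-plain64 \
        <512-bit-key-in-hex> 0 /dev/sdb 0"

The equivalent crypto API notation would substitute capi:xts(aes)-plain64
for the cipher field.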
|
||||
|
||||
<key>
|
||||
Key used for encryption. It is encoded either as a hexadecimal number
|
||||
|
@ -93,6 +110,32 @@ submit_from_crypt_cpus
|
|||
thread because it benefits CFQ to have writes submitted using the
|
||||
same context.
|
||||
|
||||
integrity:<bytes>:<type>
|
||||
The device requires additional <bytes> of metadata per sector, stored
in a per-bio integrity structure. This metadata must be provided
by an underlying dm-integrity target.
|
||||
|
||||
The <type> can be "none" if metadata is used only for persistent IV.
|
||||
|
||||
For Authenticated Encryption with Additional Data (AEAD)
|
||||
the <type> is "aead". An AEAD mode additionally calculates and verifies
|
||||
integrity for the encrypted device. The additional space is then
|
||||
used for storing authentication tag (and persistent IV if needed).
|
||||
|
||||
sector_size:<bytes>
|
||||
Use <bytes> as the encryption unit instead of 512-byte sectors.
This option can be in the range 512 - 4096 bytes and must be a power of two.
The virtual device will announce this size as its minimal I/O size and
logical sector size.
|
||||
|
||||
iv_large_sectors
|
||||
IV generators will use the sector number counted in <sector_size> units
instead of the default 512-byte sectors.

For example, if <sector_size> is 4096 bytes, the plain64 IV for the second
sector will be 8 without the flag, and 1 if iv_large_sectors is present.
The <iv_offset> must be a multiple of <sector_size> (in 512-byte units)
if this flag is specified.
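
A hedged sketch of a table line using these optional parameters (the device,
key placeholder and sizes are illustrative, not taken from this document):

    # 4096-byte encryption sectors with IVs counted in 4096-byte units
    dmsetup create crypt2 --table "0 2097152 crypt aes-xts-plain64 \
        <512-bit-key-in-hex> 0 /dev/sdb 0 2 sector_size:4096 iv_large_sectors"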
|
||||
|
||||
Example scripts
|
||||
===============
|
||||
LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
|
||||
|
|
|
@ -0,0 +1,199 @@
|
|||
The dm-integrity target emulates a block device that has additional
|
||||
per-sector tags that can be used for storing integrity information.
|
||||
|
||||
A general problem with storing integrity tags with every sector is that
|
||||
writing the sector and the integrity tag must be atomic - i.e. in case of
|
||||
crash, either both sector and integrity tag or none of them is written.
|
||||
|
||||
To guarantee write atomicity, the dm-integrity target uses a journal: it
writes sector data and integrity tags into the journal, commits the journal
and then copies the data and integrity tags to their respective locations.
|
||||
|
||||
The dm-integrity target can be used with the dm-crypt target - in this
|
||||
situation the dm-crypt target creates the integrity data and passes them
|
||||
to the dm-integrity target via bio_integrity_payload attached to the bio.
|
||||
In this mode, the dm-crypt and dm-integrity targets provide authenticated
|
||||
disk encryption - if the attacker modifies the encrypted device, an I/O
|
||||
error is returned instead of random data.
|
||||
|
||||
The dm-integrity target can also be used as a standalone target; in this
mode it calculates and verifies the integrity tags internally and can be
used to detect silent data corruption on the disk or in the I/O path.
|
||||
|
||||
|
||||
When loading the target for the first time, the kernel driver will format
|
||||
the device. But it will only format the device if the superblock contains
|
||||
zeroes. If the superblock is neither valid nor zeroed, the dm-integrity
|
||||
target can't be loaded.
|
||||
|
||||
To use the target for the first time:
|
||||
1. overwrite the superblock with zeroes
|
||||
2. load the dm-integrity target with one-sector size, the kernel driver
|
||||
will format the device
|
||||
3. unload the dm-integrity target
|
||||
4. read the "provided_data_sectors" value from the superblock
|
||||
5. load the dm-integrity target with the target size
"provided_data_sectors"
|
||||
6. if you want to use dm-integrity with dm-crypt, load the dm-crypt target
|
||||
with the size "provided_data_sectors"
|
||||
|
||||
|
||||
Target arguments:
|
||||
|
||||
1. the underlying block device
|
||||
|
||||
2. the number of reserved sectors at the beginning of the device - the
dm-integrity target won't read or write these sectors
|
||||
|
||||
3. the size of the integrity tag (if "-" is used, the size is taken from
|
||||
the internal-hash algorithm)
|
||||
|
||||
4. mode:
|
||||
D - direct writes (without journal) - in this mode, journaling is
|
||||
not used and data sectors and integrity tags are written
|
||||
separately. In case of a crash, it is possible that the data
and the integrity tag do not match.
|
||||
J - journaled writes - data and integrity tags are written to the
|
||||
journal and atomicity is guaranteed. In case of crash,
|
||||
either both data and tag or none of them are written. The
|
||||
journaled mode halves write throughput because the
data have to be written twice.
|
||||
R - recovery mode - in this mode, journal is not replayed,
|
||||
checksums are not checked and writes to the device are not
|
||||
allowed. This mode is useful for data recovery if the
|
||||
device cannot be activated in any of the other standard
|
||||
modes.
|
||||
|
||||
5. the number of additional arguments
|
||||
|
||||
Additional arguments:
|
||||
|
||||
journal_sectors:number
|
||||
The size of the journal. This argument is used only when formatting
the device; if the device is already formatted, the value from the
superblock is used.
|
||||
|
||||
interleave_sectors:number
|
||||
The number of interleaved sectors. This value is rounded down to
|
||||
a power of two. If the device is already formatted, the value from
|
||||
the superblock is used.
|
||||
|
||||
buffer_sectors:number
|
||||
The number of sectors in one buffer. The value is rounded down to
|
||||
a power of two.
|
||||
|
||||
The tag area is accessed using buffers; the buffer size is
configurable. A larger buffer size means that individual I/Os will
be larger, but fewer I/Os will be issued.
|
||||
|
||||
journal_watermark:number
|
||||
The journal watermark as a percentage. When the used portion of the
journal exceeds this watermark, the thread that flushes the journal
will be started.
|
||||
|
||||
commit_time:number
|
||||
Commit time in milliseconds. When this time passes, the journal is
|
||||
written. The journal is also written immediately if a FLUSH
request is received.
|
||||
|
||||
internal_hash:algorithm(:key) (the key is optional)
|
||||
Use an internal hash or crc.
|
||||
When this argument is used, the dm-integrity target won't accept
|
||||
integrity tags from the upper target, but it will automatically
|
||||
generate and verify the integrity tags.
|
||||
|
||||
You can use a crc algorithm (such as crc32); the integrity target
will then protect the data against accidental corruption.
You can also use an hmac algorithm (for example
"hmac(sha256):0123456789abcdef"); in this mode it provides
cryptographic authentication of the data without encryption.
|
||||
|
||||
When this argument is not used, the integrity tags are accepted
|
||||
from an upper layer target, such as dm-crypt. The upper layer
|
||||
target should check the validity of the integrity tags.
|
||||
|
||||
journal_crypt:algorithm(:key) (the key is optional)
|
||||
Encrypt the journal using the given algorithm to make sure that an
attacker can't read the journal. You can use a block cipher here
|
||||
(such as "cbc(aes)") or a stream cipher (for example "chacha20",
|
||||
"salsa20", "ctr(aes)" or "ecb(arc4)").
|
||||
|
||||
The journal contains a history of the last writes to the block device;
an attacker reading the journal could see the last sector numbers
that were written. From the sector numbers, the attacker can infer
the size of files that were written. To protect against this
situation, you can encrypt the journal.
|
||||
|
||||
journal_mac:algorithm(:key) (the key is optional)
|
||||
Protect sector numbers in the journal from accidental or malicious
|
||||
modification. To protect against accidental modification, use a
crc algorithm; to protect against malicious modification, use an
hmac algorithm with a key.
|
||||
|
||||
This option is not needed when using internal-hash because in this
|
||||
mode, the integrity of journal entries is checked when replaying
|
||||
the journal. Thus, a modified sector number would be detected at
|
||||
this stage.
|
||||
|
||||
block_size:number
|
||||
The size of a data block in bytes. The larger the block size, the
less overhead there is for per-block integrity metadata.
Supported values are 512, 1024, 2048 and 4096 bytes. If not
specified, the default block size is 512 bytes.
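
As a hedged illustration of the standalone mode (the device name and size
are assumptions, not taken from this document), a table line using an
internal crc32 hash and 4096-byte blocks could look like:

    dmsetup create integ --table "0 $PROVIDED integrity /dev/sdb 0 - J 2 \
        internal_hash:crc32 block_size:4096"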
|
||||
|
||||
The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
|
||||
be changed when reloading the target (load an inactive table and swap the
|
||||
tables with suspend and resume). The other arguments should not be changed
|
||||
when reloading the target because the layout of the on-disk data depends on them
|
||||
and the reloaded target would be non-functional.
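
A sketch of such a reload with hypothetical names, where only the tunable
journal_watermark and commit_time arguments differ from the previously
loaded table:

    dmsetup reload integ --table "0 $PROVIDED integrity /dev/sdb 0 - J 2 \
        journal_watermark:70 commit_time:5000"
    dmsetup suspend integ
    dmsetup resume integ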
|
||||
|
||||
|
||||
The layout of the formatted block device:
|
||||
* reserved sectors (they are not used by this target, they can be used for
|
||||
storing LUKS metadata or for other purposes), the size of the reserved
|
||||
area is specified in the target arguments
|
||||
* superblock (4kiB)
|
||||
* magic string - identifies that the device was formatted
|
||||
* version
|
||||
* log2(interleave sectors)
|
||||
* integrity tag size
|
||||
* the number of journal sections
|
||||
* provided data sectors - the number of sectors that this target
|
||||
provides (i.e. the size of the device minus the size of all
|
||||
metadata and padding). The user of this target should not send
|
||||
bios that access data beyond the "provided data sectors" limit.
|
||||
* flags - a flag is set if journal_mac is used
|
||||
* journal
|
||||
The journal is divided into sections, each section contains:
|
||||
* metadata area (4kiB), it contains journal entries
|
||||
every journal entry contains:
|
||||
* logical sector (specifies where the data and tag should
|
||||
be written)
|
||||
* last 8 bytes of data
|
||||
* integrity tag (the size is specified in the superblock)
|
||||
every metadata sector ends with
|
||||
* mac (8-bytes), all the macs in 8 metadata sectors form a
|
||||
64-byte value. It is used to store the hmac of the sector
numbers in the journal section, to protect against the
possibility that an attacker tampers with the sector
|
||||
numbers in the journal.
|
||||
* commit id
|
||||
* data area (the size is variable; it depends on how many journal
|
||||
entries fit into the metadata area)
|
||||
every sector in the data area contains:
|
||||
* data (504 bytes of data, the last 8 bytes are stored in
|
||||
the journal entry)
|
||||
* commit id
|
||||
To test if the whole journal section was written correctly, every
|
||||
512-byte sector of the journal ends with an 8-byte commit id. If the
|
||||
commit id matches on all sectors in a journal section, then it is
|
||||
assumed that the section was written correctly. If the commit id
|
||||
doesn't match, the section was written partially and it should not
|
||||
be replayed.
|
||||
* one or more runs of interleaved tags and data. Each run contains:
|
||||
* tag area - it contains integrity tags. There is one tag for each
|
||||
sector in the data area
|
||||
* data area - it contains data sectors. The number of data sectors
|
||||
in one run must be a power of two. log2 of this value is stored
|
||||
in the superblock.
|
|
@ -170,6 +170,13 @@ The target is named "raid" and it accepts the following parameters:
|
|||
Takeover/reshape is not possible with a raid4/5/6 journal device;
|
||||
it has to be deconfigured before requesting these.
|
||||
|
||||
[journal_mode <mode>]
|
||||
This option sets the caching mode on journaled raid4/5/6 raid sets
|
||||
(see 'journal_dev <dev>' above) to 'writethrough' or 'writeback'.
|
||||
If 'writeback' is selected, the journal device has to be resilient
|
||||
and must not suffer from the 'write hole' problem itself (e.g. use
|
||||
raid1 or raid10) to avoid a single point of failure.
|
||||
|
||||
<#raid_devs>: The number of devices composing the array.
|
||||
Each device consists of two entries. The first is the device
|
||||
containing the metadata (if any); the second is the one containing the
|
||||
|
@ -254,7 +261,8 @@ recovery. Here is a fuller description of the individual fields:
|
|||
<data_offset> The current data offset to the start of the user data on
|
||||
each component device of a raid set (see the respective
|
||||
raid parameter to support out-of-place reshaping).
|
||||
<journal_char> 'A' - active raid4/5/6 journal device.
|
||||
<journal_char> 'A' - active write-through journal device.
|
||||
'a' - active write-back journal device.
|
||||
'D' - dead journal device.
|
||||
'-' - no journal device.
|
||||
|
||||
|
@ -331,3 +339,7 @@ Version History
|
|||
'D' on the status line. If '- -' is passed into the constructor, emit
|
||||
'- -' on the table line and '-' as the status line health character.
|
||||
1.10.0 Add support for raid4/5/6 journal device
|
||||
1.10.1 Fix data corruption on reshape request
|
||||
1.11.0 Fix table line argument order
|
||||
(wrong raid10_copies/raid10_format sequence)
|
||||
1.11.1 Add raid4/5/6 journal write-back support via journal_mode option
|
||||
|
|
|
@ -325,14 +325,6 @@ config DM_CACHE_SMQ
|
|||
of less memory utilization, improved performance and increased
|
||||
adaptability in the face of changing workloads.
|
||||
|
||||
config DM_CACHE_CLEANER
|
||||
tristate "Cleaner Cache Policy (EXPERIMENTAL)"
|
||||
depends on DM_CACHE
|
||||
default y
|
||||
---help---
|
||||
A simple cache policy that writes back all data to the
|
||||
origin. Used when decommissioning a dm-cache.
|
||||
|
||||
config DM_ERA
|
||||
tristate "Era target (EXPERIMENTAL)"
|
||||
depends on BLK_DEV_DM
|
||||
|
@ -365,6 +357,7 @@ config DM_LOG_USERSPACE
|
|||
config DM_RAID
|
||||
tristate "RAID 1/4/5/6/10 target"
|
||||
depends on BLK_DEV_DM
|
||||
select MD_RAID0
|
||||
select MD_RAID1
|
||||
select MD_RAID10
|
||||
select MD_RAID456
|
||||
|
@ -508,4 +501,14 @@ config DM_LOG_WRITES
|
|||
|
||||
If unsure, say N.
|
||||
|
||||
config DM_INTEGRITY
|
||||
tristate "Integrity target"
|
||||
depends on BLK_DEV_DM
|
||||
select BLK_DEV_INTEGRITY
|
||||
select DM_BUFIO
|
||||
select CRYPTO
|
||||
select ASYNC_XOR
|
||||
---help---
|
||||
This is the integrity target.
|
||||
|
||||
endif # MD
|
||||
|
|
|
@ -11,10 +11,11 @@ dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
|
|||
dm-mirror-y += dm-raid1.o
|
||||
dm-log-userspace-y \
|
||||
+= dm-log-userspace-base.o dm-log-userspace-transfer.o
|
||||
dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o
|
||||
dm-thin-pool-y += dm-thin.o dm-thin-metadata.o
|
||||
dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o
|
||||
dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \
|
||||
dm-cache-background-tracker.o
|
||||
dm-cache-smq-y += dm-cache-policy-smq.o
|
||||
dm-cache-cleaner-y += dm-cache-policy-cleaner.o
|
||||
dm-era-y += dm-era-target.o
|
||||
dm-verity-y += dm-verity-target.o
|
||||
md-mod-y += md.o bitmap.o
|
||||
|
@ -56,9 +57,9 @@ obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
|
|||
obj-$(CONFIG_DM_VERITY) += dm-verity.o
|
||||
obj-$(CONFIG_DM_CACHE) += dm-cache.o
|
||||
obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o
|
||||
obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o
|
||||
obj-$(CONFIG_DM_ERA) += dm-era.o
|
||||
obj-$(CONFIG_DM_LOG_WRITES) += dm-log-writes.o
|
||||
obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o
|
||||
|
||||
ifeq ($(CONFIG_DM_UEVENT),y)
|
||||
dm-mod-objs += dm-uevent.o
|
||||
|
|
|
@ -5,7 +5,8 @@
|
|||
*/
|
||||
|
||||
#include "dm.h"
|
||||
#include "dm-bio-prison.h"
|
||||
#include "dm-bio-prison-v1.h"
|
||||
#include "dm-bio-prison-v2.h"
|
||||
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/mempool.h>
|
||||
|
@ -398,7 +399,7 @@ EXPORT_SYMBOL_GPL(dm_deferred_set_add_work);
|
|||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
static int __init dm_bio_prison_init(void)
|
||||
static int __init dm_bio_prison_init_v1(void)
|
||||
{
|
||||
_cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
|
||||
if (!_cell_cache)
|
||||
|
@ -407,12 +408,51 @@ static int __init dm_bio_prison_init(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void __exit dm_bio_prison_exit(void)
|
||||
static void dm_bio_prison_exit_v1(void)
|
||||
{
|
||||
kmem_cache_destroy(_cell_cache);
|
||||
_cell_cache = NULL;
|
||||
}
|
||||
|
||||
static int (*_inits[])(void) __initdata = {
|
||||
dm_bio_prison_init_v1,
|
||||
dm_bio_prison_init_v2,
|
||||
};
|
||||
|
||||
static void (*_exits[])(void) = {
|
||||
dm_bio_prison_exit_v1,
|
||||
dm_bio_prison_exit_v2,
|
||||
};
|
||||
|
||||
static int __init dm_bio_prison_init(void)
|
||||
{
|
||||
const int count = ARRAY_SIZE(_inits);
|
||||
|
||||
int r, i;
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
r = _inits[i]();
|
||||
if (r)
|
||||
goto bad;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
bad:
|
||||
while (i--)
|
||||
_exits[i]();
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void __exit dm_bio_prison_exit(void)
|
||||
{
|
||||
int i = ARRAY_SIZE(_exits);
|
||||
|
||||
while (i--)
|
||||
_exits[i]();
|
||||
}
|
||||
|
||||
/*
|
||||
* module hooks
|
||||
*/
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2011-2012 Red Hat, Inc.
|
||||
* Copyright (C) 2011-2017 Red Hat, Inc.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
|
@ -0,0 +1,369 @@
|
|||
/*
|
||||
* Copyright (C) 2012-2017 Red Hat, Inc.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include "dm.h"
|
||||
#include "dm-bio-prison-v2.h"
|
||||
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/mempool.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/rwsem.h>
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
#define MIN_CELLS 1024
|
||||
|
||||
struct dm_bio_prison_v2 {
|
||||
struct workqueue_struct *wq;
|
||||
|
||||
spinlock_t lock;
|
||||
mempool_t *cell_pool;
|
||||
struct rb_root cells;
|
||||
};
|
||||
|
||||
static struct kmem_cache *_cell_cache;
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* @nr_cells should be the number of cells you want in use _concurrently_.
|
||||
* Don't confuse it with the number of distinct keys.
|
||||
*/
|
||||
struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq)
|
||||
{
|
||||
struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL);
|
||||
|
||||
if (!prison)
|
||||
return NULL;
|
||||
|
||||
prison->wq = wq;
|
||||
spin_lock_init(&prison->lock);
|
||||
|
||||
prison->cell_pool = mempool_create_slab_pool(MIN_CELLS, _cell_cache);
|
||||
if (!prison->cell_pool) {
|
||||
kfree(prison);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
prison->cells = RB_ROOT;
|
||||
|
||||
return prison;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bio_prison_create_v2);
|
||||
|
||||
void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison)
|
||||
{
|
||||
mempool_destroy(prison->cell_pool);
|
||||
kfree(prison);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bio_prison_destroy_v2);
|
||||
|
||||
struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison, gfp_t gfp)
|
||||
{
|
||||
return mempool_alloc(prison->cell_pool, gfp);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell_v2);
|
||||
|
||||
void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell)
|
||||
{
|
||||
mempool_free(cell, prison->cell_pool);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell_v2);
|
||||
|
||||
static void __setup_new_cell(struct dm_cell_key_v2 *key,
|
||||
struct dm_bio_prison_cell_v2 *cell)
|
||||
{
|
||||
memset(cell, 0, sizeof(*cell));
|
||||
memcpy(&cell->key, key, sizeof(cell->key));
|
||||
bio_list_init(&cell->bios);
|
||||
}
|
||||
|
||||
static int cmp_keys(struct dm_cell_key_v2 *lhs,
|
||||
struct dm_cell_key_v2 *rhs)
|
||||
{
|
||||
if (lhs->virtual < rhs->virtual)
|
||||
return -1;
|
||||
|
||||
if (lhs->virtual > rhs->virtual)
|
||||
return 1;
|
||||
|
||||
if (lhs->dev < rhs->dev)
|
||||
return -1;
|
||||
|
||||
if (lhs->dev > rhs->dev)
|
||||
return 1;
|
||||
|
||||
if (lhs->block_end <= rhs->block_begin)
|
||||
return -1;
|
||||
|
||||
if (lhs->block_begin >= rhs->block_end)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if node found, otherwise it inserts a new one.
|
||||
*/
|
||||
static bool __find_or_insert(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_cell_key_v2 *key,
|
||||
struct dm_bio_prison_cell_v2 *cell_prealloc,
|
||||
struct dm_bio_prison_cell_v2 **result)
|
||||
{
|
||||
int r;
|
||||
struct rb_node **new = &prison->cells.rb_node, *parent = NULL;
|
||||
|
||||
while (*new) {
|
||||
struct dm_bio_prison_cell_v2 *cell =
|
||||
container_of(*new, struct dm_bio_prison_cell_v2, node);
|
||||
|
||||
r = cmp_keys(key, &cell->key);
|
||||
|
||||
parent = *new;
|
||||
if (r < 0)
|
||||
new = &((*new)->rb_left);
|
||||
|
||||
else if (r > 0)
|
||||
new = &((*new)->rb_right);
|
||||
|
||||
else {
|
||||
*result = cell;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
__setup_new_cell(key, cell_prealloc);
|
||||
*result = cell_prealloc;
|
||||
rb_link_node(&cell_prealloc->node, parent, new);
|
||||
rb_insert_color(&cell_prealloc->node, &prison->cells);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool __get(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_cell_key_v2 *key,
|
||||
unsigned lock_level,
|
||||
struct bio *inmate,
|
||||
struct dm_bio_prison_cell_v2 *cell_prealloc,
|
||||
struct dm_bio_prison_cell_v2 **cell)
|
||||
{
|
||||
if (__find_or_insert(prison, key, cell_prealloc, cell)) {
|
||||
if ((*cell)->exclusive_lock) {
|
||||
if (lock_level <= (*cell)->exclusive_level) {
|
||||
bio_list_add(&(*cell)->bios, inmate);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
(*cell)->shared_count++;
|
||||
|
||||
} else
|
||||
(*cell)->shared_count = 1;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_cell_key_v2 *key,
|
||||
unsigned lock_level,
|
||||
struct bio *inmate,
|
||||
struct dm_bio_prison_cell_v2 *cell_prealloc,
|
||||
struct dm_bio_prison_cell_v2 **cell_result)
|
||||
{
|
||||
int r;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&prison->lock, flags);
|
||||
r = __get(prison, key, lock_level, inmate, cell_prealloc, cell_result);
|
||||
spin_unlock_irqrestore(&prison->lock, flags);
|
||||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_cell_get_v2);
|
||||
|
||||
static bool __put(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell)
|
||||
{
|
||||
BUG_ON(!cell->shared_count);
|
||||
cell->shared_count--;
|
||||
|
||||
// FIXME: shared locks granted above the lock level could starve this
|
||||
if (!cell->shared_count) {
|
||||
if (cell->exclusive_lock){
|
||||
if (cell->quiesce_continuation) {
|
||||
queue_work(prison->wq, cell->quiesce_continuation);
|
||||
cell->quiesce_continuation = NULL;
|
||||
}
|
||||
} else {
|
||||
rb_erase(&cell->node, &prison->cells);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell)
|
||||
{
|
||||
bool r;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&prison->lock, flags);
|
||||
r = __put(prison, cell);
|
||||
spin_unlock_irqrestore(&prison->lock, flags);
|
||||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_cell_put_v2);
|
||||
|
||||
static int __lock(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_cell_key_v2 *key,
|
||||
unsigned lock_level,
|
||||
struct dm_bio_prison_cell_v2 *cell_prealloc,
|
||||
struct dm_bio_prison_cell_v2 **cell_result)
|
||||
{
|
||||
struct dm_bio_prison_cell_v2 *cell;
|
||||
|
||||
if (__find_or_insert(prison, key, cell_prealloc, &cell)) {
|
||||
if (cell->exclusive_lock)
|
||||
return -EBUSY;
|
||||
|
||||
cell->exclusive_lock = true;
|
||||
cell->exclusive_level = lock_level;
|
||||
*cell_result = cell;
|
||||
|
||||
// FIXME: we don't yet know what level these shared locks
|
||||
// were taken at, so have to quiesce them all.
|
||||
return cell->shared_count > 0;
|
||||
|
||||
} else {
|
||||
cell = cell_prealloc;
|
||||
cell->shared_count = 0;
|
||||
cell->exclusive_lock = true;
|
||||
cell->exclusive_level = lock_level;
|
||||
*cell_result = cell;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_cell_key_v2 *key,
|
||||
unsigned lock_level,
|
||||
struct dm_bio_prison_cell_v2 *cell_prealloc,
|
||||
struct dm_bio_prison_cell_v2 **cell_result)
|
||||
{
|
||||
int r;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&prison->lock, flags);
|
||||
r = __lock(prison, key, lock_level, cell_prealloc, cell_result);
|
||||
spin_unlock_irqrestore(&prison->lock, flags);
|
||||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_cell_lock_v2);
|
||||
|
||||
static void __quiesce(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell,
|
||||
struct work_struct *continuation)
|
||||
{
|
||||
if (!cell->shared_count)
|
||||
queue_work(prison->wq, continuation);
|
||||
else
|
||||
cell->quiesce_continuation = continuation;
|
||||
}
|
||||
|
||||
void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell,
|
||||
struct work_struct *continuation)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&prison->lock, flags);
|
||||
__quiesce(prison, cell, continuation);
|
||||
spin_unlock_irqrestore(&prison->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_cell_quiesce_v2);
|
||||
|
||||
static int __promote(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell,
|
||||
unsigned new_lock_level)
|
||||
{
|
||||
if (!cell->exclusive_lock)
|
||||
return -EINVAL;
|
||||
|
||||
cell->exclusive_level = new_lock_level;
|
||||
return cell->shared_count > 0;
|
||||
}
|
||||
|
||||
int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell,
|
||||
unsigned new_lock_level)
|
||||
{
|
||||
int r;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&prison->lock, flags);
|
||||
r = __promote(prison, cell, new_lock_level);
|
||||
spin_unlock_irqrestore(&prison->lock, flags);
|
||||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_cell_lock_promote_v2);
|
||||
|
||||
static bool __unlock(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell,
|
||||
struct bio_list *bios)
|
||||
{
|
||||
BUG_ON(!cell->exclusive_lock);
|
||||
|
||||
bio_list_merge(bios, &cell->bios);
|
||||
bio_list_init(&cell->bios);
|
||||
|
||||
if (cell->shared_count) {
|
||||
cell->exclusive_lock = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
rb_erase(&cell->node, &prison->cells);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell,
|
||||
struct bio_list *bios)
|
||||
{
|
||||
bool r;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&prison->lock, flags);
|
||||
r = __unlock(prison, cell, bios);
|
||||
spin_unlock_irqrestore(&prison->lock, flags);
|
||||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_cell_unlock_v2);
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
int __init dm_bio_prison_init_v2(void)
|
||||
{
|
||||
_cell_cache = KMEM_CACHE(dm_bio_prison_cell_v2, 0);
|
||||
if (!_cell_cache)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void dm_bio_prison_exit_v2(void)
|
||||
{
|
||||
kmem_cache_destroy(_cell_cache);
|
||||
_cell_cache = NULL;
|
||||
}
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* Copyright (C) 2011-2017 Red Hat, Inc.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#ifndef DM_BIO_PRISON_V2_H
|
||||
#define DM_BIO_PRISON_V2_H
|
||||
|
||||
#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */
|
||||
#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
int dm_bio_prison_init_v2(void);
|
||||
void dm_bio_prison_exit_v2(void);
|
||||
|
||||
/*
|
||||
* Sometimes we can't deal with a bio straight away. We put them in prison
|
||||
* where they can't cause any mischief. Bios are put in a cell identified
|
||||
* by a key, multiple bios can be in the same cell. When the cell is
|
||||
* subsequently unlocked the bios become available.
|
||||
*/
|
||||
struct dm_bio_prison_v2;
|
||||
|
||||
/*
|
||||
* Keys define a range of blocks within either a virtual or physical
|
||||
* device.
|
||||
*/
|
||||
struct dm_cell_key_v2 {
|
||||
int virtual;
|
||||
dm_thin_id dev;
|
||||
dm_block_t block_begin, block_end;
|
||||
};
|
||||
|
||||
/*
|
||||
* Treat this as opaque, only in header so callers can manage allocation
|
||||
* themselves.
|
||||
*/
|
||||
struct dm_bio_prison_cell_v2 {
|
||||
// FIXME: pack these
|
||||
bool exclusive_lock;
|
||||
unsigned exclusive_level;
|
||||
unsigned shared_count;
|
||||
struct work_struct *quiesce_continuation;
|
||||
|
||||
struct rb_node node;
|
||||
struct dm_cell_key_v2 key;
|
||||
struct bio_list bios;
|
||||
};
|
||||
|
||||
struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq);
|
||||
void dm_bio_prison_destroy_v2(struct dm_bio_prison_v2 *prison);
|
||||
|
||||
/*
|
||||
* These two functions just wrap a mempool. This is a transitory step:
|
||||
* Eventually all bio prison clients should manage their own cell memory.
|
||||
*
|
||||
* Like mempool_alloc(), dm_bio_prison_alloc_cell_v2() can only fail if called
|
||||
* in interrupt context or passed GFP_NOWAIT.
|
||||
*/
|
||||
struct dm_bio_prison_cell_v2 *dm_bio_prison_alloc_cell_v2(struct dm_bio_prison_v2 *prison,
|
||||
gfp_t gfp);
|
||||
void dm_bio_prison_free_cell_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell);
|
||||
|
||||
/*
|
||||
* Shared locks have a bio associated with them.
|
||||
*
|
||||
* If the lock is granted the caller can continue to use the bio, and must
|
||||
* call dm_cell_put_v2() to drop the reference count when finished using it.
|
||||
*
|
||||
* If the lock cannot be granted then the bio will be tracked within the
|
||||
* cell, and later given to the holder of the exclusive lock.
|
||||
*
|
||||
* See dm_cell_lock_v2() for discussion of the lock_level parameter.
|
||||
*
|
||||
* Compare *cell_result with cell_prealloc to see if the prealloc was used.
|
||||
* If cell_prealloc was used then inmate wasn't added to it.
|
||||
*
|
||||
* Returns true if the lock is granted.
|
||||
*/
|
||||
bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_cell_key_v2 *key,
|
||||
unsigned lock_level,
|
||||
struct bio *inmate,
|
||||
struct dm_bio_prison_cell_v2 *cell_prealloc,
|
||||
struct dm_bio_prison_cell_v2 **cell_result);
|
||||
|
||||
/*
|
||||
* Decrement the shared reference count for the lock. Returns true if
|
||||
* returning ownership of the cell (ie. you should free it).
|
||||
*/
|
||||
bool dm_cell_put_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell);
|
||||
|
||||
/*
|
||||
* Locks a cell. No associated bio. Exclusive locks get priority. These
|
||||
* locks constrain whether the io locks are granted according to level.
|
||||
*
|
||||
* Shared locks will still be granted if the lock_level is > (not = to) the
|
||||
* exclusive lock level.
|
||||
*
|
||||
* If an _exclusive_ lock is already held then -EBUSY is returned.
|
||||
*
|
||||
* Return values:
|
||||
* < 0 - error
|
||||
* 0 - locked; no quiescing needed
|
||||
* 1 - locked; quiescing needed
|
||||
*/
|
||||
int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_cell_key_v2 *key,
|
||||
unsigned lock_level,
|
||||
struct dm_bio_prison_cell_v2 *cell_prealloc,
|
||||
struct dm_bio_prison_cell_v2 **cell_result);
|
||||
|
||||
void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell,
|
||||
struct work_struct *continuation);
|
||||
|
||||
/*
|
||||
* Promotes an _exclusive_ lock to a higher lock level.
|
||||
*
|
||||
* Return values:
|
||||
* < 0 - error
|
||||
* 0 - promoted; no quiescing needed
|
||||
* 1 - promoted; quiescing needed
|
||||
*/
|
||||
int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell,
|
||||
unsigned new_lock_level);
|
||||
|
||||
/*
|
||||
* Adds any held bios to the bio list.
|
||||
*
|
||||
* There may be shared locks still held at this point even if you quiesced
|
||||
* (ie. different lock levels).
|
||||
*
|
||||
* Returns true if returning ownership of the cell (ie. you should free
|
||||
* it).
|
||||
*/
|
||||
bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison,
|
||||
struct dm_bio_prison_cell_v2 *cell,
|
||||
struct bio_list *bios);
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
#endif
|
|
@ -110,6 +110,8 @@ struct dm_bufio_client {
|
|||
struct rb_root buffer_tree;
|
||||
wait_queue_head_t free_buffer_wait;
|
||||
|
||||
sector_t start;
|
||||
|
||||
int async_write_error;
|
||||
|
||||
struct list_head client_list;
|
||||
|
@ -557,8 +559,8 @@ static void dmio_complete(unsigned long error, void *context)
|
|||
b->bio.bi_end_io(&b->bio);
|
||||
}
|
||||
|
||||
static void use_dmio(struct dm_buffer *b, int rw, sector_t block,
|
||||
bio_end_io_t *end_io)
|
||||
static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
|
||||
unsigned n_sectors, bio_end_io_t *end_io)
|
||||
{
|
||||
int r;
|
||||
struct dm_io_request io_req = {
|
||||
|
@ -570,8 +572,8 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block,
|
|||
};
|
||||
struct dm_io_region region = {
|
||||
.bdev = b->c->bdev,
|
||||
.sector = block << b->c->sectors_per_block_bits,
|
||||
.count = b->c->block_size >> SECTOR_SHIFT,
|
||||
.sector = sector,
|
||||
.count = n_sectors,
|
||||
};
|
||||
|
||||
if (b->data_mode != DATA_MODE_VMALLOC) {
|
||||
|
@ -606,14 +608,14 @@ static void inline_endio(struct bio *bio)
|
|||
end_fn(bio);
|
||||
}
|
||||
|
||||
static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
|
||||
bio_end_io_t *end_io)
|
||||
static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
|
||||
unsigned n_sectors, bio_end_io_t *end_io)
|
||||
{
|
||||
char *ptr;
|
||||
int len;
|
||||
|
||||
bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
|
||||
b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits;
|
||||
b->bio.bi_iter.bi_sector = sector;
|
||||
b->bio.bi_bdev = b->c->bdev;
|
||||
b->bio.bi_end_io = inline_endio;
|
||||
/*
|
||||
|
@ -628,7 +630,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
|
|||
* If len < PAGE_SIZE the buffer doesn't cross page boundary.
|
||||
*/
|
||||
ptr = b->data;
|
||||
len = b->c->block_size;
|
||||
len = n_sectors << SECTOR_SHIFT;
|
||||
|
||||
if (len >= PAGE_SIZE)
|
||||
BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
|
||||
|
@ -640,7 +642,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
|
|||
len < PAGE_SIZE ? len : PAGE_SIZE,
|
||||
offset_in_page(ptr))) {
|
||||
BUG_ON(b->c->block_size <= PAGE_SIZE);
|
||||
use_dmio(b, rw, block, end_io);
|
||||
use_dmio(b, rw, sector, n_sectors, end_io);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -651,17 +653,22 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
|
|||
submit_bio(&b->bio);
|
||||
}
|
||||
|
||||
static void submit_io(struct dm_buffer *b, int rw, sector_t block,
|
||||
bio_end_io_t *end_io)
|
||||
static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
|
||||
{
|
||||
unsigned n_sectors;
|
||||
sector_t sector;
|
||||
|
||||
if (rw == WRITE && b->c->write_callback)
|
||||
b->c->write_callback(b);
|
||||
|
||||
if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
|
||||
sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
|
||||
n_sectors = 1 << b->c->sectors_per_block_bits;
|
||||
|
||||
if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) &&
|
||||
b->data_mode != DATA_MODE_VMALLOC)
|
||||
use_inline_bio(b, rw, block, end_io);
|
||||
use_inline_bio(b, rw, sector, n_sectors, end_io);
|
||||
else
|
||||
use_dmio(b, rw, block, end_io);
|
||||
use_dmio(b, rw, sector, n_sectors, end_io);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------
|
||||
|
@ -713,7 +720,7 @@ static void __write_dirty_buffer(struct dm_buffer *b,
|
|||
wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
|
||||
|
||||
if (!write_list)
|
||||
submit_io(b, WRITE, b->block, write_endio);
|
||||
submit_io(b, WRITE, write_endio);
|
||||
else
|
||||
list_add_tail(&b->write_list, write_list);
|
||||
}
|
||||
|
@ -726,7 +733,7 @@ static void __flush_write_list(struct list_head *write_list)
|
|||
struct dm_buffer *b =
|
||||
list_entry(write_list->next, struct dm_buffer, write_list);
|
||||
list_del(&b->write_list);
|
||||
submit_io(b, WRITE, b->block, write_endio);
|
||||
submit_io(b, WRITE, write_endio);
|
||||
cond_resched();
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
|
@ -933,10 +940,11 @@ static void __get_memory_limit(struct dm_bufio_client *c,
|
|||
{
|
||||
unsigned long buffers;
|
||||
|
||||
if (ACCESS_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch) {
|
||||
mutex_lock(&dm_bufio_clients_lock);
|
||||
__cache_size_refresh();
|
||||
mutex_unlock(&dm_bufio_clients_lock);
|
||||
if (unlikely(ACCESS_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) {
|
||||
if (mutex_trylock(&dm_bufio_clients_lock)) {
|
||||
__cache_size_refresh();
|
||||
mutex_unlock(&dm_bufio_clients_lock);
|
||||
}
|
||||
}
|
||||
|
||||
buffers = dm_bufio_cache_size_per_client >>
|
||||
|
@ -1094,7 +1102,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
|
|||
return NULL;
|
||||
|
||||
if (need_submit)
|
||||
submit_io(b, READ, b->block, read_endio);
|
||||
submit_io(b, READ, read_endio);
|
||||
|
||||
wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
|
||||
|
||||
|
@ -1164,7 +1172,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
|
|||
dm_bufio_unlock(c);
|
||||
|
||||
if (need_submit)
|
||||
submit_io(b, READ, b->block, read_endio);
|
||||
submit_io(b, READ, read_endio);
|
||||
dm_bufio_release(b);
|
||||
|
||||
cond_resched();
|
||||
|
@ -1405,7 +1413,7 @@ retry:
|
|||
old_block = b->block;
|
||||
__unlink_buffer(b);
|
||||
__link_buffer(b, new_block, b->list_mode);
|
||||
submit_io(b, WRITE, new_block, write_endio);
|
||||
submit_io(b, WRITE, write_endio);
|
||||
wait_on_bit_io(&b->state, B_WRITING,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
__unlink_buffer(b);
|
||||
|
@ -1762,6 +1770,12 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
|
||||
|
||||
void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start)
|
||||
{
|
||||
c->start = start;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);
|
||||
|
||||
static unsigned get_max_age_hz(void)
|
||||
{
|
||||
unsigned max_age = ACCESS_ONCE(dm_bufio_max_age);
|
||||
|
@ -1782,9 +1796,17 @@ static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
|
|||
struct dm_buffer *b, *tmp;
|
||||
unsigned retain_target = get_retain_buffers(c);
|
||||
unsigned count;
|
||||
LIST_HEAD(write_list);
|
||||
|
||||
dm_bufio_lock(c);
|
||||
|
||||
__check_watermark(c, &write_list);
|
||||
if (unlikely(!list_empty(&write_list))) {
|
||||
dm_bufio_unlock(c);
|
||||
__flush_write_list(&write_list);
|
||||
dm_bufio_lock(c);
|
||||
}
|
||||
|
||||
count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
|
||||
list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) {
|
||||
if (count <= retain_target)
|
||||
|
@ -1809,6 +1831,8 @@ static void cleanup_old_buffers(void)
|
|||
|
||||
mutex_lock(&dm_bufio_clients_lock);
|
||||
|
||||
__cache_size_refresh();
|
||||
|
||||
list_for_each_entry(c, &dm_bufio_all_clients, client_list)
|
||||
__evict_old_buffers(c, max_age_hz);
|
||||
|
||||
|
|
|
@ -31,6 +31,13 @@ dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
|
|||
*/
|
||||
void dm_bufio_client_destroy(struct dm_bufio_client *c);
|
||||
|
||||
/*
|
||||
* Set the sector range.
|
||||
* When this function is called, there must be no I/O in progress on the bufio
|
||||
* client.
|
||||
*/
|
||||
void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start);
|
||||
|
||||
/*
|
||||
* WARNING: to avoid deadlocks, these conditions are observed:
|
||||
*
|
||||
|
|
|
@ -0,0 +1,238 @@
|
|||
/*
|
||||
* Copyright (C) 2017 Red Hat. All rights reserved.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include "dm-cache-background-tracker.h"
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
#define DM_MSG_PREFIX "dm-background-tracker"
|
||||
|
||||
struct bt_work {
|
||||
struct list_head list;
|
||||
struct rb_node node;
|
||||
struct policy_work work;
|
||||
};
|
||||
|
||||
struct background_tracker {
|
||||
unsigned max_work;
|
||||
atomic_t pending_promotes;
|
||||
atomic_t pending_writebacks;
|
||||
atomic_t pending_demotes;
|
||||
|
||||
struct list_head issued;
|
||||
struct list_head queued;
|
||||
struct rb_root pending;
|
||||
|
||||
struct kmem_cache *work_cache;
|
||||
};
|
||||
|
||||
struct background_tracker *btracker_create(unsigned max_work)
|
||||
{
|
||||
struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
|
||||
|
||||
b->max_work = max_work;
|
||||
atomic_set(&b->pending_promotes, 0);
|
||||
atomic_set(&b->pending_writebacks, 0);
|
||||
atomic_set(&b->pending_demotes, 0);
|
||||
|
||||
INIT_LIST_HEAD(&b->issued);
|
||||
INIT_LIST_HEAD(&b->queued);
|
||||
|
||||
b->pending = RB_ROOT;
|
||||
b->work_cache = KMEM_CACHE(bt_work, 0);
|
||||
if (!b->work_cache) {
|
||||
DMERR("couldn't create mempool for background work items");
|
||||
kfree(b);
|
||||
b = NULL;
|
||||
}
|
||||
|
||||
return b;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(btracker_create);
|
||||
|
||||
void btracker_destroy(struct background_tracker *b)
|
||||
{
|
||||
kmem_cache_destroy(b->work_cache);
|
||||
kfree(b);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(btracker_destroy);
|
||||
|
||||
static int cmp_oblock(dm_oblock_t lhs, dm_oblock_t rhs)
|
||||
{
|
||||
if (from_oblock(lhs) < from_oblock(rhs))
|
||||
return -1;
|
||||
|
||||
if (from_oblock(rhs) < from_oblock(lhs))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool __insert_pending(struct background_tracker *b,
|
||||
struct bt_work *nw)
|
||||
{
|
||||
int cmp;
|
||||
struct bt_work *w;
|
||||
struct rb_node **new = &b->pending.rb_node, *parent = NULL;
|
||||
|
||||
while (*new) {
|
||||
w = container_of(*new, struct bt_work, node);
|
||||
|
||||
parent = *new;
|
||||
cmp = cmp_oblock(w->work.oblock, nw->work.oblock);
|
||||
if (cmp < 0)
|
||||
new = &((*new)->rb_left);
|
||||
|
||||
else if (cmp > 0)
|
||||
new = &((*new)->rb_right);
|
||||
|
||||
else
|
||||
/* already present */
|
||||
return false;
|
||||
}
|
||||
|
||||
rb_link_node(&nw->node, parent, new);
|
||||
rb_insert_color(&nw->node, &b->pending);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct bt_work *__find_pending(struct background_tracker *b,
|
||||
dm_oblock_t oblock)
|
||||
{
|
||||
int cmp;
|
||||
struct bt_work *w;
|
||||
struct rb_node **new = &b->pending.rb_node;
|
||||
|
||||
while (*new) {
|
||||
w = container_of(*new, struct bt_work, node);
|
||||
|
||||
cmp = cmp_oblock(w->work.oblock, oblock);
|
||||
if (cmp < 0)
|
||||
new = &((*new)->rb_left);
|
||||
|
||||
else if (cmp > 0)
|
||||
new = &((*new)->rb_right);
|
||||
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
return *new ? w : NULL;
|
||||
}
|
||||
|
||||
|
||||
static void update_stats(struct background_tracker *b, struct policy_work *w, int delta)
|
||||
{
|
||||
switch (w->op) {
|
||||
case POLICY_PROMOTE:
|
||||
atomic_add(delta, &b->pending_promotes);
|
||||
break;
|
||||
|
||||
case POLICY_DEMOTE:
|
||||
atomic_add(delta, &b->pending_demotes);
|
||||
break;
|
||||
|
||||
case POLICY_WRITEBACK:
|
||||
atomic_add(delta, &b->pending_writebacks);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned btracker_nr_writebacks_queued(struct background_tracker *b)
|
||||
{
|
||||
return atomic_read(&b->pending_writebacks);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(btracker_nr_writebacks_queued);
|
||||
|
||||
unsigned btracker_nr_demotions_queued(struct background_tracker *b)
|
||||
{
|
||||
return atomic_read(&b->pending_demotes);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(btracker_nr_demotions_queued);
|
||||
|
||||
static bool max_work_reached(struct background_tracker *b)
|
||||
{
|
||||
// FIXME: finish
|
||||
return false;
|
||||
}
|
||||
|
||||
int btracker_queue(struct background_tracker *b,
|
||||
struct policy_work *work,
|
||||
struct policy_work **pwork)
|
||||
{
|
||||
struct bt_work *w;
|
||||
|
||||
if (pwork)
|
||||
*pwork = NULL;
|
||||
|
||||
if (max_work_reached(b))
|
||||
return -ENOMEM;
|
||||
|
||||
w = kmem_cache_alloc(b->work_cache, GFP_NOWAIT);
|
||||
if (!w)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(&w->work, work, sizeof(*work));
|
||||
|
||||
if (!__insert_pending(b, w)) {
|
||||
/*
|
||||
* There was a race, we'll just ignore this second
|
||||
* bit of work for the same oblock.
|
||||
*/
|
||||
kmem_cache_free(b->work_cache, w);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (pwork) {
|
||||
*pwork = &w->work;
|
||||
list_add(&w->list, &b->issued);
|
||||
} else
|
||||
list_add(&w->list, &b->queued);
|
||||
update_stats(b, &w->work, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(btracker_queue);
|
||||
|
||||
/*
|
||||
* Returns -ENODATA if there's no work.
|
||||
*/
|
||||
int btracker_issue(struct background_tracker *b, struct policy_work **work)
|
||||
{
|
||||
struct bt_work *w;
|
||||
|
||||
if (list_empty(&b->queued))
|
||||
return -ENODATA;
|
||||
|
||||
w = list_first_entry(&b->queued, struct bt_work, list);
|
||||
list_move(&w->list, &b->issued);
|
||||
*work = &w->work;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(btracker_issue);
|
||||
|
||||
void btracker_complete(struct background_tracker *b,
|
||||
struct policy_work *op)
|
||||
{
|
||||
struct bt_work *w = container_of(op, struct bt_work, work);
|
||||
|
||||
update_stats(b, &w->work, -1);
|
||||
rb_erase(&w->node, &b->pending);
|
||||
list_del(&w->list);
|
||||
kmem_cache_free(b->work_cache, w);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(btracker_complete);
|
||||
|
||||
bool btracker_promotion_already_present(struct background_tracker *b,
|
||||
dm_oblock_t oblock)
|
||||
{
|
||||
return __find_pending(b, oblock) != NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(btracker_promotion_already_present);
|
||||
|
||||
/*----------------------------------------------------------------*/
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (C) 2017 Red Hat. All rights reserved.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#ifndef DM_CACHE_BACKGROUND_WORK_H
|
||||
#define DM_CACHE_BACKGROUND_WORK_H
|
||||
|
||||
#include <linux/vmalloc.h>
|
||||
#include "dm-cache-policy.h"
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
struct background_work;
|
||||
struct background_tracker;
|
||||
|
||||
/*
|
||||
* FIXME: discuss lack of locking in all methods.
|
||||
*/
|
||||
struct background_tracker *btracker_create(unsigned max_work);
|
||||
void btracker_destroy(struct background_tracker *b);
|
||||
|
||||
unsigned btracker_nr_writebacks_queued(struct background_tracker *b);
|
||||
unsigned btracker_nr_demotions_queued(struct background_tracker *b);
|
||||
|
||||
/*
|
||||
* returns -EINVAL iff the work is already queued. -ENOMEM if the work
|
||||
* couldn't be queued for another reason.
|
||||
*/
|
||||
int btracker_queue(struct background_tracker *b,
|
||||
struct policy_work *work,
|
||||
struct policy_work **pwork);
|
||||
|
||||
/*
|
||||
* Returns -ENODATA if there's no work.
|
||||
*/
|
||||
int btracker_issue(struct background_tracker *b, struct policy_work **work);
|
||||
void btracker_complete(struct background_tracker *b,
|
||||
struct policy_work *op);
|
||||
bool btracker_promotion_already_present(struct background_tracker *b,
|
||||
dm_oblock_t oblock);
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
#endif
|
|
@ -27,8 +27,6 @@
|
|||
#define MIN_CACHE_VERSION 1
|
||||
#define MAX_CACHE_VERSION 2
|
||||
|
||||
#define CACHE_METADATA_CACHE_SIZE 64
|
||||
|
||||
/*
|
||||
* 3 for btree insert +
|
||||
* 2 for btree lookup used within space map
|
||||
|
@ -535,7 +533,6 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
|
|||
{
|
||||
int r;
|
||||
cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
|
||||
CACHE_METADATA_CACHE_SIZE,
|
||||
CACHE_MAX_CONCURRENT_LOCKS);
|
||||
if (IS_ERR(cmd->bm)) {
|
||||
DMERR("could not create block manager");
|
||||
|
|
|
@ -50,6 +50,8 @@
|
|||
#define DM_CACHE_FEATURE_COMPAT_RO_SUPP 0UL
|
||||
#define DM_CACHE_FEATURE_INCOMPAT_SUPP 0UL
|
||||
|
||||
struct dm_cache_metadata;
|
||||
|
||||
/*
|
||||
* Reopens or creates a new, empty metadata volume. Returns an ERR_PTR on
|
||||
* failure. If reopening then features must match.
|
||||
|
|
|
@ -1,469 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2012 Red Hat. All rights reserved.
|
||||
*
|
||||
* writeback cache policy supporting flushing out dirty cache blocks.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
||||
#include "dm-cache-policy.h"
|
||||
#include "dm.h"
|
||||
|
||||
#include <linux/hash.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
#define DM_MSG_PREFIX "cache cleaner"
|
||||
|
||||
/* Cache entry struct. */
|
||||
struct wb_cache_entry {
|
||||
struct list_head list;
|
||||
struct hlist_node hlist;
|
||||
|
||||
dm_oblock_t oblock;
|
||||
dm_cblock_t cblock;
|
||||
bool dirty:1;
|
||||
bool pending:1;
|
||||
};
|
||||
|
||||
struct hash {
|
||||
struct hlist_head *table;
|
||||
dm_block_t hash_bits;
|
||||
unsigned nr_buckets;
|
||||
};
|
||||
|
||||
struct policy {
|
||||
struct dm_cache_policy policy;
|
||||
spinlock_t lock;
|
||||
|
||||
struct list_head free;
|
||||
struct list_head clean;
|
||||
struct list_head clean_pending;
|
||||
struct list_head dirty;
|
||||
|
||||
/*
|
||||
* We know exactly how many cblocks will be needed,
|
||||
* so we can allocate them up front.
|
||||
*/
|
||||
dm_cblock_t cache_size, nr_cblocks_allocated;
|
||||
struct wb_cache_entry *cblocks;
|
||||
struct hash chash;
|
||||
};
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* Low-level functions.
|
||||
*/
|
||||
static unsigned next_power(unsigned n, unsigned min)
|
||||
{
|
||||
return roundup_pow_of_two(max(n, min));
|
||||
}
|
||||
|
||||
static struct policy *to_policy(struct dm_cache_policy *p)
|
||||
{
|
||||
return container_of(p, struct policy, policy);
|
||||
}
|
||||
|
||||
static struct list_head *list_pop(struct list_head *q)
|
||||
{
|
||||
struct list_head *r = q->next;
|
||||
|
||||
list_del(r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
/* Allocate/free various resources. */
|
||||
static int alloc_hash(struct hash *hash, unsigned elts)
|
||||
{
|
||||
hash->nr_buckets = next_power(elts >> 4, 16);
|
||||
hash->hash_bits = __ffs(hash->nr_buckets);
|
||||
hash->table = vzalloc(sizeof(*hash->table) * hash->nr_buckets);
|
||||
|
||||
return hash->table ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
static void free_hash(struct hash *hash)
|
||||
{
|
||||
vfree(hash->table);
|
||||
}
|
||||
|
||||
static int alloc_cache_blocks_with_hash(struct policy *p, dm_cblock_t cache_size)
|
||||
{
|
||||
int r = -ENOMEM;
|
||||
|
||||
p->cblocks = vzalloc(sizeof(*p->cblocks) * from_cblock(cache_size));
|
||||
if (p->cblocks) {
|
||||
unsigned u = from_cblock(cache_size);
|
||||
|
||||
while (u--)
|
||||
list_add(&p->cblocks[u].list, &p->free);
|
||||
|
||||
p->nr_cblocks_allocated = 0;
|
||||
|
||||
/* Cache entries hash. */
|
||||
r = alloc_hash(&p->chash, from_cblock(cache_size));
|
||||
if (r)
|
||||
vfree(p->cblocks);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void free_cache_blocks_and_hash(struct policy *p)
|
||||
{
|
||||
free_hash(&p->chash);
|
||||
vfree(p->cblocks);
|
||||
}
|
||||
|
||||
static struct wb_cache_entry *alloc_cache_entry(struct policy *p)
|
||||
{
|
||||
struct wb_cache_entry *e;
|
||||
|
||||
BUG_ON(from_cblock(p->nr_cblocks_allocated) >= from_cblock(p->cache_size));
|
||||
|
||||
e = list_entry(list_pop(&p->free), struct wb_cache_entry, list);
|
||||
p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) + 1);
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
/* Hash functions (lookup, insert, remove). */
|
||||
static struct wb_cache_entry *lookup_cache_entry(struct policy *p, dm_oblock_t oblock)
|
||||
{
|
||||
struct hash *hash = &p->chash;
|
||||
unsigned h = hash_64(from_oblock(oblock), hash->hash_bits);
|
||||
struct wb_cache_entry *cur;
|
||||
struct hlist_head *bucket = &hash->table[h];
|
||||
|
||||
hlist_for_each_entry(cur, bucket, hlist) {
|
||||
if (cur->oblock == oblock) {
|
||||
/* Move upfront bucket for faster access. */
|
||||
hlist_del(&cur->hlist);
|
||||
hlist_add_head(&cur->hlist, bucket);
|
||||
return cur;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void insert_cache_hash_entry(struct policy *p, struct wb_cache_entry *e)
|
||||
{
|
||||
unsigned h = hash_64(from_oblock(e->oblock), p->chash.hash_bits);
|
||||
|
||||
hlist_add_head(&e->hlist, &p->chash.table[h]);
|
||||
}
|
||||
|
||||
static void remove_cache_hash_entry(struct wb_cache_entry *e)
|
||||
{
|
||||
hlist_del(&e->hlist);
|
||||
}
|
||||
|
||||
/* Public interface (see dm-cache-policy.h */
|
||||
static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock,
|
||||
bool can_block, bool can_migrate, bool discarded_oblock,
|
||||
struct bio *bio, struct policy_locker *locker,
|
||||
struct policy_result *result)
|
||||
{
|
||||
struct policy *p = to_policy(pe);
|
||||
struct wb_cache_entry *e;
|
||||
unsigned long flags;
|
||||
|
||||
result->op = POLICY_MISS;
|
||||
|
||||
if (can_block)
|
||||
spin_lock_irqsave(&p->lock, flags);
|
||||
|
||||
else if (!spin_trylock_irqsave(&p->lock, flags))
|
||||
return -EWOULDBLOCK;
|
||||
|
||||
e = lookup_cache_entry(p, oblock);
|
||||
if (e) {
|
||||
result->op = POLICY_HIT;
|
||||
result->cblock = e->cblock;
|
||||
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&p->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int wb_lookup(struct dm_cache_policy *pe, dm_oblock_t oblock, dm_cblock_t *cblock)
|
||||
{
|
||||
int r;
|
||||
struct policy *p = to_policy(pe);
|
||||
struct wb_cache_entry *e;
|
||||
unsigned long flags;
|
||||
|
||||
if (!spin_trylock_irqsave(&p->lock, flags))
|
||||
return -EWOULDBLOCK;
|
||||
|
||||
e = lookup_cache_entry(p, oblock);
|
||||
if (e) {
|
||||
*cblock = e->cblock;
|
||||
r = 0;
|
||||
|
||||
} else
|
||||
r = -ENOENT;
|
||||
|
||||
spin_unlock_irqrestore(&p->lock, flags);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void __set_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock, bool set)
|
||||
{
|
||||
struct policy *p = to_policy(pe);
|
||||
struct wb_cache_entry *e;
|
||||
|
||||
e = lookup_cache_entry(p, oblock);
|
||||
BUG_ON(!e);
|
||||
|
||||
if (set) {
|
||||
if (!e->dirty) {
|
||||
e->dirty = true;
|
||||
list_move(&e->list, &p->dirty);
|
||||
}
|
||||
|
||||
} else {
|
||||
if (e->dirty) {
|
||||
e->pending = false;
|
||||
e->dirty = false;
|
||||
list_move(&e->list, &p->clean);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void wb_set_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock)
|
||||
{
|
||||
struct policy *p = to_policy(pe);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&p->lock, flags);
|
||||
__set_clear_dirty(pe, oblock, true);
|
||||
spin_unlock_irqrestore(&p->lock, flags);
|
||||
}
|
||||
|
||||
static void wb_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock)
|
||||
{
|
||||
struct policy *p = to_policy(pe);
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&p->lock, flags);
|
||||
__set_clear_dirty(pe, oblock, false);
|
||||
spin_unlock_irqrestore(&p->lock, flags);
|
||||
}
|
||||
|
||||
static void add_cache_entry(struct policy *p, struct wb_cache_entry *e)
|
||||
{
|
||||
insert_cache_hash_entry(p, e);
|
||||
if (e->dirty)
|
||||
list_add(&e->list, &p->dirty);
|
||||
else
|
||||
list_add(&e->list, &p->clean);
|
||||
}
|
||||
|
||||
static int wb_load_mapping(struct dm_cache_policy *pe,
|
||||
dm_oblock_t oblock, dm_cblock_t cblock,
|
||||
uint32_t hint, bool hint_valid)
|
||||
{
|
||||
int r;
|
||||
struct policy *p = to_policy(pe);
|
||||
struct wb_cache_entry *e = alloc_cache_entry(p);
|
||||
|
||||
if (e) {
|
||||
e->cblock = cblock;
|
||||
e->oblock = oblock;
|
||||
e->dirty = false; /* blocks default to clean */
|
||||
add_cache_entry(p, e);
|
||||
r = 0;
|
||||
|
||||
} else
|
||||
r = -ENOMEM;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void wb_destroy(struct dm_cache_policy *pe)
|
||||
{
|
||||
struct policy *p = to_policy(pe);
|
||||
|
||||
free_cache_blocks_and_hash(p);
|
||||
kfree(p);
|
||||
}
|
||||
|
||||
static struct wb_cache_entry *__wb_force_remove_mapping(struct policy *p, dm_oblock_t oblock)
|
||||
{
|
||||
struct wb_cache_entry *r = lookup_cache_entry(p, oblock);
|
||||
|
||||
BUG_ON(!r);
|
||||
|
||||
remove_cache_hash_entry(r);
|
||||
list_del(&r->list);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void wb_remove_mapping(struct dm_cache_policy *pe, dm_oblock_t oblock)
|
||||
{
|
||||
struct policy *p = to_policy(pe);
|
||||
struct wb_cache_entry *e;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&p->lock, flags);
|
||||
e = __wb_force_remove_mapping(p, oblock);
|
||||
list_add_tail(&e->list, &p->free);
|
||||
BUG_ON(!from_cblock(p->nr_cblocks_allocated));
|
||||
p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) - 1);
|
||||
spin_unlock_irqrestore(&p->lock, flags);
|
||||
}
|
||||
|
||||
static void wb_force_mapping(struct dm_cache_policy *pe,
|
||||
dm_oblock_t current_oblock, dm_oblock_t oblock)
|
||||
{
|
||||
struct policy *p = to_policy(pe);
|
||||
struct wb_cache_entry *e;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&p->lock, flags);
|
||||
e = __wb_force_remove_mapping(p, current_oblock);
|
||||
e->oblock = oblock;
|
||||
add_cache_entry(p, e);
|
||||
spin_unlock_irqrestore(&p->lock, flags);
|
||||
}
|
||||
|
||||
static struct wb_cache_entry *get_next_dirty_entry(struct policy *p)
|
||||
{
|
||||
struct list_head *l;
|
||||
struct wb_cache_entry *r;
|
||||
|
||||
if (list_empty(&p->dirty))
|
||||
return NULL;
|
||||
|
||||
l = list_pop(&p->dirty);
|
||||
r = container_of(l, struct wb_cache_entry, list);
|
||||
list_add(l, &p->clean_pending);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int wb_writeback_work(struct dm_cache_policy *pe,
|
||||
dm_oblock_t *oblock,
|
||||
dm_cblock_t *cblock,
|
||||
bool critical_only)
|
||||
{
|
||||
int r = -ENOENT;
|
||||
struct policy *p = to_policy(pe);
|
||||
struct wb_cache_entry *e;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&p->lock, flags);
|
||||
|
||||
e = get_next_dirty_entry(p);
|
||||
if (e) {
|
||||
*oblock = e->oblock;
|
||||
*cblock = e->cblock;
|
||||
r = 0;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&p->lock, flags);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static dm_cblock_t wb_residency(struct dm_cache_policy *pe)
|
||||
{
|
||||
return to_policy(pe)->nr_cblocks_allocated;
|
||||
}
|
||||
|
||||
/* Init the policy plugin interface function pointers. */
|
||||
static void init_policy_functions(struct policy *p)
|
||||
{
|
||||
p->policy.destroy = wb_destroy;
|
||||
p->policy.map = wb_map;
|
||||
p->policy.lookup = wb_lookup;
|
||||
p->policy.set_dirty = wb_set_dirty;
|
||||
p->policy.clear_dirty = wb_clear_dirty;
|
||||
p->policy.load_mapping = wb_load_mapping;
|
||||
p->policy.get_hint = NULL;
|
||||
p->policy.remove_mapping = wb_remove_mapping;
|
||||
p->policy.writeback_work = wb_writeback_work;
|
||||
p->policy.force_mapping = wb_force_mapping;
|
||||
p->policy.residency = wb_residency;
|
||||
p->policy.tick = NULL;
|
||||
}
|
||||
|
||||
static struct dm_cache_policy *wb_create(dm_cblock_t cache_size,
|
||||
sector_t origin_size,
|
||||
sector_t cache_block_size)
|
||||
{
|
||||
int r;
|
||||
struct policy *p = kzalloc(sizeof(*p), GFP_KERNEL);
|
||||
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
init_policy_functions(p);
|
||||
INIT_LIST_HEAD(&p->free);
|
||||
INIT_LIST_HEAD(&p->clean);
|
||||
INIT_LIST_HEAD(&p->clean_pending);
|
||||
INIT_LIST_HEAD(&p->dirty);
|
||||
|
||||
p->cache_size = cache_size;
|
||||
spin_lock_init(&p->lock);
|
||||
|
||||
/* Allocate cache entry structs and add them to free list. */
|
||||
r = alloc_cache_blocks_with_hash(p, cache_size);
|
||||
if (!r)
|
||||
return &p->policy;
|
||||
|
||||
kfree(p);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
static struct dm_cache_policy_type wb_policy_type = {
|
||||
.name = "cleaner",
|
||||
.version = {1, 0, 0},
|
||||
.hint_size = 4,
|
||||
.owner = THIS_MODULE,
|
||||
.create = wb_create
|
||||
};
|
||||
|
||||
static int __init wb_init(void)
|
||||
{
|
||||
int r = dm_cache_policy_register(&wb_policy_type);
|
||||
|
||||
if (r < 0)
|
||||
DMERR("register failed %d", r);
|
||||
else
|
||||
DMINFO("version %u.%u.%u loaded",
|
||||
wb_policy_type.version[0],
|
||||
wb_policy_type.version[1],
|
||||
wb_policy_type.version[2]);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void __exit wb_exit(void)
|
||||
{
|
||||
dm_cache_policy_unregister(&wb_policy_type);
|
||||
}
|
||||
|
||||
module_init(wb_init);
|
||||
module_exit(wb_exit);
|
||||
|
||||
MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("cleaner cache policy");
|
|
@ -12,40 +12,59 @@
|
|||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
* Little inline functions that simplify calling the policy methods.
|
||||
*/
|
||||
static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock,
|
||||
bool can_block, bool can_migrate, bool discarded_oblock,
|
||||
struct bio *bio, struct policy_locker *locker,
|
||||
struct policy_result *result)
|
||||
static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
|
||||
int data_dir, bool fast_copy, bool *background_queued)
|
||||
{
|
||||
return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, locker, result);
|
||||
return p->lookup(p, oblock, cblock, data_dir, fast_copy, background_queued);
|
||||
}
|
||||
|
||||
static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
|
||||
static inline int policy_lookup_with_work(struct dm_cache_policy *p,
|
||||
dm_oblock_t oblock, dm_cblock_t *cblock,
|
||||
int data_dir, bool fast_copy,
|
||||
struct policy_work **work)
|
||||
{
|
||||
BUG_ON(!p->lookup);
|
||||
return p->lookup(p, oblock, cblock);
|
||||
if (!p->lookup_with_work) {
|
||||
*work = NULL;
|
||||
return p->lookup(p, oblock, cblock, data_dir, fast_copy, NULL);
|
||||
}
|
||||
|
||||
return p->lookup_with_work(p, oblock, cblock, data_dir, fast_copy, work);
|
||||
}
|
||||
|
||||
static inline void policy_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
|
||||
static inline int policy_get_background_work(struct dm_cache_policy *p,
|
||||
bool idle, struct policy_work **result)
|
||||
{
|
||||
if (p->set_dirty)
|
||||
p->set_dirty(p, oblock);
|
||||
return p->get_background_work(p, idle, result);
|
||||
}
|
||||
|
||||
static inline void policy_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
|
||||
static inline void policy_complete_background_work(struct dm_cache_policy *p,
|
||||
struct policy_work *work,
|
||||
bool success)
|
||||
{
|
||||
if (p->clear_dirty)
|
||||
p->clear_dirty(p, oblock);
|
||||
return p->complete_background_work(p, work, success);
|
||||
}
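An illustrative sketch, not part of this header: one way the two wrappers above could be combined to drain background work. The promote/demote/writeback handling is left as hypothetical placeholder comments, and the -ENODATA "no more work" convention follows the get_background_work() documentation in dm-cache-policy.h.

static inline void example_drain_background_work(struct dm_cache_policy *p, bool idle)
{
	struct policy_work *work;

	/* policy_get_background_work() returns -ENODATA once nothing is queued. */
	while (!policy_get_background_work(p, idle, &work)) {
		bool success = false;

		switch (work->op) {
		case POLICY_PROMOTE:
			/* hypothetical: copy work->oblock from the origin into work->cblock */
			success = true;
			break;
		case POLICY_DEMOTE:
			/* hypothetical: drop the mapping held at work->cblock */
			success = true;
			break;
		case POLICY_WRITEBACK:
			/* hypothetical: write the dirty data in work->cblock back to work->oblock */
			success = true;
			break;
		}

		/* Pass back the same pointer that was handed out, never a copy. */
		policy_complete_background_work(p, work, success);
	}
}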
|
||||
|
||||
static inline void policy_set_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
|
||||
{
|
||||
p->set_dirty(p, cblock);
|
||||
}
|
||||
|
||||
static inline void policy_clear_dirty(struct dm_cache_policy *p, dm_cblock_t cblock)
|
||||
{
|
||||
p->clear_dirty(p, cblock);
|
||||
}
|
||||
|
||||
static inline int policy_load_mapping(struct dm_cache_policy *p,
|
||||
dm_oblock_t oblock, dm_cblock_t cblock,
|
||||
uint32_t hint, bool hint_valid)
|
||||
bool dirty, uint32_t hint, bool hint_valid)
|
||||
{
|
||||
return p->load_mapping(p, oblock, cblock, hint, hint_valid);
|
||||
return p->load_mapping(p, oblock, cblock, dirty, hint, hint_valid);
|
||||
}
|
||||
|
||||
static inline int policy_invalidate_mapping(struct dm_cache_policy *p,
|
||||
dm_cblock_t cblock)
|
||||
{
|
||||
return p->invalidate_mapping(p, cblock);
|
||||
}
|
||||
|
||||
static inline uint32_t policy_get_hint(struct dm_cache_policy *p,
|
||||
|
@ -54,30 +73,6 @@ static inline uint32_t policy_get_hint(struct dm_cache_policy *p,
|
|||
return p->get_hint ? p->get_hint(p, cblock) : 0;
|
||||
}
|
||||
|
||||
static inline int policy_writeback_work(struct dm_cache_policy *p,
|
||||
dm_oblock_t *oblock,
|
||||
dm_cblock_t *cblock,
|
||||
bool critical_only)
|
||||
{
|
||||
return p->writeback_work ? p->writeback_work(p, oblock, cblock, critical_only) : -ENOENT;
|
||||
}
|
||||
|
||||
static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
|
||||
{
|
||||
p->remove_mapping(p, oblock);
|
||||
}
|
||||
|
||||
static inline int policy_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
|
||||
{
|
||||
return p->remove_cblock(p, cblock);
|
||||
}
|
||||
|
||||
static inline void policy_force_mapping(struct dm_cache_policy *p,
|
||||
dm_oblock_t current_oblock, dm_oblock_t new_oblock)
|
||||
{
|
||||
return p->force_mapping(p, current_oblock, new_oblock);
|
||||
}
|
||||
|
||||
static inline dm_cblock_t policy_residency(struct dm_cache_policy *p)
|
||||
{
|
||||
return p->residency(p);
|
||||
|
@ -107,6 +102,11 @@ static inline int policy_set_config_value(struct dm_cache_policy *p,
|
|||
return p->set_config_value ? p->set_config_value(p, key, value) : -EINVAL;
|
||||
}
|
||||
|
||||
static inline void policy_allow_migrations(struct dm_cache_policy *p, bool allow)
|
||||
{
|
||||
return p->allow_migrations(p, allow);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
|
|
The diff for this file is not shown because it is too large.
|
@ -13,147 +13,94 @@
|
|||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
/* FIXME: make it clear which methods are optional. Get debug policy to
|
||||
* double check this at start.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The cache policy makes the important decisions about which blocks get to
|
||||
* live on the faster cache device.
|
||||
*
|
||||
* When the core target has to remap a bio it calls the 'map' method of the
|
||||
* policy. This returns an instruction telling the core target what to do.
|
||||
*
|
||||
* POLICY_HIT:
|
||||
* That block is in the cache. Remap to the cache and carry on.
|
||||
*
|
||||
* POLICY_MISS:
|
||||
* This block is on the origin device. Remap and carry on.
|
||||
*
|
||||
* POLICY_NEW:
|
||||
* This block is currently on the origin device, but the policy wants to
|
||||
* move it. The core should:
|
||||
*
|
||||
* - hold any further io to this origin block
|
||||
* - copy the origin to the given cache block
|
||||
* - release all the held blocks
|
||||
* - remap the original block to the cache
|
||||
*
|
||||
* POLICY_REPLACE:
|
||||
* This block is currently on the origin device. The policy wants to
|
||||
* move it to the cache, with the added complication that the destination
|
||||
* cache block needs a writeback first. The core should:
|
||||
*
|
||||
* - hold any further io to this origin block
|
||||
* - hold any further io to the origin block that's being written back
|
||||
* - writeback
|
||||
* - copy new block to cache
|
||||
* - release held blocks
|
||||
* - remap bio to cache and reissue.
|
||||
*
|
||||
* Should the core run into trouble while processing a POLICY_NEW or
|
||||
* POLICY_REPLACE instruction it will roll back the policy's mapping using
|
||||
* remove_mapping() or force_mapping(). These methods must not fail. This
|
||||
* approach avoids having transactional semantics in the policy (ie, the
|
||||
* core informing the policy when a migration is complete), and hence makes
|
||||
* it easier to write new policies.
|
||||
*
|
||||
* In general policy methods should never block, except in the case of the
|
||||
* map function when can_migrate is set. So be careful to implement using
|
||||
* bounded, preallocated memory.
|
||||
*/
|
||||
enum policy_operation {
|
||||
POLICY_HIT,
|
||||
POLICY_MISS,
|
||||
POLICY_NEW,
|
||||
POLICY_REPLACE
|
||||
};
|
||||
|
||||
/*
|
||||
* When issuing a POLICY_REPLACE the policy needs to make a callback to
|
||||
* lock the block being demoted. This doesn't need to occur during a
|
||||
* writeback operation since the block remains in the cache.
|
||||
*/
|
||||
struct policy_locker;
|
||||
typedef int (*policy_lock_fn)(struct policy_locker *l, dm_oblock_t oblock);
|
||||
|
||||
struct policy_locker {
|
||||
policy_lock_fn fn;
|
||||
POLICY_PROMOTE,
|
||||
POLICY_DEMOTE,
|
||||
POLICY_WRITEBACK
|
||||
};
|
||||
|
||||
/*
|
||||
* This is the instruction passed back to the core target.
|
||||
*/
|
||||
struct policy_result {
|
||||
struct policy_work {
|
||||
enum policy_operation op;
|
||||
dm_oblock_t old_oblock; /* POLICY_REPLACE */
|
||||
dm_cblock_t cblock; /* POLICY_HIT, POLICY_NEW, POLICY_REPLACE */
|
||||
dm_oblock_t oblock;
|
||||
dm_cblock_t cblock;
|
||||
};
|
||||
|
||||
/*
|
||||
* The cache policy object. Just a bunch of methods. It is envisaged that
|
||||
* this structure will be embedded in a bigger, policy specific structure
|
||||
* (ie. use container_of()).
|
||||
* The cache policy object. It is envisaged that this structure will be
|
||||
* embedded in a bigger, policy specific structure (ie. use container_of()).
|
||||
*/
|
||||
struct dm_cache_policy {
|
||||
|
||||
/*
|
||||
* FIXME: make it clear which methods are optional, and which may
|
||||
* block.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Destroys this object.
|
||||
*/
|
||||
void (*destroy)(struct dm_cache_policy *p);
|
||||
|
||||
/*
|
||||
* See large comment above.
|
||||
*
|
||||
* oblock - the origin block we're interested in.
|
||||
*
|
||||
* can_block - indicates whether the current thread is allowed to
|
||||
* block. -EWOULDBLOCK returned if it can't and would.
|
||||
*
|
||||
* can_migrate - gives permission for POLICY_NEW or POLICY_REPLACE
|
||||
* instructions. If denied and the policy would have
|
||||
* returned one of these instructions it should
|
||||
* return -EWOULDBLOCK.
|
||||
*
|
||||
* discarded_oblock - indicates whether the whole origin block is
|
||||
* in a discarded state (FIXME: better to tell the
|
||||
* policy about this sooner, so it can recycle that
|
||||
* cache block if it wants.)
|
||||
* bio - the bio that triggered this call.
|
||||
* result - gets filled in with the instruction.
|
||||
*
|
||||
* May only return 0, or -EWOULDBLOCK (if !can_migrate)
|
||||
*/
|
||||
int (*map)(struct dm_cache_policy *p, dm_oblock_t oblock,
|
||||
bool can_block, bool can_migrate, bool discarded_oblock,
|
||||
struct bio *bio, struct policy_locker *locker,
|
||||
struct policy_result *result);
|
||||
|
||||
/*
|
||||
* Sometimes we want to see if a block is in the cache, without
|
||||
* triggering any update of stats. (ie. it's not a real hit).
|
||||
* Find the location of a block.
|
||||
*
|
||||
* Must not block.
|
||||
*
|
||||
* Returns 0 if in cache, -ENOENT if not, < 0 for other errors
|
||||
* (-EWOULDBLOCK would be typical).
|
||||
* Returns 0 if in cache (cblock will be set), -ENOENT if not, < 0 for
|
||||
* other errors (-EWOULDBLOCK would be typical). data_dir should be
|
||||
* READ or WRITE. fast_copy should be set if migrating this block would
|
||||
* be 'cheap' somehow (eg, discarded data). background_queued will be set
|
||||
* if a migration has just been queued.
|
||||
*/
|
||||
int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock);
|
||||
int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock,
|
||||
int data_dir, bool fast_copy, bool *background_queued);
|
||||
|
||||
void (*set_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock);
|
||||
void (*clear_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock);
|
||||
/*
|
||||
* Sometimes the core target can optimise a migration, eg, the
|
||||
* block may be discarded, or the bio may cover an entire block.
|
||||
* In order to optimise it needs the migration immediately though
|
||||
* so it knows to do something different with the bio.
|
||||
*
|
||||
* This method is optional (policy-internal will fall back to using
|
||||
* lookup).
|
||||
*/
|
||||
int (*lookup_with_work)(struct dm_cache_policy *p,
|
||||
dm_oblock_t oblock, dm_cblock_t *cblock,
|
||||
int data_dir, bool fast_copy,
|
||||
struct policy_work **work);
|
||||
|
||||
/*
|
||||
* Retrieves background work. Returns -ENODATA when there's no
|
||||
* background work.
|
||||
*/
|
||||
int (*get_background_work)(struct dm_cache_policy *p, bool idle,
|
||||
struct policy_work **result);
|
||||
|
||||
/*
|
||||
* You must pass in the same work pointer that you were given, not
|
||||
* a copy.
|
||||
*/
|
||||
void (*complete_background_work)(struct dm_cache_policy *p,
|
||||
struct policy_work *work,
|
||||
bool success);
|
||||
|
||||
void (*set_dirty)(struct dm_cache_policy *p, dm_cblock_t cblock);
|
||||
void (*clear_dirty)(struct dm_cache_policy *p, dm_cblock_t cblock);
|
||||
|
||||
/*
|
||||
* Called when a cache target is first created. Used to load a
|
||||
* mapping from the metadata device into the policy.
|
||||
*/
|
||||
int (*load_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock,
|
||||
dm_cblock_t cblock, uint32_t hint, bool hint_valid);
|
||||
dm_cblock_t cblock, bool dirty,
|
||||
uint32_t hint, bool hint_valid);
|
||||
|
||||
/*
|
||||
* Drops the mapping, irrespective of whether it's clean or dirty.
|
||||
* Returns -ENODATA if cblock is not mapped.
|
||||
*/
|
||||
int (*invalidate_mapping)(struct dm_cache_policy *p, dm_cblock_t cblock);
|
||||
|
||||
/*
|
||||
* Gets the hint for a given cblock. Called in a single threaded
|
||||
|
@ -161,36 +108,6 @@ struct dm_cache_policy {
|
|||
*/
|
||||
uint32_t (*get_hint)(struct dm_cache_policy *p, dm_cblock_t cblock);
|
||||
|
||||
/*
|
||||
* Override functions used on the error paths of the core target.
|
||||
* They must succeed.
|
||||
*/
|
||||
void (*remove_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock);
|
||||
void (*force_mapping)(struct dm_cache_policy *p, dm_oblock_t current_oblock,
|
||||
dm_oblock_t new_oblock);
|
||||
|
||||
/*
|
||||
* This is called via the invalidate_cblocks message. It is
|
||||
* possible the particular cblock has already been removed due to a
|
||||
* write io in passthrough mode. In which case this should return
|
||||
* -ENODATA.
|
||||
*/
|
||||
int (*remove_cblock)(struct dm_cache_policy *p, dm_cblock_t cblock);
|
||||
|
||||
/*
|
||||
* Provide a dirty block to be written back by the core target. If
|
||||
* critical_only is set then the policy should only provide work if
|
||||
* it urgently needs it.
|
||||
*
|
||||
* Returns:
|
||||
*
|
||||
* 0 and @cblock,@oblock: block to write back provided
|
||||
*
|
||||
* -ENODATA: no dirty blocks available
|
||||
*/
|
||||
int (*writeback_work)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock,
|
||||
bool critical_only);
|
||||
|
||||
/*
|
||||
* How full is the cache?
|
||||
*/
|
||||
|
@ -202,6 +119,8 @@ struct dm_cache_policy {
|
|||
* queue merging has occurred). To stop the policy being fooled by
|
||||
* these, the core target sends regular tick() calls to the policy.
|
||||
* The policy should only count an entry as hit once per tick.
|
||||
*
|
||||
* This method is optional.
|
||||
*/
|
||||
void (*tick)(struct dm_cache_policy *p, bool can_block);
|
||||
|
||||
|
@ -213,6 +132,8 @@ struct dm_cache_policy {
|
|||
int (*set_config_value)(struct dm_cache_policy *p,
|
||||
const char *key, const char *value);
|
||||
|
||||
void (*allow_migrations)(struct dm_cache_policy *p, bool allow);
|
||||
|
||||
/*
|
||||
* Book keeping ptr for the policy register, not for general use.
|
||||
*/
|
||||
|
|
The diff for this file is not shown because it is too large.
|
@ -47,7 +47,7 @@ struct mapped_device {
|
|||
struct request_queue *queue;
|
||||
int numa_node_id;
|
||||
|
||||
unsigned type;
|
||||
enum dm_queue_mode type;
|
||||
/* Protect queue and type against concurrent access. */
|
||||
struct mutex type_lock;
|
||||
|
||||
|
|
The diff for this file is not shown because it is too large.
|
@ -340,6 +340,7 @@ out:
|
|||
static struct target_type delay_target = {
|
||||
.name = "delay",
|
||||
.version = {1, 2, 1},
|
||||
.features = DM_TARGET_PASSES_INTEGRITY,
|
||||
.module = THIS_MODULE,
|
||||
.ctr = delay_ctr,
|
||||
.dtr = delay_dtr,
|
||||
|
|
|
@ -254,7 +254,6 @@ static struct dm_block_validator sb_validator = {
|
|||
* Low level metadata handling
|
||||
*--------------------------------------------------------------*/
|
||||
#define DM_ERA_METADATA_BLOCK_SIZE 4096
|
||||
#define DM_ERA_METADATA_CACHE_SIZE 64
|
||||
#define ERA_MAX_CONCURRENT_LOCKS 5
|
||||
|
||||
struct era_metadata {
|
||||
|
@ -615,7 +614,6 @@ static int create_persistent_data_objects(struct era_metadata *md,
|
|||
int r;
|
||||
|
||||
md->bm = dm_block_manager_create(md->bdev, DM_ERA_METADATA_BLOCK_SIZE,
|
||||
DM_ERA_METADATA_CACHE_SIZE,
|
||||
ERA_MAX_CONCURRENT_LOCKS);
|
||||
if (IS_ERR(md->bm)) {
|
||||
DMERR("could not create block manager");
|
||||
|
@ -961,18 +959,18 @@ static int metadata_commit(struct era_metadata *md)
|
|||
}
|
||||
}
|
||||
|
||||
r = save_sm_root(md);
|
||||
if (r) {
|
||||
DMERR("%s: save_sm_root failed", __func__);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = dm_tm_pre_commit(md->tm);
|
||||
if (r) {
|
||||
DMERR("%s: pre commit failed", __func__);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = save_sm_root(md);
|
||||
if (r) {
|
||||
DMERR("%s: save_sm_root failed", __func__);
|
||||
return r;
|
||||
}
|
||||
|
||||
r = superblock_lock(md, &sblock);
|
||||
if (r) {
|
||||
DMERR("%s: superblock lock failed", __func__);
|
||||
|
|
The diff for this file is not shown because it is too large.
|
@ -37,14 +37,6 @@ struct hash_cell {
|
|||
struct dm_table *new_map;
|
||||
};
|
||||
|
||||
/*
|
||||
* A dummy definition to make RCU happy.
|
||||
* struct dm_table should never be dereferenced in this file.
|
||||
*/
|
||||
struct dm_table {
|
||||
int undefined__;
|
||||
};
|
||||
|
||||
struct vers_iter {
|
||||
size_t param_size;
|
||||
struct dm_target_versions *vers, *old_vers;
|
||||
|
@ -1268,7 +1260,7 @@ static int populate_table(struct dm_table *table,
|
|||
return dm_table_complete(table);
|
||||
}
|
||||
|
||||
static bool is_valid_type(unsigned cur, unsigned new)
|
||||
static bool is_valid_type(enum dm_queue_mode cur, enum dm_queue_mode new)
|
||||
{
|
||||
if (cur == new ||
|
||||
(cur == DM_TYPE_BIO_BASED && new == DM_TYPE_DAX_BIO_BASED))
|
||||
|
@ -1778,12 +1770,12 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
|
|||
cmd == DM_LIST_VERSIONS_CMD)
|
||||
return 0;
|
||||
|
||||
if ((cmd == DM_DEV_CREATE_CMD)) {
|
||||
if (cmd == DM_DEV_CREATE_CMD) {
|
||||
if (!*param->name) {
|
||||
DMWARN("name not supplied when creating device");
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if ((*param->uuid && *param->name)) {
|
||||
} else if (*param->uuid && *param->name) {
|
||||
DMWARN("only supply one of name or uuid, cmd(%u)", cmd);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -1848,7 +1840,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
|
|||
if (r)
|
||||
goto out;
|
||||
|
||||
param->data_size = sizeof(*param);
|
||||
param->data_size = offsetof(struct dm_ioctl, data);
|
||||
r = fn(param, input_param_size);
|
||||
|
||||
if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) &&
|
||||
|
|
|
@ -163,6 +163,7 @@ static long linear_direct_access(struct dm_target *ti, sector_t sector,
|
|||
static struct target_type linear_target = {
|
||||
.name = "linear",
|
||||
.version = {1, 3, 0},
|
||||
.features = DM_TARGET_PASSES_INTEGRITY,
|
||||
.module = THIS_MODULE,
|
||||
.ctr = linear_ctr,
|
||||
.dtr = linear_dtr,
|
||||
|
|
|
@ -90,7 +90,7 @@ struct multipath {
|
|||
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
|
||||
atomic_t pg_init_count; /* Number of times pg_init called */
|
||||
|
||||
unsigned queue_mode;
|
||||
enum dm_queue_mode queue_mode;
|
||||
|
||||
struct mutex work_mutex;
|
||||
struct work_struct trigger_event;
|
||||
|
@ -111,7 +111,8 @@ typedef int (*action_fn) (struct pgpath *pgpath);
|
|||
|
||||
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
|
||||
static void trigger_event(struct work_struct *work);
|
||||
static void activate_path(struct work_struct *work);
|
||||
static void activate_or_offline_path(struct pgpath *pgpath);
|
||||
static void activate_path_work(struct work_struct *work);
|
||||
static void process_queued_bios(struct work_struct *work);
|
||||
|
||||
/*-----------------------------------------------
|
||||
|
@ -136,7 +137,7 @@ static struct pgpath *alloc_pgpath(void)
|
|||
|
||||
if (pgpath) {
|
||||
pgpath->is_active = true;
|
||||
INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
|
||||
INIT_DELAYED_WORK(&pgpath->activate_path, activate_path_work);
|
||||
}
|
||||
|
||||
return pgpath;
|
||||
|
@ -297,6 +298,8 @@ static int __pg_init_all_paths(struct multipath *m)
|
|||
struct pgpath *pgpath;
|
||||
unsigned long pg_init_delay = 0;
|
||||
|
||||
lockdep_assert_held(&m->lock);
|
||||
|
||||
if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
|
||||
return 0;
|
||||
|
||||
|
@ -321,13 +324,16 @@ static int __pg_init_all_paths(struct multipath *m)
|
|||
return atomic_read(&m->pg_init_in_progress);
|
||||
}
|
||||
|
||||
static void pg_init_all_paths(struct multipath *m)
|
||||
static int pg_init_all_paths(struct multipath *m)
|
||||
{
|
||||
int ret;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&m->lock, flags);
|
||||
__pg_init_all_paths(m);
|
||||
ret = __pg_init_all_paths(m);
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __switch_pg(struct multipath *m, struct priority_group *pg)
|
||||
|
@ -436,45 +442,21 @@ failed:
|
|||
}
|
||||
|
||||
/*
|
||||
* Check whether bios must be queued in the device-mapper core rather
|
||||
* than here in the target.
|
||||
*
|
||||
* If m->queue_if_no_path and m->saved_queue_if_no_path hold the
|
||||
* same value then we are not between multipath_presuspend()
|
||||
* and multipath_resume() calls and we have no need to check
|
||||
* for the DMF_NOFLUSH_SUSPENDING flag.
|
||||
* dm_report_EIO() is a macro instead of a function to make pr_debug()
|
||||
* report the function name and line number of the function from which
|
||||
* it has been invoked.
|
||||
*/
|
||||
static bool __must_push_back(struct multipath *m)
|
||||
{
|
||||
return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
|
||||
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
|
||||
dm_noflush_suspending(m->ti));
|
||||
}
|
||||
|
||||
static bool must_push_back_rq(struct multipath *m)
|
||||
{
|
||||
bool r;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&m->lock, flags);
|
||||
r = (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
|
||||
__must_push_back(m));
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static bool must_push_back_bio(struct multipath *m)
|
||||
{
|
||||
bool r;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&m->lock, flags);
|
||||
r = __must_push_back(m);
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
|
||||
return r;
|
||||
}
|
||||
#define dm_report_EIO(m) \
|
||||
({ \
|
||||
struct mapped_device *md = dm_table_get_md((m)->ti->table); \
|
||||
\
|
||||
pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \
|
||||
dm_device_name(md), \
|
||||
test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \
|
||||
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \
|
||||
dm_noflush_suspending((m)->ti)); \
|
||||
-EIO; \
|
||||
})
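For example, the error paths below simply do "return dm_report_EIO(m);": the macro logs the device name together with its queue_if_no_path, saved queue_if_no_path and no-flush-suspending state via pr_debug() and then evaluates to -EIO.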
|
||||
|
||||
/*
|
||||
* Map cloned requests (request-based multipath)
|
||||
|
@ -484,11 +466,11 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
|
|||
struct request **__clone)
|
||||
{
|
||||
struct multipath *m = ti->private;
|
||||
int r = DM_MAPIO_REQUEUE;
|
||||
size_t nr_bytes = blk_rq_bytes(rq);
|
||||
struct pgpath *pgpath;
|
||||
struct block_device *bdev;
|
||||
struct dm_mpath_io *mpio = get_mpio(map_context);
|
||||
struct request_queue *q;
|
||||
struct request *clone;
|
||||
|
||||
/* Do we need to select a new pgpath? */
|
||||
|
@ -497,13 +479,14 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
|
|||
pgpath = choose_pgpath(m, nr_bytes);
|
||||
|
||||
if (!pgpath) {
|
||||
if (must_push_back_rq(m))
|
||||
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
|
||||
return DM_MAPIO_DELAY_REQUEUE;
|
||||
return -EIO; /* Failed */
|
||||
return dm_report_EIO(m); /* Failed */
|
||||
} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
|
||||
test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
|
||||
pg_init_all_paths(m);
|
||||
return r;
|
||||
if (pg_init_all_paths(m))
|
||||
return DM_MAPIO_DELAY_REQUEUE;
|
||||
return DM_MAPIO_REQUEUE;
|
||||
}
|
||||
|
||||
memset(mpio, 0, sizeof(*mpio));
|
||||
|
@ -511,13 +494,19 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
|
|||
mpio->nr_bytes = nr_bytes;
|
||||
|
||||
bdev = pgpath->path.dev->bdev;
|
||||
|
||||
clone = blk_get_request(bdev_get_queue(bdev),
|
||||
rq->cmd_flags | REQ_NOMERGE,
|
||||
GFP_ATOMIC);
|
||||
q = bdev_get_queue(bdev);
|
||||
clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC);
|
||||
if (IS_ERR(clone)) {
|
||||
/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
|
||||
return r;
|
||||
bool queue_dying = blk_queue_dying(q);
|
||||
DMERR_LIMIT("blk_get_request() returned %ld%s - requeuing",
|
||||
PTR_ERR(clone), queue_dying ? " (path offline)" : "");
|
||||
if (queue_dying) {
|
||||
atomic_inc(&m->pg_init_in_progress);
|
||||
activate_or_offline_path(pgpath);
|
||||
return DM_MAPIO_REQUEUE;
|
||||
}
|
||||
return DM_MAPIO_DELAY_REQUEUE;
|
||||
}
|
||||
clone->bio = clone->biotail = NULL;
|
||||
clone->rq_disk = bdev->bd_disk;
|
||||
|
@ -567,9 +556,9 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
|
|||
}
|
||||
|
||||
if (!pgpath) {
|
||||
if (!must_push_back_bio(m))
|
||||
return -EIO;
|
||||
return DM_MAPIO_REQUEUE;
|
||||
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
|
||||
return DM_MAPIO_REQUEUE;
|
||||
return dm_report_EIO(m);
|
||||
}
|
||||
|
||||
mpio->pgpath = pgpath;
|
||||
|
@ -640,6 +629,14 @@ static void process_queued_bios(struct work_struct *work)
|
|||
blk_finish_plug(&plug);
|
||||
}
|
||||
|
||||
static void assign_bit(bool value, long nr, unsigned long *addr)
|
||||
{
|
||||
if (value)
|
||||
set_bit(nr, addr);
|
||||
else
|
||||
clear_bit(nr, addr);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we run out of usable paths, should we queue I/O or error it?
|
||||
*/
|
||||
|
@ -649,23 +646,11 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
|
|||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&m->lock, flags);
|
||||
|
||||
if (save_old_value) {
|
||||
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
|
||||
set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
|
||||
else
|
||||
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
|
||||
} else {
|
||||
if (queue_if_no_path)
|
||||
set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
|
||||
else
|
||||
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
|
||||
}
|
||||
if (queue_if_no_path)
|
||||
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
|
||||
else
|
||||
clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
|
||||
|
||||
assign_bit((save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) ||
|
||||
(!save_old_value && queue_if_no_path),
|
||||
MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
|
||||
assign_bit(queue_if_no_path || dm_noflush_suspending(m->ti),
|
||||
MPATHF_QUEUE_IF_NO_PATH, &m->flags);
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
|
||||
if (!queue_if_no_path) {
|
||||
|
@ -1438,10 +1423,8 @@ out:
|
|||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
}
|
||||
|
||||
static void activate_path(struct work_struct *work)
|
||||
static void activate_or_offline_path(struct pgpath *pgpath)
|
||||
{
|
||||
struct pgpath *pgpath =
|
||||
container_of(work, struct pgpath, activate_path.work);
|
||||
struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
|
||||
|
||||
if (pgpath->is_active && !blk_queue_dying(q))
|
||||
|
@ -1450,6 +1433,14 @@ static void activate_path(struct work_struct *work)
|
|||
pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED);
|
||||
}
|
||||
|
||||
static void activate_path_work(struct work_struct *work)
|
||||
{
|
||||
struct pgpath *pgpath =
|
||||
container_of(work, struct pgpath, activate_path.work);
|
||||
|
||||
activate_or_offline_path(pgpath);
|
||||
}
|
||||
|
||||
static int noretry_error(int error)
|
||||
{
|
||||
switch (error) {
|
||||
|
@ -1501,12 +1492,9 @@ static int do_end_io(struct multipath *m, struct request *clone,
|
|||
if (mpio->pgpath)
|
||||
fail_path(mpio->pgpath);
|
||||
|
||||
if (!atomic_read(&m->nr_valid_paths)) {
|
||||
if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
|
||||
if (!must_push_back_rq(m))
|
||||
r = -EIO;
|
||||
}
|
||||
}
|
||||
if (atomic_read(&m->nr_valid_paths) == 0 &&
|
||||
!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
|
||||
r = dm_report_EIO(m);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -1547,13 +1535,9 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
|
|||
if (mpio->pgpath)
|
||||
fail_path(mpio->pgpath);
|
||||
|
||||
if (!atomic_read(&m->nr_valid_paths)) {
|
||||
if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
|
||||
if (!must_push_back_bio(m))
|
||||
return -EIO;
|
||||
return DM_ENDIO_REQUEUE;
|
||||
}
|
||||
}
|
||||
if (atomic_read(&m->nr_valid_paths) == 0 &&
|
||||
!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
|
||||
return dm_report_EIO(m);
|
||||
|
||||
/* Queue for the daemon to resubmit */
|
||||
dm_bio_restore(get_bio_details_from_bio(clone), clone);
|
||||
|
@ -1619,10 +1603,8 @@ static void multipath_resume(struct dm_target *ti)
|
|||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&m->lock, flags);
|
||||
if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags))
|
||||
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
|
||||
else
|
||||
clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
|
||||
assign_bit(test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags),
|
||||
MPATHF_QUEUE_IF_NO_PATH, &m->flags);
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
}
|
||||
|
||||
|
@ -1682,6 +1664,9 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
|
|||
case DM_TYPE_MQ_REQUEST_BASED:
|
||||
DMEMIT("queue_mode mq ");
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (C) 2010-2011 Neil Brown
|
||||
* Copyright (C) 2010-2016 Red Hat, Inc. All rights reserved.
|
||||
* Copyright (C) 2010-2017 Red Hat, Inc. All rights reserved.
|
||||
*
|
||||
* This file is released under the GPL.
|
||||
*/
|
||||
|
@ -79,7 +79,10 @@ struct raid_dev {
|
|||
#define __CTR_FLAG_RAID10_USE_NEAR_SETS 14 /* 2 */ /* Only with raid10! */
|
||||
|
||||
/* New for v1.10.0 */
|
||||
#define __CTR_FLAG_JOURNAL_DEV 15 /* 2 */ /* Only with raid4/5/6! */
|
||||
#define __CTR_FLAG_JOURNAL_DEV 15 /* 2 */ /* Only with raid4/5/6 (journal device)! */
|
||||
|
||||
/* New for v1.11.1 */
|
||||
#define __CTR_FLAG_JOURNAL_MODE 16 /* 2 */ /* Only with raid4/5/6 (journal mode)! */
|
||||
|
||||
/*
|
||||
* Flags for rs->ctr_flags field.
|
||||
|
@ -100,6 +103,7 @@ struct raid_dev {
|
|||
#define CTR_FLAG_DATA_OFFSET (1 << __CTR_FLAG_DATA_OFFSET)
|
||||
#define CTR_FLAG_RAID10_USE_NEAR_SETS (1 << __CTR_FLAG_RAID10_USE_NEAR_SETS)
|
||||
#define CTR_FLAG_JOURNAL_DEV (1 << __CTR_FLAG_JOURNAL_DEV)
|
||||
#define CTR_FLAG_JOURNAL_MODE (1 << __CTR_FLAG_JOURNAL_MODE)
|
||||
|
||||
#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET)
|
||||
|
||||
|
@ -175,7 +179,8 @@ struct raid_dev {
|
|||
CTR_FLAG_REGION_SIZE | \
|
||||
CTR_FLAG_DELTA_DISKS | \
|
||||
CTR_FLAG_DATA_OFFSET | \
|
||||
CTR_FLAG_JOURNAL_DEV)
|
||||
CTR_FLAG_JOURNAL_DEV | \
|
||||
CTR_FLAG_JOURNAL_MODE)
|
||||
|
||||
#define RAID6_VALID_FLAGS (CTR_FLAG_SYNC | \
|
||||
CTR_FLAG_REBUILD | \
|
||||
|
@ -186,7 +191,8 @@ struct raid_dev {
|
|||
CTR_FLAG_REGION_SIZE | \
|
||||
CTR_FLAG_DELTA_DISKS | \
|
||||
CTR_FLAG_DATA_OFFSET | \
|
||||
CTR_FLAG_JOURNAL_DEV)
|
||||
CTR_FLAG_JOURNAL_DEV | \
|
||||
CTR_FLAG_JOURNAL_MODE)
|
||||
/* ...valid options definitions per raid level */
|
||||
|
||||
/*
|
||||
|
@ -239,6 +245,7 @@ struct raid_set {
|
|||
struct journal_dev {
|
||||
struct dm_dev *dev;
|
||||
struct md_rdev rdev;
|
||||
int mode;
|
||||
} journal_dev;
|
||||
|
||||
struct raid_dev dev[0];
|
||||
|
@ -326,6 +333,7 @@ static struct arg_name_flag {
|
|||
{ CTR_FLAG_DELTA_DISKS, "delta_disks"},
|
||||
{ CTR_FLAG_RAID10_USE_NEAR_SETS, "raid10_use_near_sets"},
|
||||
{ CTR_FLAG_JOURNAL_DEV, "journal_dev" },
|
||||
{ CTR_FLAG_JOURNAL_MODE, "journal_mode" },
|
||||
};
|
||||
|
||||
/* Return argument name string for given @flag */
|
||||
|
@ -344,6 +352,39 @@ static const char *dm_raid_arg_name_by_flag(const uint32_t flag)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* Define correlation of raid456 journal cache modes and dm-raid target line parameters */
|
||||
static struct {
|
||||
const int mode;
|
||||
const char *param;
|
||||
} _raid456_journal_mode[] = {
|
||||
{ R5C_JOURNAL_MODE_WRITE_THROUGH , "writethrough" },
|
||||
{ R5C_JOURNAL_MODE_WRITE_BACK , "writeback" }
|
||||
};
|
||||
|
||||
/* Return MD raid4/5/6 journal mode for dm @journal_mode one */
|
||||
static int dm_raid_journal_mode_to_md(const char *mode)
|
||||
{
|
||||
int m = ARRAY_SIZE(_raid456_journal_mode);
|
||||
|
||||
while (m--)
|
||||
if (!strcasecmp(mode, _raid456_journal_mode[m].param))
|
||||
return _raid456_journal_mode[m].mode;
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Return dm-raid raid4/5/6 journal mode string for @mode */
|
||||
static const char *md_journal_mode_to_dm_raid(const int mode)
|
||||
{
|
||||
int m = ARRAY_SIZE(_raid456_journal_mode);
|
||||
|
||||
while (m--)
|
||||
if (mode == _raid456_journal_mode[m].mode)
|
||||
return _raid456_journal_mode[m].param;
|
||||
|
||||
return "unknown";
|
||||
}
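For illustration (not part of the source): dm_raid_journal_mode_to_md("writeback") resolves to R5C_JOURNAL_MODE_WRITE_BACK through the table above (the match is case-insensitive thanks to strcasecmp()), any unrecognised string yields -EINVAL, and md_journal_mode_to_dm_raid() maps a mode value outside the table to the string "unknown".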
|
||||
|
||||
/*
|
||||
* Bool helpers to test for various raid levels of a raid set.
|
||||
* It's level as reported by the superblock rather than
|
||||
|
@ -1183,7 +1224,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
|
|||
continue;
|
||||
}
|
||||
|
||||
/* "journal_dev dev" */
|
||||
/* "journal_dev <dev>" */
|
||||
if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV))) {
|
||||
int r;
|
||||
struct md_rdev *jdev;
|
||||
|
@ -1211,10 +1252,32 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
|
|||
rs->ti->error = "No space for raid4/5/6 journal";
|
||||
return -ENOSPC;
|
||||
}
|
||||
rs->journal_dev.mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
|
||||
set_bit(Journal, &jdev->flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* "journal_mode <mode>" ("journal_dev" mandatory!) */
|
||||
if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_MODE))) {
|
||||
int r;
|
||||
|
||||
if (!test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
|
||||
rs->ti->error = "raid4/5/6 'journal_mode' is invalid without 'journal_dev'";
|
||||
return -EINVAL;
|
||||
}
|
||||
if (test_and_set_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
|
||||
rs->ti->error = "Only one raid4/5/6 'journal_mode' argument allowed";
|
||||
return -EINVAL;
|
||||
}
|
||||
r = dm_raid_journal_mode_to_md(arg);
|
||||
if (r < 0) {
|
||||
rs->ti->error = "Invalid 'journal_mode' argument";
|
||||
return r;
|
||||
}
|
||||
rs->journal_dev.mode = r;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parameters with number values from here on.
|
||||
*/
|
||||
|
@ -3076,6 +3139,16 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
rs->callbacks.congested_fn = raid_is_congested;
|
||||
dm_table_add_target_callbacks(ti->table, &rs->callbacks);
|
||||
|
||||
/* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
|
||||
if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags)) {
|
||||
r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
|
||||
if (r) {
|
||||
ti->error = "Failed to set raid4/5/6 journal mode";
|
||||
mddev_unlock(&rs->md);
|
||||
goto bad_journal_mode_set;
|
||||
}
|
||||
}
|
||||
|
||||
mddev_suspend(&rs->md);
|
||||
|
||||
/* Try to adjust the raid4/5/6 stripe cache size to the stripe size */
|
||||
|
@ -3109,6 +3182,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
mddev_unlock(&rs->md);
|
||||
return 0;
|
||||
|
||||
bad_journal_mode_set:
|
||||
bad_stripe_cache:
|
||||
bad_check_reshape:
|
||||
md_stop(&rs->md);
|
||||
|
@ -3180,18 +3254,18 @@ static const char *decipher_sync_action(struct mddev *mddev)
|
|||
* Status characters:
|
||||
*
|
||||
* 'D' = Dead/Failed raid set component or raid4/5/6 journal device
|
||||
* 'a' = Alive but not in-sync
|
||||
* 'A' = Alive and in-sync raid set component or alive raid4/5/6 journal device
|
||||
* 'a' = Alive but not in-sync raid set component _or_ alive raid4/5/6 'write_back' journal device
|
||||
* 'A' = Alive and in-sync raid set component _or_ alive raid4/5/6 'write_through' journal device
|
||||
* '-' = Non-existing device (i.e. uspace passed '- -' into the ctr)
|
||||
*/
|
||||
static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync)
|
||||
static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev, bool array_in_sync)
|
||||
{
|
||||
if (!rdev->bdev)
|
||||
return "-";
|
||||
else if (test_bit(Faulty, &rdev->flags))
|
||||
return "D";
|
||||
else if (test_bit(Journal, &rdev->flags))
|
||||
return "A";
|
||||
return (rs->journal_dev.mode == R5C_JOURNAL_MODE_WRITE_THROUGH) ? "A" : "a";
|
||||
else if (!array_in_sync || !test_bit(In_sync, &rdev->flags))
|
||||
return "a";
|
||||
else
|
||||
|
@ -3315,7 +3389,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
|
||||
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
|
||||
for (i = 0; i < rs->raid_disks; i++)
|
||||
DMEMIT(__raid_dev_status(&rs->dev[i].rdev, array_in_sync));
|
||||
DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev, array_in_sync));
|
||||
|
||||
/*
|
||||
* In-sync/Reshape ratio:
|
||||
|
@ -3366,7 +3440,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
* v1.10.0+:
|
||||
*/
|
||||
DMEMIT(" %s", test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ?
|
||||
__raid_dev_status(&rs->journal_dev.rdev, 0) : "-");
|
||||
__raid_dev_status(rs, &rs->journal_dev.rdev, 0) : "-");
|
||||
break;
|
||||
|
||||
case STATUSTYPE_TABLE:
|
||||
|
@ -3381,39 +3455,30 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
write_mostly_params +
|
||||
hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) +
|
||||
hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 +
|
||||
(test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0);
|
||||
(test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0) +
|
||||
(test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags) ? 2 : 0);
|
||||
|
||||
/* Emit table line */
|
||||
/* This has to be in the documented order for userspace! */
|
||||
DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors);
|
||||
if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags))
|
||||
DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT),
|
||||
raid10_md_layout_to_format(mddev->layout));
|
||||
if (test_bit(__CTR_FLAG_RAID10_COPIES, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_COPIES),
|
||||
raid10_md_layout_to_copies(mddev->layout));
|
||||
if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
|
||||
DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC));
|
||||
if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
|
||||
DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_SYNC));
|
||||
if (test_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE),
|
||||
(unsigned long long) to_sector(mddev->bitmap_info.chunksize));
|
||||
if (test_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags))
|
||||
DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET),
|
||||
(unsigned long long) rs->data_offset);
|
||||
if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags))
|
||||
DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP),
|
||||
mddev->bitmap_info.daemon_sleep);
|
||||
if (test_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_DELTA_DISKS),
|
||||
max(rs->delta_disks, mddev->delta_disks));
|
||||
if (test_bit(__CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_STRIPE_CACHE),
|
||||
max_nr_stripes);
|
||||
if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags))
|
||||
DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC));
|
||||
if (rebuild_disks)
|
||||
for (i = 0; i < rs->raid_disks; i++)
|
||||
if (test_bit(rs->dev[i].rdev.raid_disk, (void *) rs->rebuild_disks))
|
||||
DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD),
|
||||
rs->dev[i].rdev.raid_disk);
|
||||
if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags))
|
||||
DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP),
|
||||
mddev->bitmap_info.daemon_sleep);
|
||||
if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE),
|
||||
mddev->sync_speed_min);
|
||||
if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE),
|
||||
mddev->sync_speed_max);
|
||||
if (write_mostly_params)
|
||||
for (i = 0; i < rs->raid_disks; i++)
|
||||
if (test_bit(WriteMostly, &rs->dev[i].rdev.flags))
|
||||
|
@ -3422,15 +3487,30 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
if (test_bit(__CTR_FLAG_MAX_WRITE_BEHIND, &rs->ctr_flags))
|
||||
DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_WRITE_BEHIND),
|
||||
mddev->bitmap_info.max_write_behind);
|
||||
if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE),
|
||||
mddev->sync_speed_max);
|
||||
if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE),
|
||||
mddev->sync_speed_min);
|
||||
if (test_bit(__CTR_FLAG_STRIPE_CACHE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_STRIPE_CACHE),
|
||||
max_nr_stripes);
|
||||
if (test_bit(__CTR_FLAG_REGION_SIZE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_REGION_SIZE),
|
||||
(unsigned long long) to_sector(mddev->bitmap_info.chunksize));
|
||||
if (test_bit(__CTR_FLAG_RAID10_COPIES, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_COPIES),
|
||||
raid10_md_layout_to_copies(mddev->layout));
|
||||
if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags))
|
||||
DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT),
|
||||
raid10_md_layout_to_format(mddev->layout));
|
||||
if (test_bit(__CTR_FLAG_DELTA_DISKS, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_DELTA_DISKS),
|
||||
max(rs->delta_disks, mddev->delta_disks));
|
||||
if (test_bit(__CTR_FLAG_DATA_OFFSET, &rs->ctr_flags))
|
||||
DMEMIT(" %s %llu", dm_raid_arg_name_by_flag(CTR_FLAG_DATA_OFFSET),
|
||||
(unsigned long long) rs->data_offset);
|
||||
if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags))
|
||||
DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV),
|
||||
__get_dev_name(rs->journal_dev.dev));
|
||||
if (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_MODE),
|
||||
md_journal_mode_to_dm_raid(rs->journal_dev.mode));
|
||||
DMEMIT(" %d", rs->raid_disks);
|
||||
for (i = 0; i < rs->raid_disks; i++)
|
||||
DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev),
|
||||
|
@ -3791,7 +3871,7 @@ static void raid_resume(struct dm_target *ti)
|
|||
|
||||
static struct target_type raid_target = {
|
||||
.name = "raid",
|
||||
.version = {1, 10, 1},
|
||||
.version = {1, 11, 1},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = raid_ctr,
|
||||
.dtr = raid_dtr,
|
||||
|
|
|
@ -280,7 +280,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
|
|||
if (!rq->q->mq_ops)
|
||||
dm_old_requeue_request(rq);
|
||||
else
|
||||
dm_mq_delay_requeue_request(rq, delay_requeue ? 5000 : 0);
|
||||
dm_mq_delay_requeue_request(rq, delay_requeue ? 100/*ms*/ : 0);
|
||||
|
||||
rq_completed(md, rw, false);
|
||||
}
|
||||
|
@ -815,10 +815,14 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
|
|||
dm_init_md_queue(md);
|
||||
|
||||
/* backfill 'mq' sysfs registration normally done in blk_register_queue */
|
||||
blk_mq_register_dev(disk_to_dev(md->disk), q);
|
||||
err = blk_mq_register_dev(disk_to_dev(md->disk), q);
|
||||
if (err)
|
||||
goto out_cleanup_queue;
|
||||
|
||||
return 0;
|
||||
|
||||
out_cleanup_queue:
|
||||
blk_cleanup_queue(q);
|
||||
out_tag_set:
|
||||
blk_mq_free_tag_set(md->tag_set);
|
||||
out_kfree_tag_set:
|
||||
|
|
|
@ -442,6 +442,7 @@ static void stripe_io_hints(struct dm_target *ti,
|
|||
static struct target_type stripe_target = {
|
||||
.name = "striped",
|
||||
.version = {1, 6, 0},
|
||||
.features = DM_TARGET_PASSES_INTEGRITY,
|
||||
.module = THIS_MODULE,
|
||||
.ctr = stripe_ctr,
|
||||
.dtr = stripe_dtr,
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
|
||||
struct dm_table {
|
||||
struct mapped_device *md;
|
||||
unsigned type;
|
||||
enum dm_queue_mode type;
|
||||
|
||||
/* btree table */
|
||||
unsigned int depth;
|
||||
|
@ -47,6 +47,7 @@ struct dm_table {
|
|||
bool integrity_supported:1;
|
||||
bool singleton:1;
|
||||
bool all_blk_mq:1;
|
||||
unsigned integrity_added:1;
|
||||
|
||||
/*
|
||||
* Indicates the rw permissions for the new logical
|
||||
|
@ -372,7 +373,7 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
|
|||
*/
|
||||
dev_t dm_get_dev_t(const char *path)
|
||||
{
|
||||
dev_t uninitialized_var(dev);
|
||||
dev_t dev;
|
||||
struct block_device *bdev;
|
||||
|
||||
bdev = lookup_bdev(path);
|
||||
|
@ -626,13 +627,13 @@ static int validate_hardware_logical_block_alignment(struct dm_table *table,
|
|||
|
||||
struct dm_target *uninitialized_var(ti);
|
||||
struct queue_limits ti_limits;
|
||||
unsigned i = 0;
|
||||
unsigned i;
|
||||
|
||||
/*
|
||||
* Check each entry in the table in turn.
|
||||
*/
|
||||
while (i < dm_table_get_num_targets(table)) {
|
||||
ti = dm_table_get_target(table, i++);
|
||||
for (i = 0; i < dm_table_get_num_targets(table); i++) {
|
||||
ti = dm_table_get_target(table, i);
|
||||
|
||||
blk_set_stacking_limits(&ti_limits);
|
||||
|
||||
|
@ -725,6 +726,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
|
|||
t->immutable_target_type = tgt->type;
|
||||
}
|
||||
|
||||
if (dm_target_has_integrity(tgt->type))
|
||||
t->integrity_added = 1;
|
||||
|
||||
tgt->table = t;
|
||||
tgt->begin = start;
|
||||
tgt->len = len;
|
||||
|
@ -821,19 +825,19 @@ void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
|
|||
}
|
||||
EXPORT_SYMBOL(dm_consume_args);
|
||||
|
||||
static bool __table_type_bio_based(unsigned table_type)
|
||||
static bool __table_type_bio_based(enum dm_queue_mode table_type)
|
||||
{
|
||||
return (table_type == DM_TYPE_BIO_BASED ||
|
||||
table_type == DM_TYPE_DAX_BIO_BASED);
|
||||
}
|
||||
|
||||
static bool __table_type_request_based(unsigned table_type)
|
||||
static bool __table_type_request_based(enum dm_queue_mode table_type)
|
||||
{
|
||||
return (table_type == DM_TYPE_REQUEST_BASED ||
|
||||
table_type == DM_TYPE_MQ_REQUEST_BASED);
|
||||
}
|
||||
|
||||
void dm_table_set_type(struct dm_table *t, unsigned type)
|
||||
void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
|
||||
{
|
||||
t->type = type;
|
||||
}
|
||||
|
@ -850,11 +854,11 @@ static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
|
|||
static bool dm_table_supports_dax(struct dm_table *t)
|
||||
{
|
||||
struct dm_target *ti;
|
||||
unsigned i = 0;
|
||||
unsigned i;
|
||||
|
||||
/* Ensure that all targets support DAX. */
|
||||
while (i < dm_table_get_num_targets(t)) {
|
||||
ti = dm_table_get_target(t, i++);
|
||||
for (i = 0; i < dm_table_get_num_targets(t); i++) {
|
||||
ti = dm_table_get_target(t, i);
|
||||
|
||||
if (!ti->type->direct_access)
|
||||
return false;
|
||||
|
@ -875,7 +879,7 @@ static int dm_table_determine_type(struct dm_table *t)
|
|||
struct dm_target *tgt;
|
||||
struct dm_dev_internal *dd;
|
||||
struct list_head *devices = dm_table_get_devices(t);
|
||||
unsigned live_md_type = dm_get_md_type(t->md);
|
||||
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
|
||||
|
||||
if (t->type != DM_TYPE_NONE) {
|
||||
/* target already set the table's type */
|
||||
|
@ -984,7 +988,7 @@ verify_rq_based:
|
|||
return 0;
|
||||
}
|
||||
|
||||
unsigned dm_table_get_type(struct dm_table *t)
|
||||
enum dm_queue_mode dm_table_get_type(struct dm_table *t)
|
||||
{
|
||||
return t->type;
|
||||
}
|
||||
|
@ -1006,11 +1010,11 @@ struct dm_target *dm_table_get_immutable_target(struct dm_table *t)
|
|||
|
||||
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t)
|
||||
{
|
||||
struct dm_target *uninitialized_var(ti);
|
||||
unsigned i = 0;
|
||||
struct dm_target *ti;
|
||||
unsigned i;
|
||||
|
||||
while (i < dm_table_get_num_targets(t)) {
|
||||
ti = dm_table_get_target(t, i++);
|
||||
for (i = 0; i < dm_table_get_num_targets(t); i++) {
|
||||
ti = dm_table_get_target(t, i);
|
||||
if (dm_target_is_wildcard(ti->type))
|
||||
return ti;
|
||||
}
|
||||
|
@ -1035,7 +1039,7 @@ bool dm_table_all_blk_mq_devices(struct dm_table *t)
|
|||
|
||||
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
|
||||
{
|
||||
unsigned type = dm_table_get_type(t);
|
||||
enum dm_queue_mode type = dm_table_get_type(t);
|
||||
unsigned per_io_data_size = 0;
|
||||
struct dm_target *tgt;
|
||||
unsigned i;
|
||||
|
@ -1131,6 +1135,13 @@ static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t)
|
|||
struct list_head *devices = dm_table_get_devices(t);
|
||||
struct dm_dev_internal *dd = NULL;
|
||||
struct gendisk *prev_disk = NULL, *template_disk = NULL;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < dm_table_get_num_targets(t); i++) {
|
||||
struct dm_target *ti = dm_table_get_target(t, i);
|
||||
if (!dm_target_passes_integrity(ti->type))
|
||||
goto no_integrity;
|
||||
}
|
||||
|
||||
list_for_each_entry(dd, devices, list) {
|
||||
template_disk = dd->dm_dev->bdev->bd_disk;
|
||||
|
@ -1168,6 +1179,10 @@ static int dm_table_register_integrity(struct dm_table *t)
|
|||
struct mapped_device *md = t->md;
|
||||
struct gendisk *template_disk = NULL;
|
||||
|
||||
/* If target handles integrity itself do not register it here. */
|
||||
if (t->integrity_added)
|
||||
return 0;
|
||||
|
||||
template_disk = dm_table_get_integrity_disk(t);
|
||||
if (!template_disk)
|
||||
return 0;
|
||||
|
@ -1313,15 +1328,16 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev,
|
|||
*/
|
||||
bool dm_table_has_no_data_devices(struct dm_table *table)
|
||||
{
|
||||
struct dm_target *uninitialized_var(ti);
|
||||
unsigned i = 0, num_devices = 0;
|
||||
struct dm_target *ti;
|
||||
unsigned i, num_devices;
|
||||
|
||||
while (i < dm_table_get_num_targets(table)) {
|
||||
ti = dm_table_get_target(table, i++);
|
||||
for (i = 0; i < dm_table_get_num_targets(table); i++) {
|
||||
ti = dm_table_get_target(table, i);
|
||||
|
||||
if (!ti->type->iterate_devices)
|
||||
return false;
|
||||
|
||||
num_devices = 0;
|
||||
ti->type->iterate_devices(ti, count_device, &num_devices);
|
||||
if (num_devices)
|
||||
return false;
|
||||
|
@ -1336,16 +1352,16 @@ bool dm_table_has_no_data_devices(struct dm_table *table)
|
|||
int dm_calculate_queue_limits(struct dm_table *table,
|
||||
struct queue_limits *limits)
|
||||
{
|
||||
struct dm_target *uninitialized_var(ti);
|
||||
struct dm_target *ti;
|
||||
struct queue_limits ti_limits;
|
||||
unsigned i = 0;
|
||||
unsigned i;
|
||||
|
||||
blk_set_stacking_limits(limits);
|
||||
|
||||
while (i < dm_table_get_num_targets(table)) {
|
||||
for (i = 0; i < dm_table_get_num_targets(table); i++) {
|
||||
blk_set_stacking_limits(&ti_limits);
|
||||
|
||||
ti = dm_table_get_target(table, i++);
|
||||
ti = dm_table_get_target(table, i);
|
||||
|
||||
if (!ti->type->iterate_devices)
|
||||
goto combine_limits;
|
||||
|
@ -1394,6 +1410,9 @@ static void dm_table_verify_integrity(struct dm_table *t)
|
|||
{
|
||||
struct gendisk *template_disk = NULL;
|
||||
|
||||
if (t->integrity_added)
|
||||
return;
|
||||
|
||||
if (t->integrity_supported) {
|
||||
/*
|
||||
* Verify that the original integrity profile
|
||||
|
@ -1424,7 +1443,7 @@ static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
|
|||
static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
|
||||
{
|
||||
struct dm_target *ti;
|
||||
unsigned i = 0;
|
||||
unsigned i;
|
||||
|
||||
/*
|
||||
* Require at least one underlying device to support flushes.
|
||||
|
@ -1432,8 +1451,8 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
|
|||
* so we need to use iterate_devices here, which targets
|
||||
* supporting flushes must provide.
|
||||
*/
|
||||
while (i < dm_table_get_num_targets(t)) {
|
||||
ti = dm_table_get_target(t, i++);
|
||||
for (i = 0; i < dm_table_get_num_targets(t); i++) {
|
||||
ti = dm_table_get_target(t, i);
|
||||
|
||||
if (!ti->num_flush_bios)
|
||||
continue;
|
||||
|
@ -1477,10 +1496,10 @@ static bool dm_table_all_devices_attribute(struct dm_table *t,
|
|||
iterate_devices_callout_fn func)
|
||||
{
|
||||
struct dm_target *ti;
|
||||
unsigned i = 0;
|
||||
unsigned i;
|
||||
|
||||
while (i < dm_table_get_num_targets(t)) {
|
||||
ti = dm_table_get_target(t, i++);
|
||||
for (i = 0; i < dm_table_get_num_targets(t); i++) {
|
||||
ti = dm_table_get_target(t, i);
|
||||
|
||||
if (!ti->type->iterate_devices ||
|
||||
!ti->type->iterate_devices(ti, func, NULL))
|
||||
|
@ -1501,10 +1520,10 @@ static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *de
|
|||
static bool dm_table_supports_write_same(struct dm_table *t)
|
||||
{
|
||||
struct dm_target *ti;
|
||||
unsigned i = 0;
|
||||
unsigned i;
|
||||
|
||||
while (i < dm_table_get_num_targets(t)) {
|
||||
ti = dm_table_get_target(t, i++);
|
||||
for (i = 0; i < dm_table_get_num_targets(t); i++) {
|
||||
ti = dm_table_get_target(t, i);
|
||||
|
||||
if (!ti->num_write_same_bios)
|
||||
return false;
|
||||
|
@ -1556,7 +1575,7 @@ static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
|
|||
static bool dm_table_supports_discards(struct dm_table *t)
|
||||
{
|
||||
struct dm_target *ti;
|
||||
unsigned i = 0;
|
||||
unsigned i;
|
||||
|
||||
/*
|
||||
* Unless any target used by the table set discards_supported,
|
||||
|
@ -1565,8 +1584,8 @@ static bool dm_table_supports_discards(struct dm_table *t)
|
|||
* so we need to use iterate_devices here, which targets
|
||||
* supporting discard selectively must provide.
|
||||
*/
|
||||
while (i < dm_table_get_num_targets(t)) {
|
||||
ti = dm_table_get_target(t, i++);
|
||||
for (i = 0; i < dm_table_get_num_targets(t); i++) {
|
||||
ti = dm_table_get_target(t, i);
|
||||
|
||||
if (!ti->num_discard_bios)
|
||||
continue;
|
||||
|
@ -1672,6 +1691,8 @@ static void suspend_targets(struct dm_table *t, enum suspend_mode mode)
|
|||
int i = t->num_targets;
|
||||
struct dm_target *ti = t->targets;
|
||||
|
||||
lockdep_assert_held(&t->md->suspend_lock);
|
||||
|
||||
while (i--) {
|
||||
switch (mode) {
|
||||
case PRESUSPEND:
|
||||
|
@ -1719,6 +1740,8 @@ int dm_table_resume_targets(struct dm_table *t)
|
|||
{
|
||||
int i, r = 0;
|
||||
|
||||
lockdep_assert_held(&t->md->suspend_lock);
|
||||
|
||||
for (i = 0; i < t->num_targets; i++) {
|
||||
struct dm_target *ti = t->targets + i;
|
||||
|
||||
|
|
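The dm-table.c hunks above repeatedly apply the same mechanical refactor: the open-coded `while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); ... }` iteration becomes a counted for loop, and several uninitialized_var() annotations (on ti and dev) are dropped along the way, since the compiler can now see the variable is assigned before use. A minimal userspace sketch of the resulting loop shape, with a made-up target array standing in for the real table:

    #include <stdio.h>

    struct target { const char *name; };

    static struct target targets[] = { { "linear" }, { "crypt" }, { "verity" } };

    static unsigned num_targets(void)            { return 3; }
    static struct target *get_target(unsigned i) { return &targets[i]; }

    int main(void)
    {
        struct target *ti;  /* no uninitialized_var() needed any more */
        unsigned i;

        /* index and element fetch stay together, one target per iteration */
        for (i = 0; i < num_targets(); i++) {
            ti = get_target(i);
            printf("target %u: %s\n", i, ti->name);
        }
        return 0;
    }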
|
@ -77,7 +77,6 @@
|
|||
#define THIN_SUPERBLOCK_MAGIC 27022010
|
||||
#define THIN_SUPERBLOCK_LOCATION 0
|
||||
#define THIN_VERSION 2
|
||||
#define THIN_METADATA_CACHE_SIZE 64
|
||||
#define SECTOR_TO_BLOCK_SHIFT 3
|
||||
|
||||
/*
|
||||
|
@ -686,7 +685,6 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f
|
|||
int r;
|
||||
|
||||
pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
|
||||
THIN_METADATA_CACHE_SIZE,
|
||||
THIN_MAX_CONCURRENT_LOCKS);
|
||||
if (IS_ERR(pmd->bm)) {
|
||||
DMERR("could not create block manager");
|
||||
|
|
|
@@ -5,7 +5,7 @@
  */
 
 #include "dm-thin-metadata.h"
-#include "dm-bio-prison.h"
+#include "dm-bio-prison-v1.h"
 #include "dm.h"
 
 #include <linux/device-mapper.h>
@ -1069,6 +1069,7 @@ static void passdown_endio(struct bio *bio)
|
|||
* to unmap (we ignore err).
|
||||
*/
|
||||
queue_passdown_pt2(bio->bi_private);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
|
||||
|
|
|
@ -188,7 +188,7 @@ error:
|
|||
static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io,
|
||||
u8 *want_digest, u8 *data)
|
||||
{
|
||||
if (unlikely(verity_hash(v, verity_io_hash_desc(v, io),
|
||||
if (unlikely(verity_hash(v, verity_io_hash_req(v, io),
|
||||
data, 1 << v->data_dev_block_bits,
|
||||
verity_io_real_digest(v, io))))
|
||||
return 0;
|
||||
|
@ -397,7 +397,7 @@ static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io,
|
|||
}
|
||||
|
||||
/* Always re-validate the corrected block against the expected hash */
|
||||
r = verity_hash(v, verity_io_hash_desc(v, io), fio->output,
|
||||
r = verity_hash(v, verity_io_hash_req(v, io), fio->output,
|
||||
1 << v->data_dev_block_bits,
|
||||
verity_io_real_digest(v, io));
|
||||
if (unlikely(r < 0))
|
||||
|
|
|
@ -93,81 +93,123 @@ static sector_t verity_position_at_level(struct dm_verity *v, sector_t block,
|
|||
}
|
||||
|
||||
/*
|
||||
* Wrapper for crypto_shash_init, which handles verity salting.
|
||||
* Callback function for asynchronous crypto API completion notification
|
||||
*/
|
||||
static int verity_hash_init(struct dm_verity *v, struct shash_desc *desc)
|
||||
static void verity_op_done(struct crypto_async_request *base, int err)
|
||||
{
|
||||
struct verity_result *res = (struct verity_result *)base->data;
|
||||
|
||||
if (err == -EINPROGRESS)
|
||||
return;
|
||||
|
||||
res->err = err;
|
||||
complete(&res->completion);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for async crypto API callback
|
||||
*/
|
||||
static inline int verity_complete_op(struct verity_result *res, int ret)
|
||||
{
|
||||
switch (ret) {
|
||||
case 0:
|
||||
break;
|
||||
|
||||
case -EINPROGRESS:
|
||||
case -EBUSY:
|
||||
ret = wait_for_completion_interruptible(&res->completion);
|
||||
if (!ret)
|
||||
ret = res->err;
|
||||
reinit_completion(&res->completion);
|
||||
break;
|
||||
|
||||
default:
|
||||
DMERR("verity_wait_hash: crypto op submission failed: %d", ret);
|
||||
}
|
||||
|
||||
if (unlikely(ret < 0))
|
||||
DMERR("verity_wait_hash: crypto op failed: %d", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int verity_hash_update(struct dm_verity *v, struct ahash_request *req,
|
||||
const u8 *data, size_t len,
|
||||
struct verity_result *res)
|
||||
{
|
||||
struct scatterlist sg;
|
||||
|
||||
sg_init_one(&sg, data, len);
|
||||
ahash_request_set_crypt(req, &sg, NULL, len);
|
||||
|
||||
return verity_complete_op(res, crypto_ahash_update(req));
|
||||
}
|
||||
|
||||
/*
|
||||
* Wrapper for crypto_ahash_init, which handles verity salting.
|
||||
*/
|
||||
static int verity_hash_init(struct dm_verity *v, struct ahash_request *req,
|
||||
struct verity_result *res)
|
||||
{
|
||||
int r;
|
||||
|
||||
desc->tfm = v->tfm;
|
||||
desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
|
||||
ahash_request_set_tfm(req, v->tfm);
|
||||
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
|
||||
CRYPTO_TFM_REQ_MAY_BACKLOG,
|
||||
verity_op_done, (void *)res);
|
||||
init_completion(&res->completion);
|
||||
|
||||
r = crypto_shash_init(desc);
|
||||
r = verity_complete_op(res, crypto_ahash_init(req));
|
||||
|
||||
if (unlikely(r < 0)) {
|
||||
DMERR("crypto_shash_init failed: %d", r);
|
||||
DMERR("crypto_ahash_init failed: %d", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
if (likely(v->version >= 1)) {
|
||||
r = crypto_shash_update(desc, v->salt, v->salt_size);
|
||||
|
||||
if (unlikely(r < 0)) {
|
||||
DMERR("crypto_shash_update failed: %d", r);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verity_hash_update(struct dm_verity *v, struct shash_desc *desc,
|
||||
const u8 *data, size_t len)
|
||||
{
|
||||
int r = crypto_shash_update(desc, data, len);
|
||||
|
||||
if (unlikely(r < 0))
|
||||
DMERR("crypto_shash_update failed: %d", r);
|
||||
if (likely(v->version >= 1))
|
||||
r = verity_hash_update(v, req, v->salt, v->salt_size, res);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int verity_hash_final(struct dm_verity *v, struct shash_desc *desc,
|
||||
u8 *digest)
|
||||
static int verity_hash_final(struct dm_verity *v, struct ahash_request *req,
|
||||
u8 *digest, struct verity_result *res)
|
||||
{
|
||||
int r;
|
||||
|
||||
if (unlikely(!v->version)) {
|
||||
r = crypto_shash_update(desc, v->salt, v->salt_size);
|
||||
r = verity_hash_update(v, req, v->salt, v->salt_size, res);
|
||||
|
||||
if (r < 0) {
|
||||
DMERR("crypto_shash_update failed: %d", r);
|
||||
return r;
|
||||
DMERR("verity_hash_final failed updating salt: %d", r);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
r = crypto_shash_final(desc, digest);
|
||||
|
||||
if (unlikely(r < 0))
|
||||
DMERR("crypto_shash_final failed: %d", r);
|
||||
|
||||
ahash_request_set_crypt(req, NULL, digest, 0);
|
||||
r = verity_complete_op(res, crypto_ahash_final(req));
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
int verity_hash(struct dm_verity *v, struct shash_desc *desc,
|
||||
int verity_hash(struct dm_verity *v, struct ahash_request *req,
|
||||
const u8 *data, size_t len, u8 *digest)
|
||||
{
|
||||
int r;
|
||||
struct verity_result res;
|
||||
|
||||
r = verity_hash_init(v, desc);
|
||||
r = verity_hash_init(v, req, &res);
|
||||
if (unlikely(r < 0))
|
||||
return r;
|
||||
goto out;
|
||||
|
||||
r = verity_hash_update(v, desc, data, len);
|
||||
r = verity_hash_update(v, req, data, len, &res);
|
||||
if (unlikely(r < 0))
|
||||
return r;
|
||||
goto out;
|
||||
|
||||
return verity_hash_final(v, desc, digest);
|
||||
r = verity_hash_final(v, req, digest, &res);
|
||||
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level,
|
||||
|
@ -275,7 +317,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
|
|||
goto release_ret_r;
|
||||
}
|
||||
|
||||
r = verity_hash(v, verity_io_hash_desc(v, io),
|
||||
r = verity_hash(v, verity_io_hash_req(v, io),
|
||||
data, 1 << v->hash_dev_block_bits,
|
||||
verity_io_real_digest(v, io));
|
||||
if (unlikely(r < 0))
|
||||
|
@ -343,6 +385,49 @@ out:
|
|||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculates the digest for the given bio
|
||||
*/
|
||||
int verity_for_io_block(struct dm_verity *v, struct dm_verity_io *io,
|
||||
struct bvec_iter *iter, struct verity_result *res)
|
||||
{
|
||||
unsigned int todo = 1 << v->data_dev_block_bits;
|
||||
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
|
||||
struct scatterlist sg;
|
||||
struct ahash_request *req = verity_io_hash_req(v, io);
|
||||
|
||||
do {
|
||||
int r;
|
||||
unsigned int len;
|
||||
struct bio_vec bv = bio_iter_iovec(bio, *iter);
|
||||
|
||||
sg_init_table(&sg, 1);
|
||||
|
||||
len = bv.bv_len;
|
||||
|
||||
if (likely(len >= todo))
|
||||
len = todo;
|
||||
/*
|
||||
* Operating on a single page at a time looks suboptimal
|
||||
* until you consider the typical block size is 4,096B.
|
||||
* Going through this loop twice should be very rare.
|
||||
*/
|
||||
sg_set_page(&sg, bv.bv_page, len, bv.bv_offset);
|
||||
ahash_request_set_crypt(req, &sg, NULL, len);
|
||||
r = verity_complete_op(res, crypto_ahash_update(req));
|
||||
|
||||
if (unlikely(r < 0)) {
|
||||
DMERR("verity_for_io_block crypto op failed: %d", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
bio_advance_iter(bio, iter, len);
|
||||
todo -= len;
|
||||
} while (todo);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calls function process for 1 << v->data_dev_block_bits bytes in the bio_vec
|
||||
* starting from iter.
|
||||
|
@ -381,12 +466,6 @@ int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int verity_bv_hash_update(struct dm_verity *v, struct dm_verity_io *io,
|
||||
u8 *data, size_t len)
|
||||
{
|
||||
return verity_hash_update(v, verity_io_hash_desc(v, io), data, len);
|
||||
}
|
||||
|
||||
static int verity_bv_zero(struct dm_verity *v, struct dm_verity_io *io,
|
||||
u8 *data, size_t len)
|
||||
{
|
||||
|
@ -403,10 +482,11 @@ static int verity_verify_io(struct dm_verity_io *io)
|
|||
struct dm_verity *v = io->v;
|
||||
struct bvec_iter start;
|
||||
unsigned b;
|
||||
struct verity_result res;
|
||||
|
||||
for (b = 0; b < io->n_blocks; b++) {
|
||||
int r;
|
||||
struct shash_desc *desc = verity_io_hash_desc(v, io);
|
||||
struct ahash_request *req = verity_io_hash_req(v, io);
|
||||
|
||||
r = verity_hash_for_block(v, io, io->block + b,
|
||||
verity_io_want_digest(v, io),
|
||||
|
@ -427,16 +507,17 @@ static int verity_verify_io(struct dm_verity_io *io)
|
|||
continue;
|
||||
}
|
||||
|
||||
r = verity_hash_init(v, desc);
|
||||
r = verity_hash_init(v, req, &res);
|
||||
if (unlikely(r < 0))
|
||||
return r;
|
||||
|
||||
start = io->iter;
|
||||
r = verity_for_bv_block(v, io, &io->iter, verity_bv_hash_update);
|
||||
r = verity_for_io_block(v, io, &io->iter, &res);
|
||||
if (unlikely(r < 0))
|
||||
return r;
|
||||
|
||||
r = verity_hash_final(v, desc, verity_io_real_digest(v, io));
|
||||
r = verity_hash_final(v, req, verity_io_real_digest(v, io),
|
||||
&res);
|
||||
if (unlikely(r < 0))
|
||||
return r;
|
||||
|
||||
|
@ -705,7 +786,7 @@ static void verity_dtr(struct dm_target *ti)
|
|||
kfree(v->zero_digest);
|
||||
|
||||
if (v->tfm)
|
||||
crypto_free_shash(v->tfm);
|
||||
crypto_free_ahash(v->tfm);
|
||||
|
||||
kfree(v->alg_name);
|
||||
|
||||
|
@ -723,7 +804,7 @@ static void verity_dtr(struct dm_target *ti)
|
|||
static int verity_alloc_zero_digest(struct dm_verity *v)
|
||||
{
|
||||
int r = -ENOMEM;
|
||||
struct shash_desc *desc;
|
||||
struct ahash_request *req;
|
||||
u8 *zero_data;
|
||||
|
||||
v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
|
||||
|
@ -731,9 +812,9 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
|
|||
if (!v->zero_digest)
|
||||
return r;
|
||||
|
||||
desc = kmalloc(v->shash_descsize, GFP_KERNEL);
|
||||
req = kmalloc(v->ahash_reqsize, GFP_KERNEL);
|
||||
|
||||
if (!desc)
|
||||
if (!req)
|
||||
return r; /* verity_dtr will free zero_digest */
|
||||
|
||||
zero_data = kzalloc(1 << v->data_dev_block_bits, GFP_KERNEL);
|
||||
|
@ -741,11 +822,11 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
|
|||
if (!zero_data)
|
||||
goto out;
|
||||
|
||||
r = verity_hash(v, desc, zero_data, 1 << v->data_dev_block_bits,
|
||||
r = verity_hash(v, req, zero_data, 1 << v->data_dev_block_bits,
|
||||
v->zero_digest);
|
||||
|
||||
out:
|
||||
kfree(desc);
|
||||
kfree(req);
|
||||
kfree(zero_data);
|
||||
|
||||
return r;
|
||||
|
@ -923,21 +1004,21 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|||
goto bad;
|
||||
}
|
||||
|
||||
v->tfm = crypto_alloc_shash(v->alg_name, 0, 0);
|
||||
v->tfm = crypto_alloc_ahash(v->alg_name, 0, 0);
|
||||
if (IS_ERR(v->tfm)) {
|
||||
ti->error = "Cannot initialize hash function";
|
||||
r = PTR_ERR(v->tfm);
|
||||
v->tfm = NULL;
|
||||
goto bad;
|
||||
}
|
||||
v->digest_size = crypto_shash_digestsize(v->tfm);
|
||||
v->digest_size = crypto_ahash_digestsize(v->tfm);
|
||||
if ((1 << v->hash_dev_block_bits) < v->digest_size * 2) {
|
||||
ti->error = "Digest size too big";
|
||||
r = -EINVAL;
|
||||
goto bad;
|
||||
}
|
||||
v->shash_descsize =
|
||||
sizeof(struct shash_desc) + crypto_shash_descsize(v->tfm);
|
||||
v->ahash_reqsize = sizeof(struct ahash_request) +
|
||||
crypto_ahash_reqsize(v->tfm);
|
||||
|
||||
v->root_digest = kmalloc(v->digest_size, GFP_KERNEL);
|
||||
if (!v->root_digest) {
|
||||
|
@ -1037,7 +1118,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv)
|
|||
}
|
||||
|
||||
ti->per_io_data_size = sizeof(struct dm_verity_io) +
|
||||
v->shash_descsize + v->digest_size * 2;
|
||||
v->ahash_reqsize + v->digest_size * 2;
|
||||
|
||||
r = verity_fec_ctr(v);
|
||||
if (r)
|
||||
|
|
|
@ -37,7 +37,7 @@ struct dm_verity {
|
|||
struct dm_target *ti;
|
||||
struct dm_bufio_client *bufio;
|
||||
char *alg_name;
|
||||
struct crypto_shash *tfm;
|
||||
struct crypto_ahash *tfm;
|
||||
u8 *root_digest; /* digest of the root block */
|
||||
u8 *salt; /* salt: its size is salt_size */
|
||||
u8 *zero_digest; /* digest for a zero block */
|
||||
|
@ -52,7 +52,7 @@ struct dm_verity {
|
|||
unsigned char levels; /* the number of tree levels */
|
||||
unsigned char version;
|
||||
unsigned digest_size; /* digest size for the current hash algorithm */
|
||||
unsigned shash_descsize;/* the size of temporary space for crypto */
|
||||
unsigned int ahash_reqsize;/* the size of temporary space for crypto */
|
||||
int hash_failed; /* set to 1 if hash of any block failed */
|
||||
enum verity_mode mode; /* mode for handling verification errors */
|
||||
unsigned corrupted_errs;/* Number of errors for corrupted blocks */
|
||||
|
@ -81,31 +81,36 @@ struct dm_verity_io {
|
|||
/*
|
||||
* Three variably-sized fields follow this struct:
|
||||
*
|
||||
* u8 hash_desc[v->shash_descsize];
|
||||
* u8 hash_req[v->ahash_reqsize];
|
||||
* u8 real_digest[v->digest_size];
|
||||
* u8 want_digest[v->digest_size];
|
||||
*
|
||||
* To access them use: verity_io_hash_desc(), verity_io_real_digest()
|
||||
* To access them use: verity_io_hash_req(), verity_io_real_digest()
|
||||
* and verity_io_want_digest().
|
||||
*/
|
||||
};
|
||||
|
||||
static inline struct shash_desc *verity_io_hash_desc(struct dm_verity *v,
|
||||
struct verity_result {
|
||||
struct completion completion;
|
||||
int err;
|
||||
};
|
||||
|
||||
static inline struct ahash_request *verity_io_hash_req(struct dm_verity *v,
|
||||
struct dm_verity_io *io)
|
||||
{
|
||||
return (struct shash_desc *)(io + 1);
|
||||
return (struct ahash_request *)(io + 1);
|
||||
}
|
||||
|
||||
static inline u8 *verity_io_real_digest(struct dm_verity *v,
|
||||
struct dm_verity_io *io)
|
||||
{
|
||||
return (u8 *)(io + 1) + v->shash_descsize;
|
||||
return (u8 *)(io + 1) + v->ahash_reqsize;
|
||||
}
|
||||
|
||||
static inline u8 *verity_io_want_digest(struct dm_verity *v,
|
||||
struct dm_verity_io *io)
|
||||
{
|
||||
return (u8 *)(io + 1) + v->shash_descsize + v->digest_size;
|
||||
return (u8 *)(io + 1) + v->ahash_reqsize + v->digest_size;
|
||||
}
|
||||
|
||||
static inline u8 *verity_io_digest_end(struct dm_verity *v,
|
||||
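The accessors above keep working because dm_verity_io is followed, within one allocation, by three variably-sized regions: now an ahash_request instead of a shash_desc, then the real and wanted digests, each reached by plain offset arithmetic from `io + 1`. A rough userspace sketch of that layout trick; the struct names, sizes and helpers below are invented for illustration only:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Fixed header, followed in the same allocation by variably-sized regions. */
    struct io_hdr { unsigned block; };

    struct layout {
        size_t req_size;    /* stands in for v->ahash_reqsize */
        size_t digest_size; /* stands in for v->digest_size   */
    };

    static void *io_hash_req(const struct layout *l, struct io_hdr *io)
    {
        (void)l;
        return io + 1;                      /* region starts right after the header */
    }

    static unsigned char *io_real_digest(const struct layout *l, struct io_hdr *io)
    {
        return (unsigned char *)(io + 1) + l->req_size;
    }

    static unsigned char *io_want_digest(const struct layout *l, struct io_hdr *io)
    {
        return (unsigned char *)(io + 1) + l->req_size + l->digest_size;
    }

    int main(void)
    {
        struct layout l = { .req_size = 64, .digest_size = 32 };
        struct io_hdr *io = calloc(1, sizeof(*io) + l.req_size + 2 * l.digest_size);

        memset(io_want_digest(&l, io), 0xaa, l.digest_size);
        printf("req=%p real=%p want=%p\n",
               io_hash_req(&l, io),
               (void *)io_real_digest(&l, io),
               (void *)io_want_digest(&l, io));
        free(io);
        return 0;
    }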
|
@ -120,7 +125,7 @@ extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,
|
|||
struct dm_verity_io *io,
|
||||
u8 *data, size_t len));
|
||||
|
||||
extern int verity_hash(struct dm_verity *v, struct shash_desc *desc,
|
||||
extern int verity_hash(struct dm_verity *v, struct ahash_request *req,
|
||||
const u8 *data, size_t len, u8 *digest);
|
||||
|
||||
extern int verity_hash_for_block(struct dm_verity *v, struct dm_verity_io *io,
|
||||
|
|
|
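The dm-verity changes above switch from the synchronous shash interface to the asynchronous ahash one: crypto_ahash_init/update/final may now return -EINPROGRESS or -EBUSY, so every submission goes through verity_complete_op(), which sleeps on a completion that verity_op_done() fires from the crypto driver's callback. Below is a userspace analogue of that submit-then-wait pattern, using pthreads in place of the kernel completion API; every name in it is invented for illustration and none of it is kernel code:

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    /* Rough stand-in for struct verity_result: a completion plus a status. */
    struct op_result {
        pthread_mutex_t lock;
        pthread_cond_t  done;
        int             completed;
        int             err;
    };

    /* Plays the role of verity_op_done(): ignore intermediate notifications,
     * record the final status and wake the waiter. */
    static void op_done(struct op_result *res, int err)
    {
        if (err == -EINPROGRESS)
            return;
        pthread_mutex_lock(&res->lock);
        res->err = err;
        res->completed = 1;
        pthread_cond_signal(&res->done);
        pthread_mutex_unlock(&res->lock);
    }

    /* Plays the role of verity_complete_op(): turn an async submission
     * into a synchronous result. */
    static int complete_op(struct op_result *res, int ret)
    {
        if (ret == -EINPROGRESS || ret == -EBUSY) {
            pthread_mutex_lock(&res->lock);
            while (!res->completed)
                pthread_cond_wait(&res->done, &res->lock);
            ret = res->err;
            res->completed = 0;     /* re-arm, like reinit_completion() */
            pthread_mutex_unlock(&res->lock);
        }
        if (ret < 0)
            fprintf(stderr, "crypto op failed: %d\n", ret);
        return ret;
    }

    /* Fake "hardware" completing the request from another thread. */
    static void *worker(void *arg)
    {
        op_done(arg, 0);
        return NULL;
    }

    int main(void)
    {
        static struct op_result res = {
            PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0
        };
        pthread_t t;

        pthread_create(&t, NULL, worker, &res);
        printf("final status: %d\n", complete_op(&res, -EINPROGRESS));
        pthread_join(&t, NULL);
        return 0;
    }

Build the sketch with `cc -pthread`; the point is only the control flow: submit, get a "still in flight" status, block until the callback reports the real result.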
@ -1104,8 +1104,18 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
|
|||
|
||||
__bio_clone_fast(clone, bio);
|
||||
|
||||
if (bio_integrity(bio)) {
|
||||
int r = bio_integrity_clone(clone, bio, GFP_NOIO);
|
||||
if (unlikely(bio_integrity(bio) != NULL)) {
|
||||
int r;
|
||||
|
||||
if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
|
||||
!dm_target_passes_integrity(tio->ti->type))) {
|
||||
DMWARN("%s: the target %s doesn't support integrity data.",
|
||||
dm_device_name(tio->io->md),
|
||||
tio->ti->type->name);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
r = bio_integrity_clone(clone, bio, GFP_NOIO);
|
||||
if (r < 0)
|
||||
return r;
|
||||
}
|
||||
|
@ -1113,7 +1123,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
|
|||
bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
|
||||
clone->bi_iter.bi_size = to_bytes(len);
|
||||
|
||||
if (bio_integrity(bio))
|
||||
if (unlikely(bio_integrity(bio) != NULL))
|
||||
bio_integrity_trim(clone, 0, len);
|
||||
|
||||
return 0;
|
||||
|
@ -1715,6 +1725,8 @@ static void event_callback(void *context)
|
|||
*/
|
||||
static void __set_size(struct mapped_device *md, sector_t size)
|
||||
{
|
||||
lockdep_assert_held(&md->suspend_lock);
|
||||
|
||||
set_capacity(md->disk, size);
|
||||
|
||||
i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
|
||||
|
@ -1822,13 +1834,13 @@ void dm_unlock_md_type(struct mapped_device *md)
|
|||
mutex_unlock(&md->type_lock);
|
||||
}
|
||||
|
||||
void dm_set_md_type(struct mapped_device *md, unsigned type)
|
||||
void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type)
|
||||
{
|
||||
BUG_ON(!mutex_is_locked(&md->type_lock));
|
||||
md->type = type;
|
||||
}
|
||||
|
||||
unsigned dm_get_md_type(struct mapped_device *md)
|
||||
enum dm_queue_mode dm_get_md_type(struct mapped_device *md)
|
||||
{
|
||||
return md->type;
|
||||
}
|
||||
|
@ -1855,7 +1867,7 @@ EXPORT_SYMBOL_GPL(dm_get_queue_limits);
|
|||
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
|
||||
{
|
||||
int r;
|
||||
unsigned type = dm_get_md_type(md);
|
||||
enum dm_queue_mode type = dm_get_md_type(md);
|
||||
|
||||
switch (type) {
|
||||
case DM_TYPE_REQUEST_BASED:
|
||||
|
@ -1886,6 +1898,9 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
|
|||
if (type == DM_TYPE_DAX_BIO_BASED)
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DAX, md->queue);
|
||||
break;
|
||||
case DM_TYPE_NONE:
|
||||
WARN_ON_ONCE(true);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -2164,8 +2179,6 @@ static void unlock_fs(struct mapped_device *md)
|
|||
* If __dm_suspend returns 0, the device is completely quiescent
|
||||
* now. There is no request-processing activity. All new requests
|
||||
* are being added to md->deferred list.
|
||||
*
|
||||
* Caller must hold md->suspend_lock
|
||||
*/
|
||||
static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
|
||||
unsigned suspend_flags, long task_state,
|
||||
|
@ -2183,6 +2196,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
|
|||
*/
|
||||
if (noflush)
|
||||
set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
|
||||
else
|
||||
pr_debug("%s: suspending with flush\n", dm_device_name(md));
|
||||
|
||||
/*
|
||||
* This gets reverted if there's an error later and the targets
|
||||
|
@ -2381,6 +2396,8 @@ static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_fla
|
|||
{
|
||||
struct dm_table *map = NULL;
|
||||
|
||||
lockdep_assert_held(&md->suspend_lock);
|
||||
|
||||
if (md->internal_suspend_count++)
|
||||
return; /* nested internal suspend */
|
||||
|
||||
|
@ -2571,7 +2588,7 @@ int dm_noflush_suspending(struct dm_target *ti)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
|
||||
|
||||
struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
|
||||
struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
|
||||
unsigned integrity, unsigned per_io_data_size)
|
||||
{
|
||||
struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
|
||||
|
|
|
@ -64,7 +64,7 @@ void dm_table_presuspend_undo_targets(struct dm_table *t);
|
|||
void dm_table_postsuspend_targets(struct dm_table *t);
|
||||
int dm_table_resume_targets(struct dm_table *t);
|
||||
int dm_table_any_congested(struct dm_table *t, int bdi_bits);
|
||||
unsigned dm_table_get_type(struct dm_table *t);
|
||||
enum dm_queue_mode dm_table_get_type(struct dm_table *t);
|
||||
struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
|
||||
struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
|
||||
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
|
||||
|
@ -76,8 +76,8 @@ struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
|
|||
|
||||
void dm_lock_md_type(struct mapped_device *md);
|
||||
void dm_unlock_md_type(struct mapped_device *md);
|
||||
void dm_set_md_type(struct mapped_device *md, unsigned type);
|
||||
unsigned dm_get_md_type(struct mapped_device *md);
|
||||
void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type);
|
||||
enum dm_queue_mode dm_get_md_type(struct mapped_device *md);
|
||||
struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
|
||||
|
||||
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
|
||||
|
@ -204,7 +204,7 @@ void dm_kcopyd_exit(void);
|
|||
/*
|
||||
* Mempool operations
|
||||
*/
|
||||
struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
|
||||
struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
|
||||
unsigned integrity, unsigned per_bio_data_size);
|
||||
void dm_free_md_mempools(struct dm_md_mempools *pools);
|
||||
|
||||
|
|
|
@ -378,7 +378,6 @@ struct dm_block_manager {
|
|||
|
||||
struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
|
||||
unsigned block_size,
|
||||
unsigned cache_size,
|
||||
unsigned max_held_per_thread)
|
||||
{
|
||||
int r;
|
||||
|
|
|
@ -33,7 +33,7 @@ void *dm_block_data(struct dm_block *b);
|
|||
struct dm_block_manager;
|
||||
struct dm_block_manager *dm_block_manager_create(
|
||||
struct block_device *bdev, unsigned block_size,
|
||||
unsigned cache_size, unsigned max_held_per_thread);
|
||||
unsigned max_held_per_thread);
|
||||
void dm_block_manager_destroy(struct dm_block_manager *bm);
|
||||
|
||||
unsigned dm_bm_block_size(struct dm_block_manager *bm);
|
||||
|
|
|
@ -902,8 +902,12 @@ static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest,
|
|||
else
|
||||
*result_key = le64_to_cpu(ro_node(s)->keys[0]);
|
||||
|
||||
if (next_block || flags & INTERNAL_NODE)
|
||||
block = value64(ro_node(s), i);
|
||||
if (next_block || flags & INTERNAL_NODE) {
|
||||
if (find_highest)
|
||||
block = value64(ro_node(s), i);
|
||||
else
|
||||
block = value64(ro_node(s), 0);
|
||||
}
|
||||
|
||||
} while (flags & INTERNAL_NODE);
|
||||
|
||||
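The find_key() hunk above fixes the find-lowest case in the persistent-data btree walk: the descent through internal nodes previously always followed child i, which is only correct when looking for the highest key, whereas the fix follows child 0 when the lowest key is wanted. A toy illustration of the two descent directions; the node layout below is invented and bears no relation to the on-disk dm btree format:

    #include <stdio.h>

    struct node {
        int          nr_keys;
        int          keys[4];
        struct node *children[4];   /* NULL in leaf nodes */
    };

    /* Rightmost child when looking for the highest key,
     * leftmost child (index 0) when looking for the lowest one. */
    static int find_key(struct node *n, int find_highest)
    {
        while (n->children[0])      /* internal node */
            n = find_highest ? n->children[n->nr_keys - 1] : n->children[0];
        return find_highest ? n->keys[n->nr_keys - 1] : n->keys[0];
    }

    int main(void)
    {
        struct node l1 = { 2, { 1, 3 }, { NULL } };
        struct node l2 = { 2, { 7, 9 }, { NULL } };
        struct node root = { 2, { 1, 7 }, { &l1, &l2 } };

        printf("lowest=%d highest=%d\n", find_key(&root, 0), find_key(&root, 1));
        return 0;
    }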
|
|
|
@ -53,16 +53,6 @@
|
|||
*/
|
||||
#define R5L_POOL_SIZE 4
|
||||
|
||||
/*
|
||||
* r5c journal modes of the array: write-back or write-through.
|
||||
* write-through mode has identical behavior as existing log only
|
||||
* implementation.
|
||||
*/
|
||||
enum r5c_journal_mode {
|
||||
R5C_JOURNAL_MODE_WRITE_THROUGH = 0,
|
||||
R5C_JOURNAL_MODE_WRITE_BACK = 1,
|
||||
};
|
||||
|
||||
static char *r5c_journal_mode_str[] = {"write-through",
|
||||
"write-back"};
|
||||
/*
|
||||
|
@ -2327,40 +2317,56 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t r5c_journal_mode_store(struct mddev *mddev,
|
||||
const char *page, size_t length)
|
||||
/*
|
||||
* Set journal cache mode on @mddev (external API initially needed by dm-raid).
|
||||
*
|
||||
* @mode as defined in 'enum r5c_journal_mode'.
|
||||
*
|
||||
*/
|
||||
int r5c_journal_mode_set(struct mddev *mddev, int mode)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
struct r5l_log *log = conf->log;
|
||||
int val = -1, i;
|
||||
int len = length;
|
||||
|
||||
if (!log)
|
||||
return -ENODEV;
|
||||
|
||||
if (len && page[len - 1] == '\n')
|
||||
len -= 1;
|
||||
for (i = 0; i < ARRAY_SIZE(r5c_journal_mode_str); i++)
|
||||
if (strlen(r5c_journal_mode_str[i]) == len &&
|
||||
strncmp(page, r5c_journal_mode_str[i], len) == 0) {
|
||||
val = i;
|
||||
break;
|
||||
}
|
||||
if (val < R5C_JOURNAL_MODE_WRITE_THROUGH ||
|
||||
val > R5C_JOURNAL_MODE_WRITE_BACK)
|
||||
if (mode < R5C_JOURNAL_MODE_WRITE_THROUGH ||
|
||||
mode > R5C_JOURNAL_MODE_WRITE_BACK)
|
||||
return -EINVAL;
|
||||
|
||||
if (raid5_calc_degraded(conf) > 0 &&
|
||||
val == R5C_JOURNAL_MODE_WRITE_BACK)
|
||||
mode == R5C_JOURNAL_MODE_WRITE_BACK)
|
||||
return -EINVAL;
|
||||
|
||||
mddev_suspend(mddev);
|
||||
conf->log->r5c_journal_mode = val;
|
||||
conf->log->r5c_journal_mode = mode;
|
||||
mddev_resume(mddev);
|
||||
|
||||
pr_debug("md/raid:%s: setting r5c cache mode to %d: %s\n",
|
||||
mdname(mddev), val, r5c_journal_mode_str[val]);
|
||||
return length;
|
||||
mdname(mddev), mode, r5c_journal_mode_str[mode]);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(r5c_journal_mode_set);
|
||||
|
||||
static ssize_t r5c_journal_mode_store(struct mddev *mddev,
|
||||
const char *page, size_t length)
|
||||
{
|
||||
int mode = ARRAY_SIZE(r5c_journal_mode_str);
|
||||
size_t len = length;
|
||||
|
||||
if (len < 2)
|
||||
return -EINVAL;
|
||||
|
||||
if (page[len - 1] == '\n')
|
||||
len--;
|
||||
|
||||
while (mode--)
|
||||
if (strlen(r5c_journal_mode_str[mode]) == len &&
|
||||
!strncmp(page, r5c_journal_mode_str[mode], len))
|
||||
break;
|
||||
|
||||
return r5c_journal_mode_set(mddev, mode) ?: length;
|
||||
}
|
||||
|
||||
struct md_sysfs_entry
|
||||
|
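After the change above, r5c_journal_mode_store() is reduced to string handling: it trims an optional trailing newline, matches the buffer against r5c_journal_mode_str[], and hands the resulting index to the new exported r5c_journal_mode_set(); the `?: length` idiom returns the written length on success and the error code otherwise. The matching logic in isolation, as a plain userspace function with illustrative names:

    #include <stdio.h>
    #include <string.h>

    static const char *mode_str[] = { "write-through", "write-back" };

    /* Parse a sysfs-style buffer: trailing '\n' is optional, exact match required.
     * Returns the mode index, or -1 when nothing matches (mirrors -EINVAL). */
    static int parse_mode(const char *page, size_t length)
    {
        int mode = (int)(sizeof(mode_str) / sizeof(mode_str[0]));
        size_t len = length;

        if (len < 2)
            return -1;
        if (page[len - 1] == '\n')
            len--;

        while (mode--)
            if (strlen(mode_str[mode]) == len &&
                !strncmp(page, mode_str[mode], len))
                return mode;
        return -1;
    }

    int main(void)
    {
        printf("%d\n", parse_mode("write-back\n", 11));    /* prints 1  */
        printf("%d\n", parse_mode("bogus\n", 6));          /* prints -1 */
        return 0;
    }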
|
|
@ -547,6 +547,16 @@ struct r5worker_group {
|
|||
int stripes_cnt;
|
||||
};
|
||||
|
||||
/*
|
||||
* r5c journal modes of the array: write-back or write-through.
|
||||
* write-through mode has identical behavior as existing log only
|
||||
* implementation.
|
||||
*/
|
||||
enum r5c_journal_mode {
|
||||
R5C_JOURNAL_MODE_WRITE_THROUGH = 0,
|
||||
R5C_JOURNAL_MODE_WRITE_BACK = 1,
|
||||
};
|
||||
|
||||
enum r5_cache_state {
|
||||
R5_INACTIVE_BLOCKED, /* release of inactive stripes blocked,
|
||||
* waiting for 25% to be free
|
||||
|
@ -795,4 +805,5 @@ extern void r5c_check_cached_full_stripe(struct r5conf *conf);
|
|||
extern struct md_sysfs_entry r5c_journal_mode;
|
||||
extern void r5c_update_on_rdev_error(struct mddev *mddev);
|
||||
extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
|
||||
extern int r5c_journal_mode_set(struct mddev *mddev, int journal_mode);
|
||||
#endif
|
||||
|
|
|
@ -22,11 +22,13 @@ struct bio_vec;
|
|||
/*
|
||||
* Type of table, mapped_device's mempool and request_queue
|
||||
*/
|
||||
#define DM_TYPE_NONE 0
|
||||
#define DM_TYPE_BIO_BASED 1
|
||||
#define DM_TYPE_REQUEST_BASED 2
|
||||
#define DM_TYPE_MQ_REQUEST_BASED 3
|
||||
#define DM_TYPE_DAX_BIO_BASED 4
|
||||
enum dm_queue_mode {
|
||||
DM_TYPE_NONE = 0,
|
||||
DM_TYPE_BIO_BASED = 1,
|
||||
DM_TYPE_REQUEST_BASED = 2,
|
||||
DM_TYPE_MQ_REQUEST_BASED = 3,
|
||||
DM_TYPE_DAX_BIO_BASED = 4,
|
||||
};
|
||||
|
||||
typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
|
||||
|
||||
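Turning the DM_TYPE_* constants into `enum dm_queue_mode` lets the table/queue type flow through dm_table_set_type(), dm_get_md_type(), dm_alloc_md_mempools() and friends with real type checking, and lets the compiler flag switch statements that forget a mode, which is why dm_setup_md_queue() now spells out a DM_TYPE_NONE case. A small sketch of that benefit; the enum and function names below are illustrative only:

    #include <stdio.h>

    /* Stand-in for the new enum; the values mirror the old #define constants. */
    enum queue_mode {
        MODE_NONE = 0,
        MODE_BIO_BASED,
        MODE_REQUEST_BASED,
    };

    static const char *mode_name(enum queue_mode m)
    {
        /* With -Wswitch, a missing enumerator here is reported at compile
         * time, which plain integer #defines could never do. */
        switch (m) {
        case MODE_NONE:          return "none";
        case MODE_BIO_BASED:     return "bio-based";
        case MODE_REQUEST_BASED: return "request-based";
        }
        return "unknown";
    }

    int main(void)
    {
        printf("%s\n", mode_name(MODE_BIO_BASED));
        return 0;
    }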
|
@ -221,6 +223,18 @@ struct target_type {
|
|||
*/
|
||||
typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio);
|
||||
|
||||
/*
|
||||
* A target implements its own bio data integrity.
|
||||
*/
|
||||
#define DM_TARGET_INTEGRITY 0x00000010
|
||||
#define dm_target_has_integrity(type) ((type)->features & DM_TARGET_INTEGRITY)
|
||||
|
||||
/*
|
||||
* A target passes integrity data to the lower device.
|
||||
*/
|
||||
#define DM_TARGET_PASSES_INTEGRITY 0x00000020
|
||||
#define dm_target_passes_integrity(type) ((type)->features & DM_TARGET_PASSES_INTEGRITY)
|
||||
|
||||
struct dm_target {
|
||||
struct dm_table *table;
|
||||
struct target_type *type;
|
||||
|
@ -465,7 +479,7 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback
|
|||
* Useful for "hybrid" target (supports both bio-based
|
||||
* and request-based).
|
||||
*/
|
||||
void dm_table_set_type(struct dm_table *t, unsigned type);
|
||||
void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type);
|
||||
|
||||
/*
|
||||
* Finally call this to make the table ready for use.
|
||||
|
|