2016-05-12 23:28:10 +03:00
|
|
|
/*
|
|
|
|
* Internal header file _only_ for device mapper core
|
|
|
|
*
|
|
|
|
* Copyright (C) 2016 Red Hat, Inc. All rights reserved.
|
|
|
|
*
|
|
|
|
* This file is released under the LGPL.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef DM_CORE_INTERNAL_H
|
|
|
|
#define DM_CORE_INTERNAL_H
|
|
|
|
|
|
|
|
#include <linux/kthread.h>
|
|
|
|
#include <linux/ktime.h>
|
2020-09-19 20:09:11 +03:00
|
|
|
#include <linux/genhd.h>
|
2016-05-12 23:28:10 +03:00
|
|
|
#include <linux/blk-mq.h>
|
2021-02-01 08:10:17 +03:00
|
|
|
#include <linux/keyslot-manager.h>
|
2016-05-12 23:28:10 +03:00
|
|
|
|
|
|
|
#include <trace/events/block.h>
|
|
|
|
|
|
|
|
#include "dm.h"
|
|
|
|
|
|
|
|
/*
 * NOTE(review): judging by the name, an upper bound (1024) on the number of
 * reserved I/Os; it is not referenced anywhere in this header, so confirm
 * its exact use in the DM core sources.
 */
#define DM_RESERVED_MAX_IOS 1024
|
|
|
|
|
|
|
|
struct dm_kobject_holder {
|
|
|
|
struct kobject kobj;
|
|
|
|
struct completion completion;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
2020-09-19 20:09:11 +03:00
|
|
|
* DM core internal structures used directly by dm.c, dm-rq.c and dm-table.c.
|
|
|
|
* DM targets must _not_ dereference a mapped_device or dm_table to directly
|
|
|
|
* access their members!
|
2016-05-12 23:28:10 +03:00
|
|
|
*/
|
2020-09-19 20:09:11 +03:00
|
|
|
|
2016-05-12 23:28:10 +03:00
|
|
|
struct mapped_device {
|
|
|
|
struct mutex suspend_lock;
|
|
|
|
|
2018-05-23 01:26:20 +03:00
|
|
|
struct mutex table_devices_lock;
|
|
|
|
struct list_head table_devices;
|
|
|
|
|
2016-05-12 23:28:10 +03:00
|
|
|
/*
|
|
|
|
* The current mapping (struct dm_table *).
|
|
|
|
* Use dm_get_live_table{_fast} or take suspend_lock for
|
|
|
|
* dereference.
|
|
|
|
*/
|
|
|
|
void __rcu *map;
|
|
|
|
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
/* Protect queue and type against concurrent access. */
|
|
|
|
struct mutex type_lock;
|
2018-05-23 01:26:20 +03:00
|
|
|
enum dm_queue_mode type;
|
|
|
|
|
|
|
|
int numa_node_id;
|
|
|
|
struct request_queue *queue;
|
2016-05-12 23:28:10 +03:00
|
|
|
|
|
|
|
atomic_t holders;
|
|
|
|
atomic_t open_count;
|
|
|
|
|
|
|
|
struct dm_target *immutable_target;
|
|
|
|
struct target_type *immutable_target_type;
|
|
|
|
|
2018-05-23 01:26:20 +03:00
|
|
|
char name[16];
|
2016-05-12 23:28:10 +03:00
|
|
|
struct gendisk *disk;
|
2017-04-12 22:35:44 +03:00
|
|
|
struct dax_device *dax_dev;
|
2016-05-12 23:28:10 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* A list of ios that arrived while we were suspended.
|
|
|
|
*/
|
|
|
|
struct work_struct work;
|
2018-05-23 01:26:20 +03:00
|
|
|
wait_queue_head_t wait;
|
2016-05-12 23:28:10 +03:00
|
|
|
spinlock_t deferred_lock;
|
|
|
|
struct bio_list deferred;
|
|
|
|
|
2018-05-23 01:26:20 +03:00
|
|
|
void *interface_ptr;
|
|
|
|
|
2016-05-12 23:28:10 +03:00
|
|
|
/*
|
|
|
|
* Event handling.
|
|
|
|
*/
|
|
|
|
wait_queue_head_t eventq;
|
|
|
|
atomic_t event_nr;
|
|
|
|
atomic_t uevent_seq;
|
|
|
|
struct list_head uevent_list;
|
|
|
|
spinlock_t uevent_lock; /* Protect access to uevent_list */
|
|
|
|
|
|
|
|
/* the number of internal suspends */
|
|
|
|
unsigned internal_suspend_count;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* io objects are allocated from here.
|
|
|
|
*/
|
2018-05-21 01:25:53 +03:00
|
|
|
struct bio_set io_bs;
|
|
|
|
struct bio_set bs;
|
2016-05-12 23:28:10 +03:00
|
|
|
|
2018-05-23 01:26:20 +03:00
|
|
|
/*
|
|
|
|
* Processing queue (flush)
|
|
|
|
*/
|
|
|
|
struct workqueue_struct *wq;
|
|
|
|
|
2016-05-12 23:28:10 +03:00
|
|
|
/* forced geometry settings */
|
|
|
|
struct hd_geometry geometry;
|
|
|
|
|
|
|
|
/* kobject and completion */
|
|
|
|
struct dm_kobject_holder kobj_holder;
|
|
|
|
|
2021-02-10 23:26:23 +03:00
|
|
|
int swap_bios;
|
|
|
|
struct semaphore swap_bios_semaphore;
|
|
|
|
struct mutex swap_bios_lock;
|
|
|
|
|
2016-05-12 23:28:10 +03:00
|
|
|
struct dm_stats stats;
|
|
|
|
|
|
|
|
/* for blk-mq request-based DM support */
|
|
|
|
struct blk_mq_tag_set *tag_set;
|
|
|
|
bool init_tio_pdu:1;
|
2017-11-01 02:33:02 +03:00
|
|
|
|
|
|
|
struct srcu_struct io_barrier;
|
dm: introduce zone append emulation
For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.
To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intersepted in __map_bio() and
processed using the new functions dm_zone_map_bio().
Detection of the target ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operation, either by explicitly declaring it using the new
struct dm_target field zone_append_not_supported, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_ZONE_APPEND_EMULATED. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.
Atomicity of the zones write pointer tracking and updates is done using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission or when dm_zone_endio() is called when the clone BIO
completes.
The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().
For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.
All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2021-05-26 00:25:00 +03:00
|
|
|
|
|
|
|
#ifdef CONFIG_BLK_DEV_ZONED
|
|
|
|
unsigned int nr_zones;
|
|
|
|
unsigned int *zwp_offset;
|
|
|
|
#endif
|
2016-05-12 23:28:10 +03:00
|
|
|
};
|
|
|
|
|
2021-05-26 00:24:59 +03:00
|
|
|
/*
 * Bits for the flags field of struct mapped_device.
 *
 * These are bit numbers (not masks); dm_emulate_zone_append() tests one of
 * them with test_bit() on md->flags.
 */
#define DMF_BLOCK_IO_FOR_SUSPEND	0
#define DMF_SUSPENDED			1
#define DMF_FROZEN			2
#define DMF_FREEING			3
#define DMF_DELETING			4
#define DMF_NOFLUSH_SUSPENDING		5
#define DMF_DEFERRED_REMOVE		6
#define DMF_SUSPENDED_INTERNALLY	7
#define DMF_POST_SUSPENDING		8
/*
 * Marks a mapped device whose target does not natively support zone append
 * operations, so that zone append is emulated using regular writes.
 */
#define DMF_EMULATE_ZONE_APPEND		9
|
2021-05-26 00:24:59 +03:00
|
|
|
|
dm: disable DISCARD if the underlying storage no longer supports it
Some storage devices report supporting discard commands like
WRITE_SAME_16 with unmap, but reject discard commands sent to the
storage device. This is a clear storage firmware bug but it doesn't
change the fact that should a program cause discards to be sent to a
multipath device layered on this buggy storage, all paths can end up
failed at the same time from the discards, causing possible I/O loss.
The first discard to a path will fail with Illegal Request, Invalid
field in cdb, e.g.:
kernel: sd 8:0:8:19: [sdfn] tag#0 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
kernel: sd 8:0:8:19: [sdfn] tag#0 Sense Key : Illegal Request [current]
kernel: sd 8:0:8:19: [sdfn] tag#0 Add. Sense: Invalid field in cdb
kernel: sd 8:0:8:19: [sdfn] tag#0 CDB: Write same(16) 93 08 00 00 00 00 00 a0 08 00 00 00 80 00 00 00
kernel: blk_update_request: critical target error, dev sdfn, sector 10487808
The SCSI layer converts this to the BLK_STS_TARGET error number, the sd
device disables its support for discard on this path, and because of the
BLK_STS_TARGET error multipath fails the discard without failing any
path or retrying down a different path. But subsequent discards can
cause path failures. Any discards sent to the path which already failed
a discard ends up failing with EIO from blk_cloned_rq_check_limits with
an "over max size limit" error since the discard limit was set to 0 by
the sd driver for the path. As the error is EIO, this now fails the
path and multipath tries to send the discard down the next path. This
cycle continues as discards are sent until all paths fail.
Fix this by training DM core to disable DISCARD if the underlying
storage already did so.
Also, fix branching in dm_done() and clone_endio() to reflect the
mutually exclusive nature of the IO operations in question.
Cc: stable@vger.kernel.org
Reported-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2019-04-03 19:23:11 +03:00
|
|
|
/*
 * Helpers that switch off an operation type on a mapped device.  They are
 * only declared here; the definitions live elsewhere in DM core.
 *
 * disable_discard() lets DM core disable DISCARD when the underlying
 * storage no longer supports it.
 * NOTE(review): disable_write_same() and disable_write_zeroes() are
 * presumably the WRITE SAME / WRITE ZEROES equivalents -- confirm against
 * their definitions.
 */
void disable_discard(struct mapped_device *md);
void disable_write_same(struct mapped_device *md);
void disable_write_zeroes(struct mapped_device *md);
|
2016-05-12 23:28:10 +03:00
|
|
|
|
2020-09-19 20:09:11 +03:00
|
|
|
static inline sector_t dm_get_size(struct mapped_device *md)
|
|
|
|
{
|
|
|
|
return get_capacity(md->disk);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct dm_stats *dm_get_stats(struct mapped_device *md)
|
|
|
|
{
|
|
|
|
return &md->stats;
|
|
|
|
}
|
|
|
|
|
dm: introduce zone append emulation
For zoned targets that cannot support zone append operations, implement
an emulation using regular write operations. If the original BIO
submitted by the user is a zone append operation, change its clone into
a regular write operation directed at the target zone write pointer
position.
To do so, an array of write pointer offsets (write pointer position
relative to the start of a zone) is added to struct mapped_device. All
operations that modify a sequential zone write pointer (writes, zone
reset, zone finish and zone append) are intercepted in __map_bio() and
processed using the new function dm_zone_map_bio().
Detection of the target ability to natively support zone append
operations is done from dm_table_set_restrictions() by calling the
function dm_set_zones_restrictions(). A target that does not support
zone append operation, either by explicitly declaring it using the new
struct dm_target field zone_append_not_supported, or because the device
table contains a non-zoned device, has its mapped device marked with the
new flag DMF_EMULATE_ZONE_APPEND. The helper function
dm_emulate_zone_append() is introduced to test a mapped device for this
new flag.
Atomicity of the zones write pointer tracking and updates is done using
a zone write locking mechanism based on a bitmap. This is similar to
the block layer method but based on BIOs rather than struct request.
A zone write lock is taken in dm_zone_map_bio() for any clone BIO with
an operation type that changes the BIO target zone write pointer
position. The zone write lock is released if the clone BIO is failed
before submission or when dm_zone_endio() is called when the clone BIO
completes.
The zone write lock bitmap of the mapped device, together with a bitmap
indicating zone types (conv_zones_bitmap) and the write pointer offset
array (zwp_offset) are allocated and initialized with a full device zone
report in dm_set_zones_restrictions() using the function
dm_revalidate_zones().
For failed operations that may have modified a zone write pointer, the
zone write pointer offset is marked as invalid in dm_zone_endio().
Zones with an invalid write pointer offset are checked and the write
pointer updated using an internal report zone operation when the
faulty zone is accessed again by the user.
All functions added for this emulation have a minimal overhead for
zoned targets natively supporting zone append operations. Regular
device targets are also not affected. The added code also does not
impact builds with CONFIG_BLK_DEV_ZONED disabled by stubbing out all
dm zone related functions.
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
2021-05-26 00:25:00 +03:00
|
|
|
static inline bool dm_emulate_zone_append(struct mapped_device *md)
|
|
|
|
{
|
|
|
|
if (blk_queue_is_zoned(md->queue))
|
|
|
|
return test_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-09-19 20:09:11 +03:00
|
|
|
#define DM_TABLE_MAX_DEPTH 16
|
|
|
|
|
|
|
|
struct dm_table {
|
|
|
|
struct mapped_device *md;
|
|
|
|
enum dm_queue_mode type;
|
|
|
|
|
|
|
|
/* btree table */
|
|
|
|
unsigned int depth;
|
|
|
|
unsigned int counts[DM_TABLE_MAX_DEPTH]; /* in nodes */
|
|
|
|
sector_t *index[DM_TABLE_MAX_DEPTH];
|
|
|
|
|
|
|
|
unsigned int num_targets;
|
|
|
|
unsigned int num_allocated;
|
|
|
|
sector_t *highs;
|
|
|
|
struct dm_target *targets;
|
|
|
|
|
|
|
|
struct target_type *immutable_target_type;
|
|
|
|
|
|
|
|
bool integrity_supported:1;
|
|
|
|
bool singleton:1;
|
|
|
|
unsigned integrity_added:1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Indicates the rw permissions for the new logical
|
|
|
|
* device. This should be a combination of FMODE_READ
|
|
|
|
* and FMODE_WRITE.
|
|
|
|
*/
|
|
|
|
fmode_t mode;
|
|
|
|
|
|
|
|
/* a list of devices used by this table */
|
|
|
|
struct list_head devices;
|
|
|
|
|
|
|
|
/* events get handed up using this callback */
|
|
|
|
void (*event_fn)(void *);
|
|
|
|
void *event_context;
|
|
|
|
|
|
|
|
struct dm_md_mempools *mempools;
|
2021-02-01 08:10:17 +03:00
|
|
|
|
|
|
|
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
|
|
|
|
struct blk_keyslot_manager *ksm;
|
|
|
|
#endif
|
2020-09-19 20:09:11 +03:00
|
|
|
};
|
|
|
|
|
2021-05-26 00:24:59 +03:00
|
|
|
/*
|
|
|
|
* One of these is allocated per clone bio.
|
|
|
|
*/
|
|
|
|
#define DM_TIO_MAGIC 7282014
|
|
|
|
struct dm_target_io {
|
|
|
|
unsigned int magic;
|
|
|
|
struct dm_io *io;
|
|
|
|
struct dm_target *ti;
|
|
|
|
unsigned int target_bio_nr;
|
|
|
|
unsigned int *len_ptr;
|
|
|
|
bool inside_dm_io;
|
|
|
|
struct bio clone;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* One of these is allocated per original bio.
|
|
|
|
* It contains the first clone used for that original.
|
|
|
|
*/
|
|
|
|
#define DM_IO_MAGIC 5191977
|
|
|
|
struct dm_io {
|
|
|
|
unsigned int magic;
|
|
|
|
struct mapped_device *md;
|
|
|
|
blk_status_t status;
|
|
|
|
atomic_t io_count;
|
|
|
|
struct bio *orig_bio;
|
|
|
|
unsigned long start_time;
|
|
|
|
spinlock_t endio_lock;
|
|
|
|
struct dm_stats_aux stats_aux;
|
|
|
|
/* last member of dm_target_io is 'struct bio' */
|
|
|
|
struct dm_target_io tio;
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline void dm_io_inc_pending(struct dm_io *io)
|
|
|
|
{
|
|
|
|
atomic_inc(&io->io_count);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Declaration only; the definition is not in this header.
 * NOTE(review): by its name this is the counterpart of dm_io_inc_pending(),
 * presumably dropping io->io_count and recording @error -- confirm against
 * the definition.
 */
void dm_io_dec_pending(struct dm_io *io, blk_status_t error);
|
|
|
|
|
2016-05-12 23:28:10 +03:00
|
|
|
static inline struct completion *dm_get_completion_from_kobject(struct kobject *kobj)
|
|
|
|
{
|
|
|
|
return &container_of(kobj, struct dm_kobject_holder, kobj)->completion;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Declaration only; the definition is not in this header.
 * NOTE(review): from the parameter names this presumably reads
 * *module_param and falls back to @def / limits the value to @max --
 * confirm against the definition.
 */
unsigned __dm_get_module_param(unsigned *module_param, unsigned def, unsigned max);
|
|
|
|
|
|
|
|
/*
 * Check whether the string in @result has filled a buffer of @maxlen bytes.
 *
 * @result: NUL-terminated string; not read at all when @maxlen is 0.
 * @maxlen: size of the buffer in bytes.
 *
 * Returns true when @maxlen is 0, or when the string together with its
 * terminating NUL takes up at least @maxlen bytes (strlen + 1 >= maxlen).
 * A string that exactly fills the buffer is therefore reported as an
 * overflow too.  Returns false otherwise.
 */
static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
{
	if (!maxlen)
		return true;

	return strlen(result) + 1 >= maxlen;
}
|
|
|
|
|
2017-01-17 00:05:59 +03:00
|
|
|
extern atomic_t dm_global_event_nr;
|
|
|
|
extern wait_queue_head_t dm_global_eventq;
|
2017-09-20 14:29:49 +03:00
|
|
|
void dm_issue_global_event(void);
|
2017-01-17 00:05:59 +03:00
|
|
|
|
2016-05-12 23:28:10 +03:00
|
|
|
#endif
|