-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl7VPc4QHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpgQkEACnQlzWOfNQMz1AzgUAv/S8IYDJCLrkbjLZ
 JK4pJv8Hjhss/7sS+fd8kyKe9VtaZz2IjmrXcC66RMMwtpx4iHnkRffoNAgEdGOl
 /M5TCZGhs+F/mp3Lc0WdR5DFHkM6yy2Tkk9wCFLreB4bW67janAWnd7nbU4INqJj
 +WqIgpzNMc/kfUhpBYTeQLORhL4e2TG9ADTi/zeUITlpnEsA65LOgXKEpeIFYnSX
 KTl4GIZ9tjazG3Y1Eva7DYHDIErNNAtX67KBqf+WBgMV98eB0O6xIPN1WlmhDTqj
 FGMLkb8msH1HHntvxDAuc4/ortnUy8vPI4o6zKP89HJJNjIM5p5eHEuVF5JnBw42
 Rtu9Om6JqWx51nhAhJNBj9bUStYbhEl0vVQCwbkfPbDJhzTy3RR8z709q9+ZwOrL
 xbp4aJBzqrzscjBEiSQbNCf2PyuOAdU0r1x81UN81ZN41d5qUcumcinjw4Y7vru8
 z5zMlo1Iy/AWQYyu7jgHmnpI7ZyA/1Qclo5dV7aa72bLFaJa35e7QxgfQOFBA5dY
 UZl6QPJRlnB80uGRzD5jCh2O2sQ3XZqYnpaKsUAka1GgbceCp9IC4A5mfZvpACsh
 Xk8VXjlhvY/iPJsKLqrh4Oedg4Dj5M3PLL9C3MDfYeIP2qgXpbnk87UV1TPNSpY0
 QcTxsXXXIw==
 =H+/Z
 -----END PGP SIGNATURE-----

Merge tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
 "On top of the core changes, here are the block driver changes for this
  merge window:

   - NVMe changes:
        - NVMe over Fibre Channel protocol updates, which also reach
          over to drivers/scsi/lpfc (James Smart)
        - namespace revalidation support on the target (Anthony
          Iliopoulos)
        - gcc zero length array fix (Arnd Bergmann)
        - nvmet cleanups (Chaitanya Kulkarni)
        - misc cleanups and fixes (me, Keith Busch, Sagi Grimberg)
        - use a SRQ per completion vector (Max Gurtovoy)
        - fix handling of runtime changes to the queue count (Weiping
          Zhang)
        - t10 protection information support for nvme-rdma and
          nvmet-rdma (Israel Rukshin and Max Gurtovoy)
        - target side AEN improvements (Chaitanya Kulkarni)
        - various fixes and minor improvements all over, including the
          nvme part of the lpfc driver

   - Floppy code cleanup series (Willy, Denis)

   - Floppy contention fix (Jiri)

   - Loop CONFIGURE support (Martijn)

   - bcache fixes/improvements (Coly, Joe, Colin)

   - q->queuedata cleanups (Christoph)

   - Get rid of ioctl_by_bdev (Christoph, Stefan)

   - md/raid5 allocation fixes (Coly)

   - zero length array fixes (Gustavo)

   - swim3 task state fix (Xu)"
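
For the LOOP_CONFIGURE item above: the new ioctl (see loop_configure() in the
drivers/block/loop.c hunks further down) bundles what previously took a
LOOP_SET_FD plus LOOP_SET_STATUS64 plus LOOP_SET_BLOCK_SIZE sequence into a
single call. Below is a minimal userspace sketch, assuming the struct
loop_config layout added by this series (fd, block_size, info) is available
from an updated <linux/loop.h>; error handling is trimmed.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/loop.h>		/* LOOP_CONFIGURE, struct loop_config */

int main(void)
{
	int loop_fd = open("/dev/loop0", O_RDWR);
	int file_fd = open("backing.img", O_RDWR);
	struct loop_config cfg;

	memset(&cfg, 0, sizeof(cfg));
	cfg.fd = file_fd;                       /* backing file to bind */
	cfg.block_size = 4096;                  /* optional; 0 keeps the default */
	cfg.info.lo_flags = LO_FLAGS_DIRECT_IO; /* validated against
	                                           LOOP_CONFIGURE_SETTABLE_FLAGS */

	if (ioctl(loop_fd, LOOP_CONFIGURE, &cfg) < 0)
		perror("LOOP_CONFIGURE");
	return 0;
}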

* tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block: (166 commits)
  bcache: configure the asynchronous registertion to be experimental
  bcache: asynchronous devices registration
  bcache: fix refcount underflow in bcache_device_free()
  bcache: Convert pr_<level> uses to a more typical style
  bcache: remove redundant variables i and n
  lpfc: Fix return value in __lpfc_nvme_ls_abort
  lpfc: fix axchg pointer reference after free and double frees
  lpfc: Fix pointer checks and comments in LS receive refactoring
  nvme: set dma alignment to qword
  nvmet: cleanups the loop in nvmet_async_events_process
  nvmet: fix memory leak when removing namespaces and controllers concurrently
  nvmet-rdma: add metadata/T10-PI support
  nvmet: add metadata support for block devices
  nvmet: add metadata/T10-PI support
  nvme: add Metadata Capabilities enumerations
  nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len
  nvmet: rename nvmet_rw_len to nvmet_rw_data_len
  nvmet: add metadata characteristics for a namespace
  nvme-rdma: add metadata/T10-PI support
  nvme-rdma: introduce nvme_rdma_sgl structure
  ...
Linus Torvalds 2020-06-02 15:37:03 -07:00
Parents: 750a02ab8d 0c8d3fcead
Commit: bce159d734
81 changed files: 5520 additions and 2444 deletions


@ -14711,6 +14711,7 @@ S: Supported
W: http://www.ibm.com/developerworks/linux/linux390/
F: block/partitions/ibm.c
F: drivers/s390/block/dasd*
F: include/linux/dasd_mod.h
S390 IOMMU (PCI)
M: Gerald Schaefer <gerald.schaefer@de.ibm.com>


@ -11,8 +11,8 @@
#define __ASM_ALPHA_FLOPPY_H
#define fd_inb(port) inb_p(port)
#define fd_outb(value,port) outb_p(value,port)
#define fd_inb(base, reg) inb_p((base) + (reg))
#define fd_outb(value, base, reg) outb_p(value, (base) + (reg))
#define fd_enable_dma() enable_dma(FLOPPY_DMA)
#define fd_disable_dma() disable_dma(FLOPPY_DMA)
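
All of the architecture floppy headers touched in this series follow the same
pattern as the alpha hunk above: fd_inb()/fd_outb() take an explicit (base,
reg) pair instead of a pre-combined port number, so the register index stays
symbolic (FD_STATUS, FD_DATA, FD_DOR, ...) and each arch decides how to fold
it into the controller base. A hedged sketch of a caller after the change;
read_fdc_msr() is a hypothetical helper, not part of the series.

/* The base address and the symbolic register offset now travel separately,
 * which lets implementations such as sparc switch on the register name
 * instead of masking bits out of a combined port number.
 */
static unsigned char read_fdc_msr(unsigned long fdc_base)
{
	return fd_inb(fdc_base, FD_STATUS);	/* was: fd_inb(fdc_base + 4) */
}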


@ -9,20 +9,20 @@
#ifndef __ASM_ARM_FLOPPY_H
#define __ASM_ARM_FLOPPY_H
#define fd_outb(val,port) \
#define fd_outb(val, base, reg) \
do { \
int new_val = (val); \
if (((port) & 7) == FD_DOR) { \
if ((reg) == FD_DOR) { \
if (new_val & 0xf0) \
new_val = (new_val & 0x0c) | \
floppy_selects[new_val & 3]; \
else \
new_val &= 0x0c; \
} \
outb(new_val, (port)); \
outb(new_val, (base) + (reg)); \
} while(0)
#define fd_inb(port) inb((port))
#define fd_inb(base, reg) inb((base) + (reg))
#define fd_request_irq() request_irq(IRQ_FLOPPYDISK,floppy_interrupt,\
0,"floppy",NULL)
#define fd_free_irq() free_irq(IRQ_FLOPPYDISK,NULL)


@ -63,21 +63,21 @@ static __inline__ void release_dma_lock(unsigned long flags)
}
static __inline__ unsigned char fd_inb(int port)
static __inline__ unsigned char fd_inb(int base, int reg)
{
if(MACH_IS_Q40)
return inb_p(port);
return inb_p(base + reg);
else if(MACH_IS_SUN3X)
return sun3x_82072_fd_inb(port);
return sun3x_82072_fd_inb(base + reg);
return 0;
}
static __inline__ void fd_outb(unsigned char value, int port)
static __inline__ void fd_outb(unsigned char value, int base, int reg)
{
if(MACH_IS_Q40)
outb_p(value, port);
outb_p(value, base + reg);
else if(MACH_IS_SUN3X)
sun3x_82072_fd_outb(value, port);
sun3x_82072_fd_outb(value, base + reg);
}
@ -211,26 +211,27 @@ asmlinkage irqreturn_t floppy_hardint(int irq, void *dev_id)
st=1;
for(lcount=virtual_dma_count, lptr=virtual_dma_addr;
lcount; lcount--, lptr++) {
st=inb(virtual_dma_port+4) & 0xa0 ;
if(st != 0xa0)
st = inb(virtual_dma_port + FD_STATUS);
st &= STATUS_DMA | STATUS_READY;
if (st != (STATUS_DMA | STATUS_READY))
break;
if(virtual_dma_mode)
outb_p(*lptr, virtual_dma_port+5);
outb_p(*lptr, virtual_dma_port + FD_DATA);
else
*lptr = inb_p(virtual_dma_port+5);
*lptr = inb_p(virtual_dma_port + FD_DATA);
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
st = inb(virtual_dma_port+4);
st = inb(virtual_dma_port + FD_STATUS);
}
#ifdef TRACE_FLPY_INT
calls++;
#endif
if(st == 0x20)
if (st == STATUS_DMA)
return IRQ_HANDLED;
if(!(st & 0x20)) {
if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count=0;
#ifdef TRACE_FLPY_INT
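
The floppy_hardint() PIO loops rewritten here (and in the matching hunks for
the other architectures below) replace raw constants with the FDC main status
register bit names from include/uapi/linux/fdreg.h. As a reference, the old
magic numbers decode as in this hypothetical helper restating the new test:

/* STATUS_READY (0x80): RQM, the controller wants a data byte.
 * STATUS_DMA   (0x20): NDMA, execution phase running in non-DMA (PIO) mode.
 * The old "st = inb(port + 4) & 0xa0; if (st != 0xa0)" check is therefore:
 */
static bool fdc_wants_pio_byte(int port)
{
	unsigned char st = inb(port + FD_STATUS);

	return (st & (STATUS_DMA | STATUS_READY)) == (STATUS_DMA | STATUS_READY);
}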


@ -26,14 +26,14 @@
/*
* How to access the FDC's registers.
*/
static inline unsigned char fd_inb(unsigned int port)
static inline unsigned char fd_inb(unsigned int base, unsigned int reg)
{
return inb_p(port);
return inb_p(base + reg);
}
static inline void fd_outb(unsigned char value, unsigned int port)
static inline void fd_outb(unsigned char value, unsigned int base, unsigned int reg)
{
outb_p(value, port);
outb_p(value, base + reg);
}
/*


@ -17,19 +17,19 @@
#include <asm/jazzdma.h>
#include <asm/pgtable.h>
static inline unsigned char fd_inb(unsigned int port)
static inline unsigned char fd_inb(unsigned int base, unsigned int reg)
{
unsigned char c;
c = *(volatile unsigned char *) port;
c = *(volatile unsigned char *) (base + reg);
udelay(1);
return c;
}
static inline void fd_outb(unsigned char value, unsigned int port)
static inline void fd_outb(unsigned char value, unsigned int base, unsigned int reg)
{
*(volatile unsigned char *) port = value;
*(volatile unsigned char *) (base + reg) = value;
}
/*


@ -29,8 +29,8 @@
#define CSW fd_routine[can_use_virtual_dma & 1]
#define fd_inb(port) readb(port)
#define fd_outb(value, port) writeb(value, port)
#define fd_inb(base, reg) readb((base) + (reg))
#define fd_outb(value, base, reg) writeb(value, (base) + (reg))
#define fd_request_dma() CSW._request_dma(FLOPPY_DMA,"floppy")
#define fd_free_dma() CSW._free_dma(FLOPPY_DMA)
@ -75,27 +75,28 @@ static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs)
register char *lptr = virtual_dma_addr;
for (lcount = virtual_dma_count; lcount; lcount--) {
st = fd_inb(virtual_dma_port+4) & 0xa0 ;
if (st != 0xa0)
st = fd_inb(virtual_dma_port, FD_STATUS);
st &= STATUS_DMA | STATUS_READY;
if (st != (STATUS_DMA | STATUS_READY))
break;
if (virtual_dma_mode) {
fd_outb(*lptr, virtual_dma_port+5);
fd_outb(*lptr, virtual_dma_port, FD_DATA);
} else {
*lptr = fd_inb(virtual_dma_port+5);
*lptr = fd_inb(virtual_dma_port, FD_DATA);
}
lptr++;
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
st = fd_inb(virtual_dma_port+4);
st = fd_inb(virtual_dma_port, FD_STATUS);
}
#ifdef TRACE_FLPY_INT
calls++;
#endif
if (st == 0x20)
if (st == STATUS_DMA)
return;
if (!(st & 0x20)) {
if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count = 0;
#ifdef TRACE_FLPY_INT


@ -13,8 +13,8 @@
#include <asm/machdep.h>
#define fd_inb(port) inb_p(port)
#define fd_outb(value,port) outb_p(value,port)
#define fd_inb(base, reg) inb_p((base) + (reg))
#define fd_outb(value, base, reg) outb_p(value, (base) + (reg))
#define fd_enable_dma() enable_dma(FLOPPY_DMA)
#define fd_disable_dma() fd_ops->_disable_dma(FLOPPY_DMA)
@ -61,21 +61,22 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
st = 1;
for (lcount=virtual_dma_count, lptr=virtual_dma_addr;
lcount; lcount--, lptr++) {
st=inb(virtual_dma_port+4) & 0xa0 ;
if (st != 0xa0)
st = inb(virtual_dma_port + FD_STATUS);
st &= STATUS_DMA | STATUS_READY;
if (st != (STATUS_DMA | STATUS_READY))
break;
if (virtual_dma_mode)
outb_p(*lptr, virtual_dma_port+5);
outb_p(*lptr, virtual_dma_port + FD_DATA);
else
*lptr = inb_p(virtual_dma_port+5);
*lptr = inb_p(virtual_dma_port + FD_DATA);
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
st = inb(virtual_dma_port+4);
st = inb(virtual_dma_port + FD_STATUS);
if (st == 0x20)
if (st == STATUS_DMA)
return IRQ_HANDLED;
if (!(st & 0x20)) {
if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count=0;
doing_vdma = 0;


@ -59,8 +59,8 @@ struct sun_floppy_ops {
static struct sun_floppy_ops sun_fdops;
#define fd_inb(port) sun_fdops.fd_inb(port)
#define fd_outb(value,port) sun_fdops.fd_outb(value,port)
#define fd_inb(base, reg) sun_fdops.fd_inb(reg)
#define fd_outb(value, base, reg) sun_fdops.fd_outb(value, reg)
#define fd_enable_dma() sun_fd_enable_dma()
#define fd_disable_dma() sun_fd_disable_dma()
#define fd_request_dma() (0) /* nothing... */
@ -114,15 +114,15 @@ static unsigned char sun_read_dir(void)
static unsigned char sun_82072_fd_inb(int port)
{
udelay(5);
switch(port & 7) {
switch (port) {
default:
printk("floppy: Asked to read unknown port %d\n", port);
panic("floppy: Port bolixed.");
case 4: /* FD_STATUS */
case FD_STATUS:
return sun_fdc->status_82072 & ~STATUS_DMA;
case 5: /* FD_DATA */
case FD_DATA:
return sun_fdc->data_82072;
case 7: /* FD_DIR */
case FD_DIR:
return sun_read_dir();
}
panic("sun_82072_fd_inb: How did I get here?");
@ -131,20 +131,20 @@ static unsigned char sun_82072_fd_inb(int port)
static void sun_82072_fd_outb(unsigned char value, int port)
{
udelay(5);
switch(port & 7) {
switch (port) {
default:
printk("floppy: Asked to write to unknown port %d\n", port);
panic("floppy: Port bolixed.");
case 2: /* FD_DOR */
case FD_DOR:
sun_set_dor(value, 0);
break;
case 5: /* FD_DATA */
case FD_DATA:
sun_fdc->data_82072 = value;
break;
case 7: /* FD_DCR */
case FD_DCR:
sun_fdc->dcr_82072 = value;
break;
case 4: /* FD_STATUS */
case FD_DSR:
sun_fdc->status_82072 = value;
break;
}
@ -154,23 +154,23 @@ static void sun_82072_fd_outb(unsigned char value, int port)
static unsigned char sun_82077_fd_inb(int port)
{
udelay(5);
switch(port & 7) {
switch (port) {
default:
printk("floppy: Asked to read unknown port %d\n", port);
panic("floppy: Port bolixed.");
case 0: /* FD_STATUS_0 */
case FD_SRA:
return sun_fdc->status1_82077;
case 1: /* FD_STATUS_1 */
case FD_SRB:
return sun_fdc->status2_82077;
case 2: /* FD_DOR */
case FD_DOR:
return sun_fdc->dor_82077;
case 3: /* FD_TDR */
case FD_TDR:
return sun_fdc->tapectl_82077;
case 4: /* FD_STATUS */
case FD_STATUS:
return sun_fdc->status_82077 & ~STATUS_DMA;
case 5: /* FD_DATA */
case FD_DATA:
return sun_fdc->data_82077;
case 7: /* FD_DIR */
case FD_DIR:
return sun_read_dir();
}
panic("sun_82077_fd_inb: How did I get here?");
@ -179,23 +179,23 @@ static unsigned char sun_82077_fd_inb(int port)
static void sun_82077_fd_outb(unsigned char value, int port)
{
udelay(5);
switch(port & 7) {
switch (port) {
default:
printk("floppy: Asked to write to unknown port %d\n", port);
panic("floppy: Port bolixed.");
case 2: /* FD_DOR */
case FD_DOR:
sun_set_dor(value, 1);
break;
case 5: /* FD_DATA */
case FD_DATA:
sun_fdc->data_82077 = value;
break;
case 7: /* FD_DCR */
case FD_DCR:
sun_fdc->dcr_82077 = value;
break;
case 4: /* FD_STATUS */
case FD_DSR:
sun_fdc->status_82077 = value;
break;
case 3: /* FD_TDR */
case FD_TDR:
sun_fdc->tapectl_82077 = value;
break;
}


@ -47,8 +47,9 @@ unsigned long fdc_status;
static struct platform_device *floppy_op = NULL;
struct sun_floppy_ops {
unsigned char (*fd_inb) (unsigned long port);
void (*fd_outb) (unsigned char value, unsigned long port);
unsigned char (*fd_inb) (unsigned long port, unsigned int reg);
void (*fd_outb) (unsigned char value, unsigned long base,
unsigned int reg);
void (*fd_enable_dma) (void);
void (*fd_disable_dma) (void);
void (*fd_set_dma_mode) (int);
@ -62,8 +63,8 @@ struct sun_floppy_ops {
static struct sun_floppy_ops sun_fdops;
#define fd_inb(port) sun_fdops.fd_inb(port)
#define fd_outb(value,port) sun_fdops.fd_outb(value,port)
#define fd_inb(base, reg) sun_fdops.fd_inb(base, reg)
#define fd_outb(value, base, reg) sun_fdops.fd_outb(value, base, reg)
#define fd_enable_dma() sun_fdops.fd_enable_dma()
#define fd_disable_dma() sun_fdops.fd_disable_dma()
#define fd_request_dma() (0) /* nothing... */
@ -97,42 +98,43 @@ static int sun_floppy_types[2] = { 0, 0 };
/* No 64k boundary crossing problems on the Sparc. */
#define CROSS_64KB(a,s) (0)
static unsigned char sun_82077_fd_inb(unsigned long port)
static unsigned char sun_82077_fd_inb(unsigned long base, unsigned int reg)
{
udelay(5);
switch(port & 7) {
switch (reg) {
default:
printk("floppy: Asked to read unknown port %lx\n", port);
printk("floppy: Asked to read unknown port %x\n", reg);
panic("floppy: Port bolixed.");
case 4: /* FD_STATUS */
case FD_STATUS:
return sbus_readb(&sun_fdc->status_82077) & ~STATUS_DMA;
case 5: /* FD_DATA */
case FD_DATA:
return sbus_readb(&sun_fdc->data_82077);
case 7: /* FD_DIR */
case FD_DIR:
/* XXX: Is DCL on 0x80 in sun4m? */
return sbus_readb(&sun_fdc->dir_82077);
}
panic("sun_82072_fd_inb: How did I get here?");
}
static void sun_82077_fd_outb(unsigned char value, unsigned long port)
static void sun_82077_fd_outb(unsigned char value, unsigned long base,
unsigned int reg)
{
udelay(5);
switch(port & 7) {
switch (reg) {
default:
printk("floppy: Asked to write to unknown port %lx\n", port);
printk("floppy: Asked to write to unknown port %x\n", reg);
panic("floppy: Port bolixed.");
case 2: /* FD_DOR */
case FD_DOR:
/* Happily, the 82077 has a real DOR register. */
sbus_writeb(value, &sun_fdc->dor_82077);
break;
case 5: /* FD_DATA */
case FD_DATA:
sbus_writeb(value, &sun_fdc->data_82077);
break;
case 7: /* FD_DCR */
case FD_DCR:
sbus_writeb(value, &sun_fdc->dcr_82077);
break;
case 4: /* FD_STATUS */
case FD_DSR:
sbus_writeb(value, &sun_fdc->status_82077);
break;
}
@ -298,19 +300,21 @@ static struct sun_pci_dma_op sun_pci_dma_pending = { -1U, 0, 0, NULL};
irqreturn_t floppy_interrupt(int irq, void *dev_id);
static unsigned char sun_pci_fd_inb(unsigned long port)
static unsigned char sun_pci_fd_inb(unsigned long base, unsigned int reg)
{
udelay(5);
return inb(port);
return inb(base + reg);
}
static void sun_pci_fd_outb(unsigned char val, unsigned long port)
static void sun_pci_fd_outb(unsigned char val, unsigned long base,
unsigned int reg)
{
udelay(5);
outb(val, port);
outb(val, base + reg);
}
static void sun_pci_fd_broken_outb(unsigned char val, unsigned long port)
static void sun_pci_fd_broken_outb(unsigned char val, unsigned long base,
unsigned int reg)
{
udelay(5);
/*
@ -320,16 +324,17 @@ static void sun_pci_fd_broken_outb(unsigned char val, unsigned long port)
* this does not hurt correct hardware like the AXmp.
* (Eddie, Sep 12 1998).
*/
if (port == ((unsigned long)sun_fdc) + 2) {
if (reg == FD_DOR) {
if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x20)) {
val |= 0x10;
}
}
outb(val, port);
outb(val, base + reg);
}
#ifdef PCI_FDC_SWAP_DRIVES
static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long port)
static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long base,
unsigned int reg)
{
udelay(5);
/*
@ -339,13 +344,13 @@ static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long port)
* this does not hurt correct hardware like the AXmp.
* (Eddie, Sep 12 1998).
*/
if (port == ((unsigned long)sun_fdc) + 2) {
if (reg == FD_DOR) {
if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x10)) {
val &= ~(0x03);
val |= 0x21;
}
}
outb(val, port);
outb(val, base + reg);
}
#endif /* PCI_FDC_SWAP_DRIVES */


@ -31,8 +31,8 @@
#define CSW fd_routine[can_use_virtual_dma & 1]
#define fd_inb(port) inb_p(port)
#define fd_outb(value, port) outb_p(value, port)
#define fd_inb(base, reg) inb_p((base) + (reg))
#define fd_outb(value, base, reg) outb_p(value, (base) + (reg))
#define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy")
#define fd_free_dma() CSW._free_dma(FLOPPY_DMA)
@ -77,25 +77,26 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
st = 1;
for (lcount = virtual_dma_count, lptr = virtual_dma_addr;
lcount; lcount--, lptr++) {
st = inb(virtual_dma_port + 4) & 0xa0;
if (st != 0xa0)
st = inb(virtual_dma_port + FD_STATUS);
st &= STATUS_DMA | STATUS_READY;
if (st != (STATUS_DMA | STATUS_READY))
break;
if (virtual_dma_mode)
outb_p(*lptr, virtual_dma_port + 5);
outb_p(*lptr, virtual_dma_port + FD_DATA);
else
*lptr = inb_p(virtual_dma_port + 5);
*lptr = inb_p(virtual_dma_port + FD_DATA);
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
st = inb(virtual_dma_port + 4);
st = inb(virtual_dma_port + FD_STATUS);
}
#ifdef TRACE_FLPY_INT
calls++;
#endif
if (st == 0x20)
if (st == STATUS_DMA)
return IRQ_HANDLED;
if (!(st & 0x20)) {
if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count = 0;
#ifdef TRACE_FLPY_INT


@ -13,10 +13,11 @@
#include <asm/ebcdic.h>
#include <linux/uaccess.h>
#include <asm/vtoc.h>
#include <linux/module.h>
#include <linux/dasd_mod.h>
#include "check.h"
union label_t {
struct vtoc_volume_label_cdl vol;
struct vtoc_volume_label_ldl lnx;
@ -288,7 +289,9 @@ static int find_cms1_partitions(struct parsed_partitions *state,
*/
int ibm_partition(struct parsed_partitions *state)
{
int (*fn)(struct gendisk *disk, dasd_information2_t *info);
struct block_device *bdev = state->bdev;
struct gendisk *disk = bdev->bd_disk;
int blocksize, res;
loff_t i_size, offset, size;
dasd_information2_t *info;
@ -299,24 +302,31 @@ int ibm_partition(struct parsed_partitions *state)
union label_t *label;
res = 0;
if (!disk->fops->getgeo)
goto out_exit;
fn = symbol_get(dasd_biodasdinfo);
if (!fn)
goto out_exit;
blocksize = bdev_logical_block_size(bdev);
if (blocksize <= 0)
goto out_exit;
goto out_symbol;
i_size = i_size_read(bdev->bd_inode);
if (i_size == 0)
goto out_exit;
goto out_symbol;
info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
if (info == NULL)
goto out_exit;
goto out_symbol;
geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL);
if (geo == NULL)
goto out_nogeo;
label = kmalloc(sizeof(union label_t), GFP_KERNEL);
if (label == NULL)
goto out_nolab;
if (ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
/* set start if not filled by getgeo function e.g. virtblk */
geo->start = get_start_sect(bdev);
if (disk->fops->getgeo(bdev, geo))
goto out_freeall;
if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0) {
if (fn(disk, info)) {
kfree(info);
info = NULL;
}
@ -359,6 +369,8 @@ out_nolab:
kfree(geo);
out_nogeo:
kfree(info);
out_symbol:
symbol_put(dasd_biodasdinfo);
out_exit:
return res;
}
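
The ibm_partition() rework above replaces ioctl_by_bdev() with a direct
->getgeo() call plus a module-safe reference to the dasd_biodasdinfo()
export declared in the new include/linux/dasd_mod.h. A condensed sketch of
the symbol_get()/symbol_put() pattern it relies on; the helper name is
illustrative only.

/* Take a reference on the optional dasd_mod export, bail out if the module
 * is not loaded, and drop the reference on every path.
 */
static bool disk_is_dasd(struct gendisk *disk, dasd_information2_t *info)
{
	int (*fn)(struct gendisk *disk, dasd_information2_t *info);
	bool ret = false;

	fn = symbol_get(dasd_biodasdinfo);
	if (!fn)
		return false;		/* DASD driver not present */
	if (fn(disk, info) == 0)
		ret = true;
	symbol_put(dasd_biodasdinfo);
	return ret;
}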

The diff for this file is not shown because of its large size.


@ -228,24 +228,34 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_mq_unfreeze_queue(lo->lo_queue);
}
/**
* loop_validate_block_size() - validates the passed in block size
* @bsize: size to validate
*/
static int
figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
loop_validate_block_size(unsigned short bsize)
{
if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
return -EINVAL;
return 0;
}
/**
* loop_set_size() - sets device size and notifies userspace
* @lo: struct loop_device to set the size for
* @size: new size of the loop device
*
* Callers must validate that the size passed into this function fits into
* a sector_t, eg using loop_validate_size()
*/
static void loop_set_size(struct loop_device *lo, loff_t size)
{
loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
sector_t x = (sector_t)size;
struct block_device *bdev = lo->lo_device;
if (unlikely((loff_t)x != size))
return -EFBIG;
if (lo->lo_offset != offset)
lo->lo_offset = offset;
if (lo->lo_sizelimit != sizelimit)
lo->lo_sizelimit = sizelimit;
set_capacity(lo->lo_disk, x);
bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
return 0;
bd_set_size(bdev, size << SECTOR_SHIFT);
set_capacity_revalidate_and_notify(lo->lo_disk, size, false);
}
static inline int
@ -952,132 +962,6 @@ static void loop_update_rotational(struct loop_device *lo)
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
}
static int loop_set_fd(struct loop_device *lo, fmode_t mode,
struct block_device *bdev, unsigned int arg)
{
struct file *file;
struct inode *inode;
struct address_space *mapping;
struct block_device *claimed_bdev = NULL;
int lo_flags = 0;
int error;
loff_t size;
bool partscan;
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
error = -EBADF;
file = fget(arg);
if (!file)
goto out;
/*
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
*/
if (!(mode & FMODE_EXCL)) {
claimed_bdev = bd_start_claiming(bdev, loop_set_fd);
if (IS_ERR(claimed_bdev)) {
error = PTR_ERR(claimed_bdev);
goto out_putf;
}
}
error = mutex_lock_killable(&loop_ctl_mutex);
if (error)
goto out_bdev;
error = -EBUSY;
if (lo->lo_state != Lo_unbound)
goto out_unlock;
error = loop_validate_file(file, bdev);
if (error)
goto out_unlock;
mapping = file->f_mapping;
inode = mapping->host;
if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
!file->f_op->write_iter)
lo_flags |= LO_FLAGS_READ_ONLY;
error = -EFBIG;
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size)
goto out_unlock;
error = loop_prepare_queue(lo);
if (error)
goto out_unlock;
error = 0;
set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->use_dio = false;
lo->lo_device = bdev;
lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
lo->transfer = NULL;
lo->ioctl = NULL;
lo->lo_sizelimit = 0;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_write_cache(lo->lo_queue, true, false);
if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) {
/* In case of direct I/O, match underlying block size */
unsigned short bsize = bdev_logical_block_size(
inode->i_sb->s_bdev);
blk_queue_logical_block_size(lo->lo_queue, bsize);
blk_queue_physical_block_size(lo->lo_queue, bsize);
blk_queue_io_min(lo->lo_queue, bsize);
}
loop_update_rotational(lo);
loop_update_dio(lo);
set_capacity(lo->lo_disk, size);
bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
/* let user-space know about the new size */
kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
block_size(inode->i_bdev) : PAGE_SIZE);
lo->lo_state = Lo_bound;
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
/* Grab the block_device to prevent its destruction after we
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
*/
bdgrab(bdev);
mutex_unlock(&loop_ctl_mutex);
if (partscan)
loop_reread_partitions(lo, bdev);
if (claimed_bdev)
bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
return 0;
out_unlock:
mutex_unlock(&loop_ctl_mutex);
out_bdev:
if (claimed_bdev)
bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
out_putf:
fput(file);
out:
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return error;
}
static int
loop_release_xfer(struct loop_device *lo)
{
@ -1115,6 +999,203 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
return err;
}
/**
* loop_set_status_from_info - configure device from loop_info
* @lo: struct loop_device to configure
* @info: struct loop_info64 to configure the device with
*
* Configures the loop device parameters according to the passed
* in loop_info64 configuration.
*/
static int
loop_set_status_from_info(struct loop_device *lo,
const struct loop_info64 *info)
{
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
return -EINVAL;
err = loop_release_xfer(lo);
if (err)
return err;
if (info->lo_encrypt_type) {
unsigned int type = info->lo_encrypt_type;
if (type >= MAX_LO_CRYPT)
return -EINVAL;
xfer = xfer_funcs[type];
if (xfer == NULL)
return -EINVAL;
} else
xfer = NULL;
err = loop_init_xfer(lo, xfer, info);
if (err)
return err;
lo->lo_offset = info->lo_offset;
lo->lo_sizelimit = info->lo_sizelimit;
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
if (!xfer)
xfer = &none_funcs;
lo->transfer = xfer->transfer;
lo->ioctl = xfer->ioctl;
lo->lo_flags = info->lo_flags;
lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
lo->lo_init[0] = info->lo_init[0];
lo->lo_init[1] = info->lo_init[1];
if (info->lo_encrypt_key_size) {
memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
info->lo_encrypt_key_size);
lo->lo_key_owner = uid;
}
return 0;
}
static int loop_configure(struct loop_device *lo, fmode_t mode,
struct block_device *bdev,
const struct loop_config *config)
{
struct file *file;
struct inode *inode;
struct address_space *mapping;
struct block_device *claimed_bdev = NULL;
int error;
loff_t size;
bool partscan;
unsigned short bsize;
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
error = -EBADF;
file = fget(config->fd);
if (!file)
goto out;
/*
* If we don't hold exclusive handle for the device, upgrade to it
* here to avoid changing device under exclusive owner.
*/
if (!(mode & FMODE_EXCL)) {
claimed_bdev = bd_start_claiming(bdev, loop_configure);
if (IS_ERR(claimed_bdev)) {
error = PTR_ERR(claimed_bdev);
goto out_putf;
}
}
error = mutex_lock_killable(&loop_ctl_mutex);
if (error)
goto out_bdev;
error = -EBUSY;
if (lo->lo_state != Lo_unbound)
goto out_unlock;
error = loop_validate_file(file, bdev);
if (error)
goto out_unlock;
mapping = file->f_mapping;
inode = mapping->host;
size = get_loop_size(lo, file);
if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
error = -EINVAL;
goto out_unlock;
}
if (config->block_size) {
error = loop_validate_block_size(config->block_size);
if (error)
goto out_unlock;
}
error = loop_set_status_from_info(lo, &config->info);
if (error)
goto out_unlock;
if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
!file->f_op->write_iter)
lo->lo_flags |= LO_FLAGS_READ_ONLY;
error = loop_prepare_queue(lo);
if (error)
goto out_unlock;
set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
lo->lo_backing_file = file;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_write_cache(lo->lo_queue, true, false);
if (config->block_size)
bsize = config->block_size;
else if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev)
/* In case of direct I/O, match underlying block size */
bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
else
bsize = 512;
blk_queue_logical_block_size(lo->lo_queue, bsize);
blk_queue_physical_block_size(lo->lo_queue, bsize);
blk_queue_io_min(lo->lo_queue, bsize);
loop_update_rotational(lo);
loop_update_dio(lo);
loop_sysfs_init(lo);
loop_set_size(lo, size);
set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
block_size(inode->i_bdev) : PAGE_SIZE);
lo->lo_state = Lo_bound;
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
/* Grab the block_device to prevent its destruction after we
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
*/
bdgrab(bdev);
mutex_unlock(&loop_ctl_mutex);
if (partscan)
loop_reread_partitions(lo, bdev);
if (claimed_bdev)
bd_abort_claiming(bdev, claimed_bdev, loop_configure);
return 0;
out_unlock:
mutex_unlock(&loop_ctl_mutex);
out_bdev:
if (claimed_bdev)
bd_abort_claiming(bdev, claimed_bdev, loop_configure);
out_putf:
fput(file);
out:
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
return error;
}
static int __loop_clr_fd(struct loop_device *lo, bool release)
{
struct file *filp = NULL;
@ -1263,10 +1344,11 @@ static int
loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
{
int err;
struct loop_func_table *xfer;
kuid_t uid = current_uid();
struct block_device *bdev;
kuid_t uid = current_uid();
int prev_lo_flags;
bool partscan = false;
bool size_changed = false;
err = mutex_lock_killable(&loop_ctl_mutex);
if (err)
@ -1281,13 +1363,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
err = -ENXIO;
goto out_unlock;
}
if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) {
err = -EINVAL;
goto out_unlock;
}
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
size_changed = true;
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
}
@ -1295,79 +1374,44 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
err = loop_release_xfer(lo);
if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
/* If any pages were dirtied after kill_bdev(), try again */
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
}
prev_lo_flags = lo->lo_flags;
err = loop_set_status_from_info(lo, info);
if (err)
goto out_unfreeze;
if (info->lo_encrypt_type) {
unsigned int type = info->lo_encrypt_type;
/* Mask out flags that can't be set using LOOP_SET_STATUS. */
lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS;
/* For those flags, use the previous values instead */
lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS;
/* For flags that can't be cleared, use previous values too */
lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_CLEARABLE_FLAGS;
if (type >= MAX_LO_CRYPT) {
err = -EINVAL;
goto out_unfreeze;
}
xfer = xfer_funcs[type];
if (xfer == NULL) {
err = -EINVAL;
goto out_unfreeze;
}
} else
xfer = NULL;
err = loop_init_xfer(lo, xfer, info);
if (err)
goto out_unfreeze;
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
/* kill_bdev should have truncated all the pages */
if (lo->lo_device->bd_inode->i_mapping->nrpages) {
err = -EAGAIN;
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
__func__, lo->lo_number, lo->lo_file_name,
lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
}
if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
err = -EFBIG;
goto out_unfreeze;
}
if (size_changed) {
loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
lo->lo_backing_file);
loop_set_size(lo, new_size);
}
loop_config_discard(lo);
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
if (!xfer)
xfer = &none_funcs;
lo->transfer = xfer->transfer;
lo->ioctl = xfer->ioctl;
if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
(info->lo_flags & LO_FLAGS_AUTOCLEAR))
lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
lo->lo_init[0] = info->lo_init[0];
lo->lo_init[1] = info->lo_init[1];
if (info->lo_encrypt_key_size) {
memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
info->lo_encrypt_key_size);
lo->lo_key_owner = uid;
}
/* update dio if lo_offset or transfer is changed */
__loop_update_dio(lo, lo->use_dio);
out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) &&
!(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_flags |= LO_FLAGS_PARTSCAN;
if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) &&
!(prev_lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
bdev = lo->lo_device;
partscan = true;
@ -1531,10 +1575,15 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
static int loop_set_capacity(struct loop_device *lo)
{
loff_t size;
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
size = get_loop_size(lo, lo->lo_backing_file);
loop_set_size(lo, size);
return 0;
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@ -1558,8 +1607,9 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (lo->lo_state != Lo_bound)
return -ENXIO;
if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
return -EINVAL;
err = loop_validate_block_size(arg);
if (err)
return err;
if (lo->lo_queue->limits.logical_block_size == arg)
return 0;
@ -1617,11 +1667,31 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct loop_device *lo = bdev->bd_disk->private_data;
void __user *argp = (void __user *) arg;
int err;
switch (cmd) {
case LOOP_SET_FD:
return loop_set_fd(lo, mode, bdev, arg);
case LOOP_SET_FD: {
/*
* Legacy case - pass in a zeroed out struct loop_config with
* only the file descriptor set , which corresponds with the
* default parameters we'd have used otherwise.
*/
struct loop_config config;
memset(&config, 0, sizeof(config));
config.fd = arg;
return loop_configure(lo, mode, bdev, &config);
}
case LOOP_CONFIGURE: {
struct loop_config config;
if (copy_from_user(&config, argp, sizeof(config)))
return -EFAULT;
return loop_configure(lo, mode, bdev, &config);
}
case LOOP_CHANGE_FD:
return loop_change_fd(lo, bdev, arg);
case LOOP_CLR_FD:
@ -1629,21 +1699,19 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_SET_STATUS:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
err = loop_set_status_old(lo,
(struct loop_info __user *)arg);
err = loop_set_status_old(lo, argp);
}
break;
case LOOP_GET_STATUS:
return loop_get_status_old(lo, (struct loop_info __user *) arg);
return loop_get_status_old(lo, argp);
case LOOP_SET_STATUS64:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
err = loop_set_status64(lo,
(struct loop_info64 __user *) arg);
err = loop_set_status64(lo, argp);
}
break;
case LOOP_GET_STATUS64:
return loop_get_status64(lo, (struct loop_info64 __user *) arg);
return loop_get_status64(lo, argp);
case LOOP_SET_CAPACITY:
case LOOP_SET_DIRECT_IO:
case LOOP_SET_BLOCK_SIZE:
@ -1795,6 +1863,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_CLR_FD:
case LOOP_GET_STATUS64:
case LOOP_SET_STATUS64:
case LOOP_CONFIGURE:
arg = (unsigned long) compat_ptr(arg);
/* fall through */
case LOOP_SET_FD:


@ -327,7 +327,7 @@ static inline void swim_motor(struct swim __iomem *base,
swim_select(base, RELAX);
if (swim_readbit(base, MOTOR_ON))
break;
current->state = TASK_INTERRUPTIBLE;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
} else if (action == OFF) {
@ -346,7 +346,7 @@ static inline void swim_eject(struct swim __iomem *base)
swim_select(base, RELAX);
if (!swim_readbit(base, DISK_IN))
break;
current->state = TASK_INTERRUPTIBLE;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
swim_select(base, RELAX);
@ -370,7 +370,7 @@ static inline int swim_step(struct swim __iomem *base)
for (wait = 0; wait < HZ; wait++) {
current->state = TASK_INTERRUPTIBLE;
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
swim_select(base, RELAX);
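
The three swim changes above are the same fix: the driver marked itself
sleepable by writing current->state directly, which lacks the memory barrier
that set_current_state() provides, so a wakeup racing with the plain store
could be missed. Since the unmarked diff interleaves old and new lines, here
is the resulting polling wait consolidated for one of the loops (swim_motor):

for (wait = 0; wait < HZ; wait++) {
	swim_select(base, RELAX);
	if (swim_readbit(base, MOTOR_ON))
		break;
	set_current_state(TASK_INTERRUPTIBLE);	/* barrier before sleeping */
	schedule_timeout(1);
}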


@ -26,3 +26,12 @@ config BCACHE_CLOSURES_DEBUG
Keeps all active closures in a linked list and provides a debugfs
interface to list them, which makes it possible to see asynchronous
operations that get stuck.
config BCACHE_ASYNC_REGISTRAION
bool "Asynchronous device registration (EXPERIMENTAL)"
depends on BCACHE
help
Add a sysfs file /sys/fs/bcache/register_async. Writing registering
device path into this file will returns immediately and the real
registration work is handled in kernel work queue in asynchronous
way.


@ -176,7 +176,7 @@
* - updates to non leaf nodes just happen synchronously (see btree_split()).
*/
#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
#include <linux/bcache.h>
#include <linux/bio.h>
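
The bcache printk rework in the hunks that follow applies one rule: the
trailing "\n" moves out of pr_fmt() and into each individual message, so a
line can be built up with pr_cont() instead of the initial pr_err() being
force-terminated with a newline. A minimal sketch of the convention;
example() is a made-up caller.

#define pr_fmt(fmt) "bcache: %s() " fmt, __func__	/* no trailing "\n" */

#include <linux/errno.h>
#include <linux/printk.h>

static void example(void)
{
	pr_err("error %i\n", -EIO);		/* message ends its own line */
	pr_debug("sorted %i keys\n", 128);	/* same for debug output */
}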


@ -6,7 +6,7 @@
* Copyright 2012 Google, Inc.
*/
#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
#include "util.h"
#include "bset.h"
@ -31,7 +31,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
if (b->ops->key_dump)
b->ops->key_dump(b, k);
else
pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
if (next < bset_bkey_last(i) &&
bkey_cmp(k, b->ops->is_extents ?
@ -1225,7 +1225,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0;
pr_debug("sorted %i keys", out->keys);
pr_debug("sorted %i keys\n", out->keys);
}
static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,


@ -619,7 +619,7 @@ retry:
* and BTREE_NODE_journal_flush bit cleared by btree_flush_write().
*/
if (btree_node_journal_flush(b)) {
pr_debug("bnode %p is flushing by journal, retry", b);
pr_debug("bnode %p is flushing by journal, retry\n", b);
mutex_unlock(&b->write_lock);
udelay(1);
goto retry;
@ -802,7 +802,7 @@ int bch_btree_cache_alloc(struct cache_set *c)
c->shrink.batch = c->btree_pages * 2;
if (register_shrinker(&c->shrink))
pr_warn("bcache: %s: could not register shrinker",
pr_warn("bcache: %s: could not register shrinker\n",
__func__);
return 0;
@ -1054,7 +1054,7 @@ retry:
*/
if (btree_node_journal_flush(b)) {
mutex_unlock(&b->write_lock);
pr_debug("bnode %p journal_flush set, retry", b);
pr_debug("bnode %p journal_flush set, retry\n", b);
udelay(1);
goto retry;
}
@ -1798,7 +1798,7 @@ static void bch_btree_gc(struct cache_set *c)
schedule_timeout_interruptible(msecs_to_jiffies
(GC_SLEEP_MS));
else if (ret)
pr_warn("gc failed!");
pr_warn("gc failed!\n");
} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
bch_btree_gc_finish(c);
@ -1907,10 +1907,8 @@ static int bch_btree_check_thread(void *arg)
struct btree_iter iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
int i, n;
k = p = NULL;
i = n = 0;
cur_idx = prev_idx = 0;
ret = 0;
@ -2045,7 +2043,7 @@ int bch_btree_check(struct cache_set *c)
&check_state->infos[i],
name);
if (IS_ERR(check_state->infos[i].thread)) {
pr_err("fails to run thread bch_btrchk[%d]", i);
pr_err("fails to run thread bch_btrchk[%d]\n", i);
for (--i; i >= 0; i--)
kthread_stop(check_state->infos[i].thread);
ret = -ENOMEM;
@ -2456,7 +2454,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
if (ret) {
struct bkey *k;
pr_err("error %i", ret);
pr_err("error %i\n", ret);
while ((k = bch_keylist_pop(keys)))
bkey_put(c, k);
@ -2744,7 +2742,7 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
break;
if (bkey_cmp(&buf->last_scanned, end) >= 0) {
pr_debug("scan finished");
pr_debug("scan finished\n");
break;
}


@ -130,18 +130,18 @@ static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
char buf[80];
bch_extent_to_text(buf, sizeof(buf), k);
pr_err(" %s", buf);
pr_cont(" %s", buf);
for (j = 0; j < KEY_PTRS(k); j++) {
size_t n = PTR_BUCKET_NR(b->c, k, j);
pr_err(" bucket %zu", n);
pr_cont(" bucket %zu", n);
if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
pr_err(" prio %i",
PTR_BUCKET(b->c, k, j)->prio);
pr_cont(" prio %i",
PTR_BUCKET(b->c, k, j)->prio);
}
pr_err(" %s\n", bch_ptr_status(b->c, k));
pr_cont(" %s\n", bch_ptr_status(b->c, k));
}
/* Btree ptrs */
@ -553,7 +553,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
if (stale && KEY_DIRTY(k)) {
bch_extent_to_text(buf, sizeof(buf), k);
pr_info("stale dirty pointer, stale %u, key: %s",
pr_info("stale dirty pointer, stale %u, key: %s\n",
stale, buf);
}


@ -65,14 +65,14 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
* we shouldn't count failed REQ_RAHEAD bio to dc->io_errors.
*/
if (bio->bi_opf & REQ_RAHEAD) {
pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore",
pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n",
dc->backing_dev_name);
return;
}
errors = atomic_add_return(1, &dc->io_errors);
if (errors < dc->error_limit)
pr_err("%s: IO error on backing device, unrecoverable",
pr_err("%s: IO error on backing device, unrecoverable\n",
dc->backing_dev_name);
else
bch_cached_dev_error(dc);
@ -123,12 +123,12 @@ void bch_count_io_errors(struct cache *ca,
errors >>= IO_ERROR_SHIFT;
if (errors < ca->set->error_limit)
pr_err("%s: IO error on %s%s",
pr_err("%s: IO error on %s%s\n",
ca->cache_dev_name, m,
is_read ? ", recovering." : ".");
else
bch_cache_set_error(ca->set,
"%s: too many IO errors %s",
"%s: too many IO errors %s\n",
ca->cache_dev_name, m);
}
}


@ -47,7 +47,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
closure_init_stack(&cl);
pr_debug("reading %u", bucket_index);
pr_debug("reading %u\n", bucket_index);
while (offset < ca->sb.bucket_size) {
reread: left = ca->sb.bucket_size - offset;
@ -78,13 +78,13 @@ reread: left = ca->sb.bucket_size - offset;
size_t blocks, bytes = set_bytes(j);
if (j->magic != jset_magic(&ca->sb)) {
pr_debug("%u: bad magic", bucket_index);
pr_debug("%u: bad magic\n", bucket_index);
return ret;
}
if (bytes > left << 9 ||
bytes > PAGE_SIZE << JSET_BITS) {
pr_info("%u: too big, %zu bytes, offset %u",
pr_info("%u: too big, %zu bytes, offset %u\n",
bucket_index, bytes, offset);
return ret;
}
@ -93,7 +93,7 @@ reread: left = ca->sb.bucket_size - offset;
goto reread;
if (j->csum != csum_set(j)) {
pr_info("%u: bad csum, %zu bytes, offset %u",
pr_info("%u: bad csum, %zu bytes, offset %u\n",
bucket_index, bytes, offset);
return ret;
}
@ -190,7 +190,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
uint64_t seq;
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
pr_debug("%u journal buckets", ca->sb.njournal_buckets);
pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
/*
* Read journal buckets ordered by golden ratio hash to quickly
@ -215,7 +215,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
* If that fails, check all the buckets we haven't checked
* already
*/
pr_debug("falling back to linear search");
pr_debug("falling back to linear search\n");
for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
l < ca->sb.njournal_buckets;
@ -233,7 +233,7 @@ bsearch:
/* Binary search */
m = l;
r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
pr_debug("starting binary search, l %u r %u", l, r);
pr_debug("starting binary search, l %u r %u\n", l, r);
while (l + 1 < r) {
seq = list_entry(list->prev, struct journal_replay,
@ -253,7 +253,7 @@ bsearch:
* Read buckets in reverse order until we stop finding more
* journal entries
*/
pr_debug("finishing up: m %u njournal_buckets %u",
pr_debug("finishing up: m %u njournal_buckets %u\n",
m, ca->sb.njournal_buckets);
l = m;
@ -370,10 +370,10 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
if (n != i->j.seq) {
if (n == start && is_discard_enabled(s))
pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)",
pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n",
n, i->j.seq - 1, start, end);
else {
pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)",
pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n",
n, i->j.seq - 1, start, end);
ret = -EIO;
goto err;
@ -403,7 +403,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
entries++;
}
pr_info("journal replay done, %i keys in %i entries, seq %llu",
pr_info("journal replay done, %i keys in %i entries, seq %llu\n",
keys, entries, end);
err:
while (!list_empty(list)) {
@ -481,7 +481,7 @@ static void btree_flush_write(struct cache_set *c)
break;
if (btree_node_journal_flush(b))
pr_err("BUG: flush_write bit should not be set here!");
pr_err("BUG: flush_write bit should not be set here!\n");
mutex_lock(&b->write_lock);
@ -534,13 +534,13 @@ static void btree_flush_write(struct cache_set *c)
for (i = 0; i < nr; i++) {
b = btree_nodes[i];
if (!b) {
pr_err("BUG: btree_nodes[%d] is NULL", i);
pr_err("BUG: btree_nodes[%d] is NULL\n", i);
continue;
}
/* safe to check without holding b->write_lock */
if (!btree_node_journal_flush(b)) {
pr_err("BUG: bnode %p: journal_flush bit cleaned", b);
pr_err("BUG: bnode %p: journal_flush bit cleaned\n", b);
continue;
}
@ -548,14 +548,14 @@ static void btree_flush_write(struct cache_set *c)
if (!btree_current_write(b)->journal) {
clear_bit(BTREE_NODE_journal_flush, &b->flags);
mutex_unlock(&b->write_lock);
pr_debug("bnode %p: written by others", b);
pr_debug("bnode %p: written by others\n", b);
continue;
}
if (!btree_node_dirty(b)) {
clear_bit(BTREE_NODE_journal_flush, &b->flags);
mutex_unlock(&b->write_lock);
pr_debug("bnode %p: dirty bit cleaned by others", b);
pr_debug("bnode %p: dirty bit cleaned by others\n", b);
continue;
}
@ -716,7 +716,7 @@ void bch_journal_next(struct journal *j)
j->cur->data->keys = 0;
if (fifo_full(&j->pin))
pr_debug("journal_pin full (%zu)", fifo_used(&j->pin));
pr_debug("journal_pin full (%zu)\n", fifo_used(&j->pin));
}
static void journal_write_endio(struct bio *bio)


@ -110,7 +110,7 @@ static void bch_data_invalidate(struct closure *cl)
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
struct bio *bio = op->bio;
pr_debug("invalidating %i sectors from %llu",
pr_debug("invalidating %i sectors from %llu\n",
bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
while (bio_sectors(bio)) {
@ -396,7 +396,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
pr_debug("skipping unaligned io");
pr_debug("skipping unaligned io\n");
goto skip;
}
@ -650,7 +650,7 @@ static void backing_request_endio(struct bio *bio)
*/
if (unlikely(s->iop.writeback &&
bio->bi_opf & REQ_PREFLUSH)) {
pr_err("Can't flush %s: returned bi_status %i",
pr_err("Can't flush %s: returned bi_status %i\n",
dc->backing_dev_name, bio->bi_status);
} else {
/* set to orig_bio->bi_status in bio_complete() */


@ -89,7 +89,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
for (i = 0; i < SB_JOURNAL_BUCKETS; i++)
sb->d[i] = le64_to_cpu(s->d[i]);
pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n",
sb->version, sb->flags, sb->seq, sb->keys);
err = "Not a bcache superblock (bad offset)";
@ -234,7 +234,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
out->csum = csum_set(out);
pr_debug("ver %llu, flags %llu, seq %llu",
pr_debug("ver %llu, flags %llu, seq %llu\n",
sb->version, sb->flags, sb->seq);
submit_bio(bio);
@ -365,11 +365,11 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
}
bch_extent_to_text(buf, sizeof(buf), k);
pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? "wrote" : "read", buf);
pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf);
for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
if (!bch_is_zero(u->uuid, 16))
pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u",
pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n",
u - c->uuids, u->uuid, u->label,
u->first_reg, u->last_reg, u->invalidated);
@ -534,7 +534,7 @@ int bch_prio_write(struct cache *ca, bool wait)
struct bucket *b;
struct closure cl;
pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu",
pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n",
fifo_used(&ca->free[RESERVE_PRIO]),
fifo_used(&ca->free[RESERVE_NONE]),
fifo_used(&ca->free_inc));
@ -629,12 +629,12 @@ static int prio_read(struct cache *ca, uint64_t bucket)
if (p->csum !=
bch_crc64(&p->magic, bucket_bytes(ca) - 8)) {
pr_warn("bad csum reading priorities");
pr_warn("bad csum reading priorities\n");
goto out;
}
if (p->magic != pset_magic(&ca->sb)) {
pr_warn("bad magic reading priorities");
pr_warn("bad magic reading priorities\n");
goto out;
}
@ -728,11 +728,11 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
ret = sysfs_create_link(&d->kobj, &c->kobj, "cache");
if (ret < 0)
pr_err("Couldn't create device -> cache set symlink");
pr_err("Couldn't create device -> cache set symlink\n");
ret = sysfs_create_link(&c->kobj, &d->kobj, d->name);
if (ret < 0)
pr_err("Couldn't create cache set -> device symlink");
pr_err("Couldn't create cache set -> device symlink\n");
clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
}
@ -789,15 +789,17 @@ static void bcache_device_free(struct bcache_device *d)
lockdep_assert_held(&bch_register_lock);
if (disk)
pr_info("%s stopped", disk->disk_name);
pr_info("%s stopped\n", disk->disk_name);
else
pr_err("bcache device (NULL gendisk) stopped");
pr_err("bcache device (NULL gendisk) stopped\n");
if (d->c)
bcache_device_detach(d);
if (disk) {
if (disk->flags & GENHD_FL_UP)
bool disk_added = (disk->flags & GENHD_FL_UP) != 0;
if (disk_added)
del_gendisk(disk);
if (disk->queue)
@ -805,7 +807,8 @@ static void bcache_device_free(struct bcache_device *d)
ida_simple_remove(&bcache_device_idx,
first_minor_to_idx(disk->first_minor));
put_disk(disk);
if (disk_added)
put_disk(disk);
}
bioset_exit(&d->bio_split);
@ -830,7 +833,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
if (!d->nr_stripes || d->nr_stripes > max_stripes) {
pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)\n",
(unsigned int)d->nr_stripes);
return -ENOMEM;
}
@ -928,11 +931,11 @@ static int cached_dev_status_update(void *arg)
dc->offline_seconds = 0;
if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
pr_err("%s: device offline for %d seconds",
pr_err("%s: device offline for %d seconds\n",
dc->backing_dev_name,
BACKING_DEV_OFFLINE_TIMEOUT);
pr_err("%s: disable I/O request due to backing "
"device offline", dc->disk.name);
pr_err("%s: disable I/O request due to backing device offline\n",
dc->disk.name);
dc->io_disable = true;
/* let others know earlier that io_disable is true */
smp_mb();
@ -959,7 +962,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
};
if (dc->io_disable) {
pr_err("I/O disabled on cached dev %s",
pr_err("I/O disabled on cached dev %s\n",
dc->backing_dev_name);
kfree(env[1]);
kfree(env[2]);
@ -971,7 +974,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
kfree(env[1]);
kfree(env[2]);
kfree(buf);
pr_info("cached dev %s is running already",
pr_info("cached dev %s is running already\n",
dc->backing_dev_name);
return -EBUSY;
}
@ -1001,16 +1004,14 @@ int bch_cached_dev_run(struct cached_dev *dc)
if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
sysfs_create_link(&disk_to_dev(d->disk)->kobj,
&d->kobj, "bcache")) {
pr_err("Couldn't create bcache dev <-> disk sysfs symlinks");
pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n");
return -ENOMEM;
}
dc->status_update_thread = kthread_run(cached_dev_status_update,
dc, "bcache_status_update");
if (IS_ERR(dc->status_update_thread)) {
pr_warn("failed to create bcache_status_update kthread, "
"continue to run without monitoring backing "
"device status");
pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n");
}
return 0;
@ -1036,7 +1037,7 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
} while (time_out > 0);
if (time_out == 0)
pr_warn("give up waiting for dc->writeback_write_update to quit");
pr_warn("give up waiting for dc->writeback_write_update to quit\n");
cancel_delayed_work_sync(&dc->writeback_rate_update);
}
@ -1077,7 +1078,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
mutex_unlock(&bch_register_lock);
pr_info("Caching disabled for %s", dc->backing_dev_name);
pr_info("Caching disabled for %s\n", dc->backing_dev_name);
/* Drop ref we took in cached_dev_detach() */
closure_put(&dc->disk.cl);
@ -1117,20 +1118,20 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
return -ENOENT;
if (dc->disk.c) {
pr_err("Can't attach %s: already attached",
pr_err("Can't attach %s: already attached\n",
dc->backing_dev_name);
return -EINVAL;
}
if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
pr_err("Can't attach %s: shutting down",
pr_err("Can't attach %s: shutting down\n",
dc->backing_dev_name);
return -EINVAL;
}
if (dc->sb.block_size < c->sb.block_size) {
/* Will die */
pr_err("Couldn't attach %s: block size less than set's block size",
pr_err("Couldn't attach %s: block size less than set's block size\n",
dc->backing_dev_name);
return -EINVAL;
}
@ -1138,7 +1139,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
/* Check whether already attached */
list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
pr_err("Tried to attach %s but duplicate UUID already attached",
pr_err("Tried to attach %s but duplicate UUID already attached\n",
dc->backing_dev_name);
return -EINVAL;
@ -1157,14 +1158,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
if (!u) {
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
pr_err("Couldn't find uuid for %s in set",
pr_err("Couldn't find uuid for %s in set\n",
dc->backing_dev_name);
return -ENOENT;
}
u = uuid_find_empty(c);
if (!u) {
pr_err("Not caching %s, no room for UUID",
pr_err("Not caching %s, no room for UUID\n",
dc->backing_dev_name);
return -EINVAL;
}
@ -1210,7 +1211,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
down_write(&dc->writeback_lock);
if (bch_cached_dev_writeback_start(dc)) {
up_write(&dc->writeback_lock);
pr_err("Couldn't start writeback facilities for %s",
pr_err("Couldn't start writeback facilities for %s\n",
dc->disk.disk->disk_name);
return -ENOMEM;
}
@ -1233,7 +1234,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
*/
kthread_stop(dc->writeback_thread);
cancel_writeback_rate_update_dwork(dc);
pr_err("Couldn't run cached device %s",
pr_err("Couldn't run cached device %s\n",
dc->backing_dev_name);
return ret;
}
@ -1244,7 +1245,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
pr_info("Caching %s as %s on set %pU",
pr_info("Caching %s as %s on set %pU\n",
dc->backing_dev_name,
dc->disk.disk->disk_name,
dc->disk.c->sb.set_uuid);
@ -1384,7 +1385,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err;
pr_info("registered backing device %s", dc->backing_dev_name);
pr_info("registered backing device %s\n", dc->backing_dev_name);
list_add(&dc->list, &uncached_devices);
/* attach to a matched cache set if it exists */
@ -1401,7 +1402,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
return 0;
err:
pr_notice("error %s: %s", dc->backing_dev_name, err);
pr_notice("error %s: %s\n", dc->backing_dev_name, err);
bcache_device_stop(&dc->disk);
return ret;
}
@ -1497,7 +1498,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
u = uuid_find_empty(c);
if (!u) {
pr_err("Can't create volume, no room for UUID");
pr_err("Can't create volume, no room for UUID\n");
return -EINVAL;
}
@ -1523,7 +1524,7 @@ bool bch_cached_dev_error(struct cached_dev *dc)
smp_mb();
pr_err("stop %s: too many IO errors on backing device %s\n",
dc->disk.disk->disk_name, dc->backing_dev_name);
dc->disk.disk->disk_name, dc->backing_dev_name);
bcache_device_stop(&dc->disk);
return true;
@ -1534,6 +1535,7 @@ bool bch_cached_dev_error(struct cached_dev *dc)
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
if (c->on_error != ON_ERROR_PANIC &&
@ -1541,20 +1543,22 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
return false;
if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
pr_info("CACHE_SET_IO_DISABLE already set");
pr_info("CACHE_SET_IO_DISABLE already set\n");
/*
* XXX: we can be called from atomic context
* acquire_console_sem();
*/
pr_err("bcache: error on %pU: ", c->sb.set_uuid);
va_start(args, fmt);
vprintk(fmt, args);
va_end(args);
pr_err(", disabling caching\n");
vaf.fmt = fmt;
vaf.va = &args;
pr_err("error on %pU: %pV, disabling caching\n",
c->sb.set_uuid, &vaf);
va_end(args);
if (c->on_error == ON_ERROR_PANIC)
panic("panic forced after error\n");
@ -1606,7 +1610,7 @@ static void cache_set_free(struct closure *cl)
list_del(&c->list);
mutex_unlock(&bch_register_lock);
pr_info("Cache set %pU unregistered", c->sb.set_uuid);
pr_info("Cache set %pU unregistered\n", c->sb.set_uuid);
wake_up(&unregister_wait);
closure_debug_destroy(&c->cl);
@ -1677,7 +1681,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
struct cached_dev *dc)
{
if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n",
d->disk->disk_name, c->sb.set_uuid);
bcache_device_stop(d);
} else if (atomic_read(&dc->has_dirty)) {
@ -1685,7 +1689,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
* and dc->has_dirty == 1
*/
pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.\n",
d->disk->disk_name);
/*
* There might be a small time gap that cache set is
@ -1707,7 +1711,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
* and dc->has_dirty == 0
*/
pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n",
d->disk->disk_name);
}
}
@ -1874,7 +1878,7 @@ static int run_cache_set(struct cache_set *c)
if (bch_journal_read(c, &journal))
goto err;
pr_debug("btree_journal_read() done");
pr_debug("btree_journal_read() done\n");
err = "no journal entries found";
if (list_empty(&journal))
@ -1920,7 +1924,7 @@ static int run_cache_set(struct cache_set *c)
bch_journal_mark(c, &journal);
bch_initial_gc_finish(c);
pr_debug("btree_check() done");
pr_debug("btree_check() done\n");
/*
* bcache_journal_next() can't happen sooner, or
@ -1951,7 +1955,7 @@ static int run_cache_set(struct cache_set *c)
if (bch_journal_replay(c, &journal))
goto err;
} else {
pr_notice("invalidating existing data");
pr_notice("invalidating existing data\n");
for_each_cache(ca, c, i) {
unsigned int j;
@ -2085,7 +2089,7 @@ found:
memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16);
c->sb.flags = ca->sb.flags;
c->sb.seq = ca->sb.seq;
pr_debug("set version = %llu", c->sb.version);
pr_debug("set version = %llu\n", c->sb.version);
}
kobject_get(&ca->kobj);
@ -2247,7 +2251,7 @@ err_btree_alloc:
err_free:
module_put(THIS_MODULE);
if (err)
pr_notice("error %s: %s", ca->cache_dev_name, err);
pr_notice("error %s: %s\n", ca->cache_dev_name, err);
return ret;
}
@ -2301,14 +2305,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
goto out;
}
pr_info("registered cache device %s", ca->cache_dev_name);
pr_info("registered cache device %s\n", ca->cache_dev_name);
out:
kobject_put(&ca->kobj);
err:
if (err)
pr_notice("error %s: %s", ca->cache_dev_name, err);
pr_notice("error %s: %s\n", ca->cache_dev_name, err);
return ret;
}
@ -2323,6 +2327,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
kobj_attribute_write(register, register_bcache);
kobj_attribute_write(register_quiet, register_bcache);
kobj_attribute_write(register_async, register_bcache);
kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
static bool bch_is_open_backing(struct block_device *bdev)
@ -2358,6 +2363,83 @@ static bool bch_is_open(struct block_device *bdev)
return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
}
struct async_reg_args {
struct work_struct reg_work;
char *path;
struct cache_sb *sb;
struct cache_sb_disk *sb_disk;
struct block_device *bdev;
};
static void register_bdev_worker(struct work_struct *work)
{
bool fail = false;
struct async_reg_args *args =
container_of(work, struct async_reg_args, reg_work);
struct cached_dev *dc;
dc = kzalloc(sizeof(*dc), GFP_KERNEL);
if (!dc) {
fail = true;
put_page(virt_to_page(args->sb_disk));
blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
goto out;
}
mutex_lock(&bch_register_lock);
if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0)
fail = true;
mutex_unlock(&bch_register_lock);
out:
if (fail)
pr_info("error %s: fail to register backing device\n",
args->path);
kfree(args->sb);
kfree(args->path);
kfree(args);
module_put(THIS_MODULE);
}
static void register_cache_worker(struct work_struct *work)
{
bool fail = false;
struct async_reg_args *args =
container_of(work, struct async_reg_args, reg_work);
struct cache *ca;
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca) {
fail = true;
put_page(virt_to_page(args->sb_disk));
blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
goto out;
}
/* blkdev_put() will be called in bch_cache_release() */
if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0)
fail = true;
out:
if (fail)
pr_info("error %s: fail to register cache device\n",
args->path);
kfree(args->sb);
kfree(args->path);
kfree(args);
module_put(THIS_MODULE);
}
static void register_device_async(struct async_reg_args *args)
{
if (SB_IS_BDEV(args->sb))
INIT_WORK(&args->reg_work, register_bdev_worker);
else
INIT_WORK(&args->reg_work, register_cache_worker);
queue_work(system_wq, &args->reg_work);
}
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
const char *buffer, size_t size)
{
@ -2420,6 +2502,26 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
goto out_blkdev_put;
err = "failed to register device";
if (attr == &ksysfs_register_async) {
/* register in asynchronous way */
struct async_reg_args *args =
kzalloc(sizeof(struct async_reg_args), GFP_KERNEL);
if (!args) {
ret = -ENOMEM;
err = "cannot allocate memory";
goto out_put_sb_page;
}
args->path = path;
args->sb = sb;
args->sb_disk = sb_disk;
args->bdev = bdev;
register_device_async(args);
/* Return to user space without waiting */
goto async_done;
}
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
@ -2447,6 +2549,7 @@ done:
kfree(sb);
kfree(path);
module_put(THIS_MODULE);
async_done:
return size;
out_put_sb_page:
@ -2461,7 +2564,7 @@ out_free_path:
out_module_put:
module_put(THIS_MODULE);
out:
pr_info("error %s: %s", path?path:"", err);
pr_info("error %s: %s\n", path?path:"", err);
return ret;
}
@ -2506,7 +2609,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
mutex_unlock(&bch_register_lock);
list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
pr_info("delete pdev %p", pdev);
pr_info("delete pdev %p\n", pdev);
list_del(&pdev->list);
bcache_device_stop(&pdev->dc->disk);
kfree(pdev);
@ -2549,7 +2652,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
mutex_unlock(&bch_register_lock);
pr_info("Stopping all devices:");
pr_info("Stopping all devices:\n");
/*
* The reason bch_register_lock is not held to call
@ -2599,9 +2702,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
finish_wait(&unregister_wait, &wait);
if (stopped)
pr_info("All devices stopped");
pr_info("All devices stopped\n");
else
pr_notice("Timeout waiting for devices to be closed");
pr_notice("Timeout waiting for devices to be closed\n");
out:
mutex_unlock(&bch_register_lock);
}
@ -2637,7 +2740,7 @@ static void check_module_parameters(void)
if (bch_cutoff_writeback_sync == 0)
bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC;
else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) {
pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u",
pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n",
bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX);
bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX;
}
@ -2645,13 +2748,13 @@ static void check_module_parameters(void)
if (bch_cutoff_writeback == 0)
bch_cutoff_writeback = CUTOFF_WRITEBACK;
else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) {
pr_warn("set bch_cutoff_writeback (%u) to max value %u",
pr_warn("set bch_cutoff_writeback (%u) to max value %u\n",
bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX);
bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX;
}
if (bch_cutoff_writeback > bch_cutoff_writeback_sync) {
pr_warn("set bch_cutoff_writeback (%u) to %u",
pr_warn("set bch_cutoff_writeback (%u) to %u\n",
bch_cutoff_writeback, bch_cutoff_writeback_sync);
bch_cutoff_writeback = bch_cutoff_writeback_sync;
}
@ -2662,6 +2765,9 @@ static int __init bcache_init(void)
static const struct attribute *files[] = {
&ksysfs_register.attr,
&ksysfs_register_quiet.attr,
#ifdef CONFIG_BCACHE_ASYNC_REGISTRAION
&ksysfs_register_async.attr,
#endif
&ksysfs_pendings_cleanup.attr,
NULL
};
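When CONFIG_BCACHE_ASYNC_REGISTRAION is enabled, the register_async attribute added above appears next to the existing register and register_quiet files, and a write to it queues the probe on a workqueue and returns immediately. A hedged userspace sketch of driving it; the /sys/fs/bcache/register_async path is inferred from where bcache already exposes its register file, and the device path is only an example:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *dev = "/dev/sdb";	/* backing or cache device to register */
	int fd = open("/sys/fs/bcache/register_async", O_WRONLY);

	if (fd < 0) {
		perror("open register_async");
		return 1;
	}
	/* the write returns as soon as the registration work is queued */
	if (write(fd, dev, strlen(dev)) < 0)
		perror("write");
	close(fd);
	return 0;
}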

View file

@ -421,7 +421,7 @@ STORE(__cached_dev)
return size;
}
if (v == -ENOENT)
pr_err("Can't attach %s: cache set not found", buf);
pr_err("Can't attach %s: cache set not found\n", buf);
return v;
}
@ -455,7 +455,7 @@ STORE(bch_cached_dev)
*/
if (dc->writeback_running) {
dc->writeback_running = false;
pr_err("%s: failed to run non-existent writeback thread",
pr_err("%s: failed to run non-existent writeback thread\n",
dc->disk.disk->disk_name);
}
} else
@ -872,11 +872,11 @@ STORE(__bch_cache_set)
if (v) {
if (test_and_set_bit(CACHE_SET_IO_DISABLE,
&c->flags))
pr_warn("CACHE_SET_IO_DISABLE already set");
pr_warn("CACHE_SET_IO_DISABLE already set\n");
} else {
if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
&c->flags))
pr_warn("CACHE_SET_IO_DISABLE already cleared");
pr_warn("CACHE_SET_IO_DISABLE already cleared\n");
}
}

View file

@ -809,7 +809,7 @@ static int bch_root_node_dirty_init(struct cache_set *c,
schedule_timeout_interruptible(
msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
else if (ret < 0) {
pr_warn("sectors dirty init failed, ret=%d!", ret);
pr_warn("sectors dirty init failed, ret=%d!\n", ret);
break;
}
} while (ret == -EAGAIN);
@ -917,7 +917,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
if (!state) {
pr_warn("sectors dirty init failed: cannot allocate memory");
pr_warn("sectors dirty init failed: cannot allocate memory\n");
return;
}
@ -945,7 +945,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
&state->infos[i],
name);
if (IS_ERR(state->infos[i].thread)) {
pr_err("fails to run thread bch_dirty_init[%d]", i);
pr_err("fails to run thread bch_dirty_init[%d]\n", i);
for (--i; i >= 0; i--)
kthread_stop(state->infos[i].thread);
goto out;

View file

@ -12,6 +12,6 @@ struct linear_conf
struct rcu_head rcu;
sector_t array_sectors;
int raid_disks; /* a copy of mddev->raid_disks */
struct dev_info disks[0];
struct dev_info disks[];
};
#endif
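This disks[0] to disks[] change, like the bios[]/devs[]/priv[] conversions further down, swaps a GCC zero-length array for a C99 flexible array member, which gives the compiler and fortify checks a chance to flag out-of-bounds accesses. A small self-contained sketch of the usual allocation pattern for such a struct, using struct_size() from <linux/overflow.h> (the type names here are made up for illustration):

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct item {
	sector_t start;
	sector_t len;
};

struct item_array {
	int nr;
	struct item entries[];		/* flexible array member, must be last */
};

static struct item_array *item_array_alloc(int nr)
{
	/* struct_size() = sizeof(*a) + nr * sizeof(a->entries[0]), overflow-checked */
	struct item_array *a = kzalloc(struct_size(a, entries, nr), GFP_KERNEL);

	if (a)
		a->nr = nr;
	return a;
}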

View file

@ -89,6 +89,7 @@ static struct module *md_cluster_mod;
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;
static struct workqueue_struct *md_rdev_misc_wq;
static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *this);
@ -227,13 +228,13 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
goto abort;
if (mddev->serial_info_pool == NULL) {
unsigned int noio_flag;
noio_flag = memalloc_noio_save();
/*
* already in memalloc noio context by
* mddev_suspend()
*/
mddev->serial_info_pool =
mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
sizeof(struct serial_info));
memalloc_noio_restore(noio_flag);
if (!mddev->serial_info_pool) {
rdevs_uninit_serial(mddev);
pr_err("can't alloc memory pool for serialization\n");
@ -466,7 +467,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
const int rw = bio_data_dir(bio);
const int sgrp = op_stat_group(bio_op(bio));
struct mddev *mddev = q->queuedata;
struct mddev *mddev = bio->bi_disk->private_data;
unsigned int sectors;
if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
@ -527,11 +528,15 @@ void mddev_suspend(struct mddev *mddev)
wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
del_timer_sync(&mddev->safemode_timer);
/* restrict memory-reclaim I/O while the raid array is suspended */
mddev->noio_flag = memalloc_noio_save();
}
EXPORT_SYMBOL_GPL(mddev_suspend);
void mddev_resume(struct mddev *mddev)
{
/* entered the memalloc scope from mddev_suspend() */
memalloc_noio_restore(mddev->noio_flag);
lockdep_assert_held(&mddev->reconfig_mutex);
if (--mddev->suspended)
return;
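Taking the memalloc_noio scope in mddev_suspend() and dropping it in mddev_resume() means every allocation made while the array is suspended is implicitly demoted to GFP_NOIO, which is also why scribble_alloc() in the raid5 changes below can switch back to plain GFP_KERNEL. A minimal sketch of the scope API in isolation (the helper name is illustrative):

#include <linux/sched/mm.h>
#include <linux/slab.h>

static void *alloc_while_suspended(size_t size)
{
	unsigned int noio_flag;
	void *p;

	noio_flag = memalloc_noio_save();	/* GFP_KERNEL now behaves like GFP_NOIO */
	p = kmalloc(size, GFP_KERNEL);		/* cannot recurse into reclaim I/O */
	memalloc_noio_restore(noio_flag);	/* leave the scope */
	return p;
}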
@ -2454,7 +2459,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
return err;
}
static void md_delayed_delete(struct work_struct *ws)
static void rdev_delayed_delete(struct work_struct *ws)
{
struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
kobject_del(&rdev->kobj);
@ -2479,9 +2484,9 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
* to delay it due to rcu usage.
*/
synchronize_rcu();
INIT_WORK(&rdev->del_work, md_delayed_delete);
INIT_WORK(&rdev->del_work, rdev_delayed_delete);
kobject_get(&rdev->kobj);
queue_work(md_misc_wq, &rdev->del_work);
queue_work(md_rdev_misc_wq, &rdev->del_work);
}
/*
@ -3191,8 +3196,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
rdev->saved_raid_disk = -1;
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
err = rdev->mddev->pers->
hot_add_disk(rdev->mddev, rdev);
err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
if (err) {
rdev->raid_disk = -1;
return err;
@ -4514,6 +4518,20 @@ null_show(struct mddev *mddev, char *page)
return -EINVAL;
}
/* need to ensure rdev_delayed_delete() has completed */
static void flush_rdev_wq(struct mddev *mddev)
{
struct md_rdev *rdev;
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev)
if (work_pending(&rdev->del_work)) {
flush_workqueue(md_rdev_misc_wq);
break;
}
rcu_read_unlock();
}
static ssize_t
new_dev_store(struct mddev *mddev, const char *buf, size_t len)
{
@ -4541,8 +4559,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
minor != MINOR(dev))
return -EOVERFLOW;
flush_workqueue(md_misc_wq);
flush_rdev_wq(mddev);
err = mddev_lock(mddev);
if (err)
return err;
@ -4780,7 +4797,8 @@ action_store(struct mddev *mddev, const char *page, size_t len)
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
mddev_lock(mddev) == 0) {
flush_workqueue(md_misc_wq);
if (work_pending(&mddev->del_work))
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
@ -5626,7 +5644,6 @@ static int md_alloc(dev_t dev, char *name)
mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
if (!mddev->queue)
goto abort;
mddev->queue->queuedata = mddev;
blk_set_stacking_limits(&mddev->queue->limits);
@ -6147,7 +6164,8 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
flush_workqueue(md_misc_wq);
if (work_pending(&mddev->del_work))
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
@ -6200,7 +6218,8 @@ static void __md_stop(struct mddev *mddev)
md_bitmap_destroy(mddev);
mddev_detach(mddev);
/* Ensure ->event_work is done */
flush_workqueue(md_misc_wq);
if (mddev->event_work.func)
flush_workqueue(md_misc_wq);
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
@ -7495,9 +7514,8 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
}
if (cmd == ADD_NEW_DISK)
/* need to ensure md_delayed_delete() has completed */
flush_workqueue(md_misc_wq);
if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
flush_rdev_wq(mddev);
if (cmd == HOT_REMOVE_DISK)
/* need to ensure recovery thread has run */
@ -7752,7 +7770,8 @@ static int md_open(struct block_device *bdev, fmode_t mode)
*/
mddev_put(mddev);
/* Wait until bdev->bd_disk is definitely gone */
flush_workqueue(md_misc_wq);
if (work_pending(&mddev->del_work))
flush_workqueue(md_misc_wq);
/* Then retry the open from the top */
return -ERESTARTSYS;
}
@ -9040,8 +9059,7 @@ static int remove_and_add_spares(struct mddev *mddev,
rdev->recovery_offset = 0;
}
if (mddev->pers->
hot_add_disk(mddev, rdev) == 0) {
if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
if (sysfs_link_rdev(mddev, rdev))
/* failure here is OK */;
if (!test_bit(Journal, &rdev->flags))
@ -9469,6 +9487,10 @@ static int __init md_init(void)
if (!md_misc_wq)
goto err_misc_wq;
md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
if (!md_rdev_misc_wq)
goto err_rdev_misc_wq;
if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
goto err_md;
@ -9490,6 +9512,8 @@ static int __init md_init(void)
err_mdp:
unregister_blkdev(MD_MAJOR, "md");
err_md:
destroy_workqueue(md_rdev_misc_wq);
err_rdev_misc_wq:
destroy_workqueue(md_misc_wq);
err_misc_wq:
destroy_workqueue(md_wq);
@ -9776,6 +9800,7 @@ static __exit void md_exit(void)
* destroy_workqueue() below will wait for that to complete.
*/
}
destroy_workqueue(md_rdev_misc_wq);
destroy_workqueue(md_misc_wq);
destroy_workqueue(md_wq);
}
@ -9785,7 +9810,7 @@ module_exit(md_exit)
static int get_ro(char *buffer, const struct kernel_param *kp)
{
return sprintf(buffer, "%d", start_readonly);
return sprintf(buffer, "%d\n", start_readonly);
}
static int set_ro(const char *val, const struct kernel_param *kp)
{

Просмотреть файл

@ -497,6 +497,7 @@ struct mddev {
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
unsigned int good_device_nr; /* good device num within cluster raid */
unsigned int noio_flag; /* for memalloc scope API */
bool has_superblocks:1;
bool fail_last_dev:1;

View file

@ -296,22 +296,17 @@ static void reschedule_retry(struct r1bio *r1_bio)
static void call_bio_endio(struct r1bio *r1_bio)
{
struct bio *bio = r1_bio->master_bio;
struct r1conf *conf = r1_bio->mddev->private;
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
/*
* Wake up any possible resync thread that waits for the device
* to go idle.
*/
allow_barrier(conf, r1_bio->sector);
}
static void raid_end_bio_io(struct r1bio *r1_bio)
{
struct bio *bio = r1_bio->master_bio;
struct r1conf *conf = r1_bio->mddev->private;
/* if nobody has done the final endio yet, do it now */
if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
@ -322,6 +317,12 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
call_bio_endio(r1_bio);
}
/*
* Wake up any possible resync thread that waits for the device
* to go idle. All I/Os, even write-behind writes, are done.
*/
allow_barrier(conf, r1_bio->sector);
free_r1bio(r1_bio);
}

View file

@ -180,7 +180,7 @@ struct r1bio {
* if the IO is in WRITE direction, then multiple bios are used.
* We choose the number when they are allocated.
*/
struct bio *bios[0];
struct bio *bios[];
/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
};

View file

@ -153,7 +153,7 @@ struct r10bio {
};
sector_t addr;
int devnum;
} devs[0];
} devs[];
};
/* bits for r10bio.state */

View file

@ -2215,10 +2215,13 @@ static int grow_stripes(struct r5conf *conf, int num)
}
/**
* scribble_len - return the required size of the scribble region
* scribble_alloc - allocate percpu scribble buffer for required size
* of the scribble region
* @percpu - from for_each_present_cpu() of the caller
* @num - total number of disks in the array
* @cnt - scribble objs count for required size of the scribble region
*
* The size must be enough to contain:
* The scribble buffer size must be enough to contain:
* 1/ a struct page pointer for each device in the array +2
* 2/ room to convert each entry in (1) to its corresponding dma
* (dma_map_page()) or page (page_address()) address.
@ -2228,14 +2231,19 @@ static int grow_stripes(struct r5conf *conf, int num)
* of the P and Q blocks.
*/
static int scribble_alloc(struct raid5_percpu *percpu,
int num, int cnt, gfp_t flags)
int num, int cnt)
{
size_t obj_size =
sizeof(struct page *) * (num+2) +
sizeof(addr_conv_t) * (num+2);
void *scribble;
scribble = kvmalloc_array(cnt, obj_size, flags);
/*
* If we are in the raid array suspend path we are already inside a
* memalloc noio scope, so a GFP_KERNEL allocation here cannot trigger
* recursive memory-reclaim I/O.
*/
scribble = kvmalloc_array(cnt, obj_size, GFP_KERNEL);
if (!scribble)
return -ENOMEM;
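To make the sizing above concrete: with pointer-sized entries (8 bytes on 64-bit, and assuming addr_conv_t is also pointer-sized), a 10-device array gives obj_size = (10 + 2) * 8 + (10 + 2) * 8 = 192 bytes per scribble object, and the per-CPU buffer allocated by kvmalloc_array() is cnt such objects, i.e. cnt * 192 bytes.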
@ -2267,8 +2275,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
percpu = per_cpu_ptr(conf->percpu, cpu);
err = scribble_alloc(percpu, new_disks,
new_sectors / STRIPE_SECTORS,
GFP_NOIO);
new_sectors / STRIPE_SECTORS);
if (err)
break;
}
@ -6759,8 +6766,7 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu
conf->previous_raid_disks),
max(conf->chunk_sectors,
conf->prev_chunk_sectors)
/ STRIPE_SECTORS,
GFP_KERNEL)) {
/ STRIPE_SECTORS)) {
free_scratch_buffer(conf, percpu);
return -ENOMEM;
}

View file

@ -19,7 +19,6 @@
#include <linux/pr.h>
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
#include <asm/unaligned.h>
@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
nvme_put_ctrl(ctrl);
}
static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
{
return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
}
static blk_status_t nvme_error_status(u16 status)
{
switch (status & 0x7ff) {
@ -433,7 +427,6 @@ static void nvme_free_ns_head(struct kref *ref)
nvme_mpath_remove_disk(head);
ida_simple_remove(&head->subsys->ns_ida, head->instance);
list_del_init(&head->entry);
cleanup_srcu_struct(&head->srcu);
nvme_put_subsystem(head->subsys);
kfree(head);
@ -530,7 +523,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
c.directive.opcode = nvme_admin_directive_recv;
c.directive.nsid = cpu_to_le32(nsid);
c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1);
c.directive.numd = cpu_to_le32(nvme_bytes_to_numd(sizeof(*s)));
c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
c.directive.dtype = NVME_DIR_STREAMS;
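Here and in nvme_get_log() further down, the open-coded conversions from a byte count to the zero's-based dword count (NUMD) are folded into a nvme_bytes_to_numd() helper. Judging from the expressions it replaces ((sizeof(*s) >> 2) - 1 and size / 4 - 1), the helper presumably reduces to the following sketch:

/* NUMD is a zero's-based count of dwords: bytes / 4, minus one */
static inline u32 nvme_bytes_to_numd(size_t len)
{
	return (len >> 2) - 1;
}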
@ -553,19 +546,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
if (ret)
return ret;
goto out_disable_stream;
ctrl->nssa = le16_to_cpu(s.nssa);
if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
dev_info(ctrl->device, "too few streams (%u) available\n",
ctrl->nssa);
nvme_disable_streams(ctrl);
return 0;
goto out_disable_stream;
}
ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
return 0;
out_disable_stream:
nvme_disable_streams(ctrl);
return ret;
}
/*
@ -1027,6 +1023,19 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
/*
* In NVMe 1.0 the CNS field was just a binary controller or namespace
* flag, thus sending any new CNS opcodes has a big chance of not working.
* Qemu unfortunately had that bug after reporting a 1.1 version compliance
* (but not for any later version).
*/
static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl)
{
if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)
return ctrl->vs < NVME_VS(1, 2, 0);
return ctrl->vs < NVME_VS(1, 1, 0);
}
static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
struct nvme_command c = { };
@ -1290,7 +1299,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
meta_len = (io.nblocks + 1) * ns->ms;
metadata = nvme_to_user_ptr(io.metadata);
if (ns->ext) {
if (ns->features & NVME_NS_EXT_LBAS) {
length += meta_len;
meta_len = 0;
} else if (meta_len) {
@ -1392,8 +1401,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
}
if (effects & NVME_CMD_EFFECTS_CCC)
nvme_init_identify(ctrl);
if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
nvme_queue_scan(ctrl);
flush_work(&ctrl->scan_work);
}
}
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
@ -1682,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
u32 max_integrity_segments)
{
struct blk_integrity integrity;
@ -1705,20 +1717,15 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
}
integrity.tuple_size = ms;
blk_integrity_register(disk, &integrity);
blk_queue_max_integrity_segments(disk->queue, 1);
blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
}
#else
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
u32 max_integrity_segments)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
static void nvme_set_chunk_size(struct nvme_ns *ns)
{
u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob);
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
}
static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@ -1804,12 +1811,37 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0;
}
static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u32 *phys_bs, u32 *io_opt)
{
struct streams_directive_params s;
int ret;
if (!ctrl->nr_streams)
return 0;
ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id);
if (ret)
return ret;
ns->sws = le32_to_cpu(s.sws);
ns->sgs = le16_to_cpu(s.sgs);
if (ns->sws) {
*phys_bs = ns->sws * (1 << ns->lba_shift);
if (ns->sgs)
*io_opt = *phys_bs * ns->sgs;
}
return 0;
}
static void nvme_update_disk_info(struct gendisk *disk,
struct nvme_ns *ns, struct nvme_id_ns *id)
{
sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
unsigned short bs = 1 << ns->lba_shift;
u32 atomic_bs, phys_bs, io_opt;
u32 atomic_bs, phys_bs, io_opt = 0;
if (ns->lba_shift > PAGE_SHIFT) {
/* unsupported block size, set capacity to 0 later */
@ -1818,26 +1850,25 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
atomic_bs = phys_bs = bs;
nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt);
if (id->nabo == 0) {
/*
* Bit 1 indicates whether NAWUPF is defined for this namespace
* and whether it should be used instead of AWUPF. If NAWUPF ==
* 0 then AWUPF must be used instead.
*/
if (id->nsfeat & (1 << 1) && id->nawupf)
if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
else
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
} else {
atomic_bs = bs;
}
phys_bs = bs;
io_opt = bs;
if (id->nsfeat & (1 << 4)) {
if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
/* NPWG = Namespace Preferred Write Granularity */
phys_bs *= 1 + le16_to_cpu(id->npwg);
phys_bs = bs * (1 + le16_to_cpu(id->npwg));
/* NOWS = Namespace Optimal Write Size */
io_opt *= 1 + le16_to_cpu(id->nows);
io_opt = bs * (1 + le16_to_cpu(id->nows));
}
blk_queue_logical_block_size(disk->queue, bs);
@ -1850,19 +1881,34 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_queue_io_min(disk->queue, phys_bs);
blk_queue_io_opt(disk->queue, io_opt);
if (ns->ms && !ns->ext &&
(ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
nvme_init_integrity(disk, ns->ms, ns->pi_type);
if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
ns->lba_shift > PAGE_SHIFT)
/*
* The block layer can't support LBA sizes larger than the page size
* yet, so catch this early and don't allow block I/O.
*/
if (ns->lba_shift > PAGE_SHIFT)
capacity = 0;
/*
* Register a metadata profile for PI, or the plain non-integrity NVMe
* metadata masquerading as Type 0 if supported, otherwise reject block
* I/O to namespaces with metadata except when the namespace supports
* PI, as it can strip/insert in that case.
*/
if (ns->ms) {
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
(ns->features & NVME_NS_METADATA_SUPPORTED))
nvme_init_integrity(disk, ns->ms, ns->pi_type,
ns->ctrl->max_integrity_segments);
else if (!nvme_ns_has_pi(ns))
capacity = 0;
}
set_capacity_revalidate_and_notify(disk, capacity, false);
nvme_config_discard(disk, ns);
nvme_config_write_zeroes(disk, ns);
if (id->nsattr & (1 << 0))
if (id->nsattr & NVME_NS_ATTR_RO)
set_disk_ro(disk, true);
else
set_disk_ro(disk, false);
@ -1870,9 +1916,11 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_unfreeze_queue(disk->queue);
}
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
u32 iob;
/*
* If identify namespace failed, use default 512 byte block size so
@ -1881,32 +1929,55 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
if (ns->lba_shift == 0)
ns->lba_shift = 9;
ns->noiob = le16_to_cpu(id->noiob);
if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
is_power_of_2(ctrl->max_hw_sectors))
iob = ctrl->max_hw_sectors;
else
iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
ns->features = 0;
ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/* the PI implementation requires metadata equal to the T10 PI tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
else
ns->pi_type = 0;
if (ns->noiob)
nvme_set_chunk_size(ns);
if (ns->ms) {
/*
* For PCIe only the separate metadata pointer is supported,
* as the block layer supplies metadata in a separate bio_vec
* chain. For Fabrics, only metadata as part of extended data
* LBA is supported on the wire per the Fabrics specification,
* but the HBA/HCA will do the remapping from the separate
* metadata buffers for us.
*/
if (id->flbas & NVME_NS_FLBAS_META_EXT) {
ns->features |= NVME_NS_EXT_LBAS;
if ((ctrl->ops->flags & NVME_F_FABRICS) &&
(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) &&
ctrl->max_integrity_segments)
ns->features |= NVME_NS_METADATA_SUPPORTED;
} else {
if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS))
return -EINVAL;
if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
ns->features |= NVME_NS_METADATA_SUPPORTED;
}
}
if (iob)
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
nvme_update_disk_info(disk, ns, id);
#ifdef CONFIG_NVME_MULTIPATH
if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id);
blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
struct backing_dev_info *info =
ns->head->disk->queue->backing_dev_info;
info->capabilities |= BDI_CAP_STABLE_WRITES;
}
revalidate_disk(ns->head->disk);
}
#endif
return 0;
}
static int nvme_revalidate_disk(struct gendisk *disk)
@ -1931,7 +2002,6 @@ static int nvme_revalidate_disk(struct gendisk *disk)
goto free_id;
}
__nvme_revalidate_disk(disk, id);
ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids);
if (ret)
goto free_id;
@ -1940,8 +2010,10 @@ static int nvme_revalidate_disk(struct gendisk *disk)
dev_err(ctrl->device,
"identifiers changed for nsid %d\n", ns->head->ns_id);
ret = -ENODEV;
goto free_id;
}
ret = __nvme_revalidate_disk(disk, id);
free_id:
kfree(id);
out:
@ -2249,10 +2321,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
is_power_of_2(ctrl->max_hw_sectors))
blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
blk_queue_virt_boundary(q, ctrl->page_size - 1);
blk_queue_dma_alignment(q, 7);
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true;
blk_queue_write_cache(q, vwc, vwc);
@ -2655,7 +2725,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
return false;
}
if ((id->cmic & (1 << 1)) ||
if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
(ctrl->opts && ctrl->opts->discovery_nqn))
continue;
@ -2746,7 +2816,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
void *log, size_t size, u64 offset)
{
struct nvme_command c = { };
unsigned long dwlen = size / 4 - 1;
u32 dwlen = nvme_bytes_to_numd(size);
c.get_log_page.opcode = nvme_admin_get_log_page;
c.get_log_page.nsid = cpu_to_le32(nsid);
@ -3401,7 +3471,6 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
list_for_each_entry(h, &subsys->nsheads, entry) {
if (nvme_ns_ids_valid(&new->ids) &&
!list_empty(&h->list) &&
nvme_ns_ids_equal(&new->ids, &h->ids))
return -EINVAL;
}
@ -3410,8 +3479,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
}
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
unsigned nsid, struct nvme_id_ns *id,
struct nvme_ns_ids *ids)
unsigned nsid, struct nvme_ns_ids *ids)
{
struct nvme_ns_head *head;
size_t size = sizeof(*head);
@ -3469,42 +3537,51 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
bool is_shared = id->nmic & (1 << 0);
bool is_shared = id->nmic & NVME_NS_NMIC_SHARED;
struct nvme_ns_head *head = NULL;
struct nvme_ns_ids ids;
int ret = 0;
ret = nvme_report_ns_ids(ctrl, nsid, id, &ids);
if (ret)
goto out;
if (ret) {
if (ret < 0)
return ret;
return blk_status_to_errno(nvme_error_status(ret));
}
mutex_lock(&ctrl->subsys->lock);
if (is_shared)
head = nvme_find_ns_head(ctrl->subsys, nsid);
head = nvme_find_ns_head(ctrl->subsys, nsid);
if (!head) {
head = nvme_alloc_ns_head(ctrl, nsid, id, &ids);
head = nvme_alloc_ns_head(ctrl, nsid, &ids);
if (IS_ERR(head)) {
ret = PTR_ERR(head);
goto out_unlock;
}
head->shared = is_shared;
} else {
ret = -EINVAL;
if (!is_shared || !head->shared) {
dev_err(ctrl->device,
"Duplicate unshared namespace %d\n", nsid);
goto out_put_ns_head;
}
if (!nvme_ns_ids_equal(&head->ids, &ids)) {
dev_err(ctrl->device,
"IDs don't match for shared namespace %d\n",
nsid);
ret = -EINVAL;
goto out_unlock;
goto out_put_ns_head;
}
}
list_add_tail(&ns->siblings, &head->list);
ns->head = head;
mutex_unlock(&ctrl->subsys->lock);
return 0;
out_put_ns_head:
nvme_put_ns_head(head);
out_unlock:
mutex_unlock(&ctrl->subsys->lock);
out:
if (ret > 0)
ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
}
@ -3535,32 +3612,6 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
return ret;
}
static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
{
struct streams_directive_params s;
int ret;
if (!ctrl->nr_streams)
return 0;
ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id);
if (ret)
return ret;
ns->sws = le32_to_cpu(s.sws);
ns->sgs = le16_to_cpu(s.sgs);
if (ns->sws) {
unsigned int bs = 1 << ns->lba_shift;
blk_queue_io_min(ns->queue, bs * ns->sws);
if (ns->sgs)
blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
}
return 0;
}
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
@ -3604,7 +3655,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ret = nvme_init_ns_head(ns, nsid, id);
if (ret)
goto out_free_id;
nvme_setup_streams_ns(ctrl, ns);
nvme_set_disk_name(disk_name, ns, ctrl, &flags);
disk = alloc_disk_node(0, node);
@ -3618,7 +3668,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
ns->disk = disk;
__nvme_revalidate_disk(disk, id);
if (__nvme_revalidate_disk(disk, id))
goto out_free_disk;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
ret = nvme_nvm_register(ns, disk_name, node);
@ -3645,9 +3696,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
/* prevent double queue cleanup */
ns->disk->queue = NULL;
put_disk(ns->disk);
out_free_disk:
del_gendisk(ns->disk);
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
if (list_empty(&ns->head->list))
list_del_init(&ns->head->entry);
mutex_unlock(&ctrl->subsys->lock);
nvme_put_ns_head(ns->head);
out_free_id:
@ -3667,7 +3722,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
mutex_lock(&ns->ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
if (list_empty(&ns->head->list))
list_del_init(&ns->head->entry);
mutex_unlock(&ns->ctrl->subsys->lock);
synchronize_rcu(); /* guarantee not available in head->list */
nvme_mpath_clear_current_path(ns);
synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
@ -3687,6 +3745,16 @@ static void nvme_ns_remove(struct nvme_ns *ns)
nvme_put_ns(ns);
}
static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
{
struct nvme_ns *ns = nvme_find_get_ns(ctrl, nsid);
if (ns) {
nvme_ns_remove(ns);
nvme_put_ns(ns);
}
}
static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
@ -3718,39 +3786,34 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
}
static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
const int nr_entries = NVME_IDENTIFY_DATA_SIZE / sizeof(__le32);
__le32 *ns_list;
unsigned i, j, nsid, prev = 0;
unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024);
int ret = 0;
u32 prev = 0;
int ret = 0, i;
if (nvme_ctrl_limited_cns(ctrl))
return -EOPNOTSUPP;
ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
if (!ns_list)
return -ENOMEM;
for (i = 0; i < num_lists; i++) {
for (;;) {
ret = nvme_identify_ns_list(ctrl, prev, ns_list);
if (ret)
goto free;
for (j = 0; j < min(nn, 1024U); j++) {
nsid = le32_to_cpu(ns_list[j]);
if (!nsid)
for (i = 0; i < nr_entries; i++) {
u32 nsid = le32_to_cpu(ns_list[i]);
if (!nsid) /* end of the list? */
goto out;
nvme_validate_ns(ctrl, nsid);
while (++prev < nsid) {
ns = nvme_find_get_ns(ctrl, prev);
if (ns) {
nvme_ns_remove(ns);
nvme_put_ns(ns);
}
}
while (++prev < nsid)
nvme_ns_remove_by_nsid(ctrl, prev);
}
nn -= j;
}
out:
nvme_remove_invalid_namespaces(ctrl, prev);
@ -3759,9 +3822,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
return ret;
}
static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl)
{
unsigned i;
struct nvme_id_ctrl *id;
u32 nn, i;
if (nvme_identify_ctrl(ctrl, &id))
return;
nn = le32_to_cpu(id->nn);
kfree(id);
for (i = 1; i <= nn; i++)
nvme_validate_ns(ctrl, i);
@ -3798,8 +3867,6 @@ static void nvme_scan_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, scan_work);
struct nvme_id_ctrl *id;
unsigned nn;
/* No tagset on a live ctrl means IO queues could not created */
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
@ -3810,20 +3877,11 @@ static void nvme_scan_work(struct work_struct *work)
nvme_clear_changed_ns_log(ctrl);
}
if (nvme_identify_ctrl(ctrl, &id))
return;
mutex_lock(&ctrl->scan_lock);
nn = le32_to_cpu(id->nn);
if (ctrl->vs >= NVME_VS(1, 1, 0) &&
!(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
if (!nvme_scan_ns_list(ctrl, nn))
goto out_free_id;
}
nvme_scan_ns_sequential(ctrl, nn);
out_free_id:
if (nvme_scan_ns_list(ctrl) != 0)
nvme_scan_ns_sequential(ctrl);
mutex_unlock(&ctrl->scan_lock);
kfree(id);
down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp);
up_write(&ctrl->namespaces_rwsem);

View file

@ -14,6 +14,7 @@
#include "fabrics.h"
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
#include "fc.h"
#include <scsi/scsi_transport_fc.h>
/* *************************** Data Structures/Defines ****************** */
@ -61,6 +62,17 @@ struct nvmefc_ls_req_op {
bool req_queued;
};
struct nvmefc_ls_rcv_op {
struct nvme_fc_rport *rport;
struct nvmefc_ls_rsp *lsrsp;
union nvmefc_ls_requests *rqstbuf;
union nvmefc_ls_responses *rspbuf;
u16 rqstdatalen;
bool handled;
dma_addr_t rspdma;
struct list_head lsrcv_list; /* rport->ls_rcv_list */
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
enum nvme_fcpop_state {
FCPOP_STATE_UNINIT = 0,
FCPOP_STATE_IDLE = 1,
@ -96,7 +108,7 @@ struct nvme_fc_fcp_op {
struct nvme_fcp_op_w_sgl {
struct nvme_fc_fcp_op op;
struct scatterlist sgl[NVME_INLINE_SG_CNT];
uint8_t priv[0];
uint8_t priv[];
};
struct nvme_fc_lport {
@ -117,6 +129,7 @@ struct nvme_fc_rport {
struct list_head endp_list; /* for lport->endp_list */
struct list_head ctrl_list;
struct list_head ls_req_list;
struct list_head ls_rcv_list;
struct list_head disc_list;
struct device *dev; /* physical device for dma */
struct nvme_fc_lport *lport;
@ -124,11 +137,12 @@ struct nvme_fc_rport {
struct kref ref;
atomic_t act_ctrl_cnt;
unsigned long dev_loss_end;
struct work_struct lsrcv_work;
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
enum nvme_fcctrl_flags {
FCCTRL_TERMIO = (1 << 0),
};
/* fc_ctrl flags values - specified as bit positions */
#define ASSOC_ACTIVE 0
#define FCCTRL_TERMIO 1
struct nvme_fc_ctrl {
spinlock_t lock;
@ -139,9 +153,9 @@ struct nvme_fc_ctrl {
u32 cnum;
bool ioq_live;
bool assoc_active;
atomic_t err_work_active;
u64 association_id;
struct nvmefc_ls_rcv_op *rcv_disconn;
struct list_head ctrl_list; /* rport->ctrl_list */
@ -152,7 +166,7 @@ struct nvme_fc_ctrl {
struct work_struct err_work;
struct kref ref;
u32 flags;
unsigned long flags;
u32 iocnt;
wait_queue_head_t ioabort_wait;
@ -219,6 +233,9 @@ static struct device *fc_udev_device;
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
struct nvme_fc_queue *, unsigned int);
static void nvme_fc_handle_ls_rqst_work(struct work_struct *work);
static void
nvme_fc_free_lport(struct kref *ref)
{
@ -394,7 +411,10 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
newrec->ops = template;
newrec->dev = dev;
ida_init(&newrec->endp_cnt);
newrec->localport.private = &newrec[1];
if (template->local_priv_sz)
newrec->localport.private = &newrec[1];
else
newrec->localport.private = NULL;
newrec->localport.node_name = pinfo->node_name;
newrec->localport.port_name = pinfo->port_name;
newrec->localport.port_role = pinfo->port_role;
@ -701,9 +721,13 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
atomic_set(&newrec->act_ctrl_cnt, 0);
spin_lock_init(&newrec->lock);
newrec->remoteport.localport = &lport->localport;
INIT_LIST_HEAD(&newrec->ls_rcv_list);
newrec->dev = lport->dev;
newrec->lport = lport;
newrec->remoteport.private = &newrec[1];
if (lport->ops->remote_priv_sz)
newrec->remoteport.private = &newrec[1];
else
newrec->remoteport.private = NULL;
newrec->remoteport.port_role = pinfo->port_role;
newrec->remoteport.node_name = pinfo->node_name;
newrec->remoteport.port_name = pinfo->port_name;
@ -711,6 +735,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
newrec->remoteport.port_num = idx;
__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work);
spin_lock_irqsave(&nvme_fc_lock, flags);
list_add_tail(&newrec->endp_list, &lport->endp_list);
@ -1000,6 +1025,7 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
static void
__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
@ -1140,41 +1166,6 @@ nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
return __nvme_fc_send_ls_req(rport, lsop, done);
}
/* Validation Error indexes into the string table below */
enum {
VERR_NO_ERROR = 0,
VERR_LSACC = 1,
VERR_LSDESC_RQST = 2,
VERR_LSDESC_RQST_LEN = 3,
VERR_ASSOC_ID = 4,
VERR_ASSOC_ID_LEN = 5,
VERR_CONN_ID = 6,
VERR_CONN_ID_LEN = 7,
VERR_CR_ASSOC = 8,
VERR_CR_ASSOC_ACC_LEN = 9,
VERR_CR_CONN = 10,
VERR_CR_CONN_ACC_LEN = 11,
VERR_DISCONN = 12,
VERR_DISCONN_ACC_LEN = 13,
};
static char *validation_errors[] = {
"OK",
"Not LS_ACC",
"Not LSDESC_RQST",
"Bad LSDESC_RQST Length",
"Not Association ID",
"Bad Association ID Length",
"Not Connection ID",
"Bad Connection ID Length",
"Not CR_ASSOC Rqst",
"Bad CR_ASSOC ACC Length",
"Not CR_CONN Rqst",
"Bad CR_CONN ACC Length",
"Not Disconnect Rqst",
"Bad Disconnect ACC Length",
};
static int
nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
@ -1183,21 +1174,27 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
struct nvmefc_ls_req *lsreq;
struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
struct fcnvme_ls_cr_assoc_acc *assoc_acc;
unsigned long flags;
int ret, fcret = 0;
lsop = kzalloc((sizeof(*lsop) +
ctrl->lport->ops->lsrqst_priv_sz +
sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL);
sizeof(*assoc_rqst) + sizeof(*assoc_acc) +
ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
if (!lsop) {
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: send Create Association failed: ENOMEM\n",
ctrl->cnum);
ret = -ENOMEM;
goto out_no_memory;
}
lsreq = &lsop->ls_req;
lsreq->private = (void *)&lsop[1];
assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)
(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1];
assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];
lsreq = &lsop->ls_req;
if (ctrl->lport->ops->lsrqst_priv_sz)
lsreq->private = &assoc_acc[1];
else
lsreq->private = NULL;
assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
assoc_rqst->desc_list_len =
@ -1267,11 +1264,13 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
"q %d Create Association LS failed: %s\n",
queue->qnum, validation_errors[fcret]);
} else {
spin_lock_irqsave(&ctrl->lock, flags);
ctrl->association_id =
be64_to_cpu(assoc_acc->associd.association_id);
queue->connection_id =
be64_to_cpu(assoc_acc->connectid.connection_id);
set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
spin_unlock_irqrestore(&ctrl->lock, flags);
}
out_free_buffer:
@ -1295,18 +1294,23 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
int ret, fcret = 0;
lsop = kzalloc((sizeof(*lsop) +
ctrl->lport->ops->lsrqst_priv_sz +
sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL);
sizeof(*conn_rqst) + sizeof(*conn_acc) +
ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
if (!lsop) {
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: send Create Connection failed: ENOMEM\n",
ctrl->cnum);
ret = -ENOMEM;
goto out_no_memory;
}
lsreq = &lsop->ls_req;
lsreq->private = (void *)&lsop[1];
conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)
(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1];
conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];
lsreq = &lsop->ls_req;
if (ctrl->lport->ops->lsrqst_priv_sz)
lsreq->private = (void *)&conn_acc[1];
else
lsreq->private = NULL;
conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
conn_rqst->desc_list_len = cpu_to_be32(
@ -1420,43 +1424,26 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
int ret;
lsop = kzalloc((sizeof(*lsop) +
ctrl->lport->ops->lsrqst_priv_sz +
sizeof(*discon_rqst) + sizeof(*discon_acc)),
GFP_KERNEL);
if (!lsop)
/* couldn't sent it... too bad */
sizeof(*discon_rqst) + sizeof(*discon_acc) +
ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
if (!lsop) {
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: send Disconnect Association "
"failed: ENOMEM\n",
ctrl->cnum);
return;
}
lsreq = &lsop->ls_req;
lsreq->private = (void *)&lsop[1];
discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)
(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1];
discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
lsreq = &lsop->ls_req;
if (ctrl->lport->ops->lsrqst_priv_sz)
lsreq->private = (void *)&discon_acc[1];
else
lsreq->private = NULL;
discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC;
discon_rqst->desc_list_len = cpu_to_be32(
sizeof(struct fcnvme_lsdesc_assoc_id) +
sizeof(struct fcnvme_lsdesc_disconn_cmd));
discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
discon_rqst->associd.desc_len =
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_assoc_id));
discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
FCNVME_LSDESC_DISCONN_CMD);
discon_rqst->discon_cmd.desc_len =
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_disconn_cmd));
lsreq->rqstaddr = discon_rqst;
lsreq->rqstlen = sizeof(*discon_rqst);
lsreq->rspaddr = discon_acc;
lsreq->rsplen = sizeof(*discon_acc);
lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
ctrl->association_id);
ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
nvme_fc_disconnect_assoc_done);
@ -1464,11 +1451,359 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
kfree(lsop);
}
static void
nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
{
struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private;
struct nvme_fc_rport *rport = lsop->rport;
struct nvme_fc_lport *lport = rport->lport;
unsigned long flags;
spin_lock_irqsave(&rport->lock, flags);
list_del(&lsop->lsrcv_list);
spin_unlock_irqrestore(&rport->lock, flags);
fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma,
sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
fc_dma_unmap_single(lport->dev, lsop->rspdma,
sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
kfree(lsop);
nvme_fc_rport_put(rport);
}
static void
nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop)
{
struct nvme_fc_rport *rport = lsop->rport;
struct nvme_fc_lport *lport = rport->lport;
struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
int ret;
fc_dma_sync_single_for_device(lport->dev, lsop->rspdma,
sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport,
lsop->lsrsp);
if (ret) {
dev_warn(lport->dev,
"LLDD rejected LS RSP xmt: LS %d status %d\n",
w0->ls_cmd, ret);
nvme_fc_xmt_ls_rsp_done(lsop->lsrsp);
return;
}
}
static struct nvme_fc_ctrl *
nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport,
struct nvmefc_ls_rcv_op *lsop)
{
struct fcnvme_ls_disconnect_assoc_rqst *rqst =
&lsop->rqstbuf->rq_dis_assoc;
struct nvme_fc_ctrl *ctrl, *ret = NULL;
struct nvmefc_ls_rcv_op *oldls = NULL;
u64 association_id = be64_to_cpu(rqst->associd.association_id);
unsigned long flags;
spin_lock_irqsave(&rport->lock, flags);
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
if (!nvme_fc_ctrl_get(ctrl))
continue;
spin_lock(&ctrl->lock);
if (association_id == ctrl->association_id) {
oldls = ctrl->rcv_disconn;
ctrl->rcv_disconn = lsop;
ret = ctrl;
}
spin_unlock(&ctrl->lock);
if (ret)
/* leave the ctrl get reference */
break;
nvme_fc_ctrl_put(ctrl);
}
spin_unlock_irqrestore(&rport->lock, flags);
/* transmit a response for anything that was pending */
if (oldls) {
dev_info(rport->lport->dev,
"NVME-FC{%d}: Multiple Disconnect Association "
"LS's received\n", ctrl->cnum);
/* overwrite good response with bogus failure */
oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf,
sizeof(*oldls->rspbuf),
rqst->w0.ls_cmd,
FCNVME_RJT_RC_UNAB,
FCNVME_RJT_EXP_NONE, 0);
nvme_fc_xmt_ls_rsp(oldls);
}
return ret;
}
/*
* returns true to mean LS handled and ls_rsp can be sent
* returns false to defer ls_rsp xmt (will be done as part of
* association termination)
*/
static bool
nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop)
{
struct nvme_fc_rport *rport = lsop->rport;
struct fcnvme_ls_disconnect_assoc_rqst *rqst =
&lsop->rqstbuf->rq_dis_assoc;
struct fcnvme_ls_disconnect_assoc_acc *acc =
&lsop->rspbuf->rsp_dis_assoc;
struct nvme_fc_ctrl *ctrl = NULL;
int ret = 0;
memset(acc, 0, sizeof(*acc));
ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst);
if (!ret) {
/* match an active association */
ctrl = nvme_fc_match_disconn_ls(rport, lsop);
if (!ctrl)
ret = VERR_NO_ASSOC;
}
if (ret) {
dev_info(rport->lport->dev,
"Disconnect LS failed: %s\n",
validation_errors[ret]);
lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc,
sizeof(*acc), rqst->w0.ls_cmd,
(ret == VERR_NO_ASSOC) ?
FCNVME_RJT_RC_INV_ASSOC :
FCNVME_RJT_RC_LOGIC,
FCNVME_RJT_EXP_NONE, 0);
return true;
}
/* format an ACCept response */
lsop->lsrsp->rsplen = sizeof(*acc);
nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
FCNVME_LS_DISCONNECT_ASSOC);
/*
* the transmit of the response will occur after the exchanges
* for the association have been ABTS'd by
* nvme_fc_delete_association().
*/
/* fail the association */
nvme_fc_error_recovery(ctrl, "Disconnect Association LS received");
/* release the reference taken by nvme_fc_match_disconn_ls() */
nvme_fc_ctrl_put(ctrl);
return false;
}
/*
* Actual Processing routine for received FC-NVME LS Requests from the LLD
* returns true if a response should be sent afterward, false if rsp will
* be sent asynchronously.
*/
static bool
nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop)
{
struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
bool ret = true;
lsop->lsrsp->nvme_fc_private = lsop;
lsop->lsrsp->rspbuf = lsop->rspbuf;
lsop->lsrsp->rspdma = lsop->rspdma;
lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done;
/* Be preventative. handlers will later set to valid length */
lsop->lsrsp->rsplen = 0;
/*
* handlers:
* parse request input, execute the request, and format the
* LS response
*/
switch (w0->ls_cmd) {
case FCNVME_LS_DISCONNECT_ASSOC:
ret = nvme_fc_ls_disconnect_assoc(lsop);
break;
case FCNVME_LS_DISCONNECT_CONN:
lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
sizeof(*lsop->rspbuf), w0->ls_cmd,
FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0);
break;
case FCNVME_LS_CREATE_ASSOCIATION:
case FCNVME_LS_CREATE_CONNECTION:
lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
sizeof(*lsop->rspbuf), w0->ls_cmd,
FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0);
break;
default:
lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
sizeof(*lsop->rspbuf), w0->ls_cmd,
FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
break;
}
return(ret);
}
static void
nvme_fc_handle_ls_rqst_work(struct work_struct *work)
{
struct nvme_fc_rport *rport =
container_of(work, struct nvme_fc_rport, lsrcv_work);
struct fcnvme_ls_rqst_w0 *w0;
struct nvmefc_ls_rcv_op *lsop;
unsigned long flags;
bool sendrsp;
restart:
sendrsp = true;
spin_lock_irqsave(&rport->lock, flags);
list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) {
if (lsop->handled)
continue;
lsop->handled = true;
if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
spin_unlock_irqrestore(&rport->lock, flags);
sendrsp = nvme_fc_handle_ls_rqst(lsop);
} else {
spin_unlock_irqrestore(&rport->lock, flags);
w0 = &lsop->rqstbuf->w0;
lsop->lsrsp->rsplen = nvme_fc_format_rjt(
lsop->rspbuf,
sizeof(*lsop->rspbuf),
w0->ls_cmd,
FCNVME_RJT_RC_UNAB,
FCNVME_RJT_EXP_NONE, 0);
}
if (sendrsp)
nvme_fc_xmt_ls_rsp(lsop);
goto restart;
}
spin_unlock_irqrestore(&rport->lock, flags);
}
/**
* nvme_fc_rcv_ls_req - transport entry point called by an LLDD
* upon the reception of a NVME LS request.
*
* The nvme-fc layer will copy payload to an internal structure for
* processing. As such, upon completion of the routine, the LLDD may
* immediately free/reuse the LS request buffer passed in the call.
*
* If this routine returns error, the LLDD should abort the exchange.
*
* @remoteport: pointer to the (registered) remote port that the LS
* was received from. The remoteport is associated with
* a specific localport.
* @lsrsp: pointer to a nvmefc_ls_rsp response structure to be
* used to reference the exchange corresponding to the LS
* when issuing an ls response.
* @lsreqbuf: pointer to the buffer containing the LS Request
* @lsreqbuf_len: length, in bytes, of the received LS request
*/
int
nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
struct nvmefc_ls_rsp *lsrsp,
void *lsreqbuf, u32 lsreqbuf_len)
{
struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
struct nvme_fc_lport *lport = rport->lport;
struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf;
struct nvmefc_ls_rcv_op *lsop;
unsigned long flags;
int ret;
nvme_fc_rport_get(rport);
/* validate there's a routine to transmit a response */
if (!lport->ops->xmt_ls_rsp) {
dev_info(lport->dev,
"RCV %s LS failed: no LLDD xmt_ls_rsp\n",
(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
nvmefc_ls_names[w0->ls_cmd] : "");
ret = -EINVAL;
goto out_put;
}
if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) {
dev_info(lport->dev,
"RCV %s LS failed: payload too large\n",
(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
nvmefc_ls_names[w0->ls_cmd] : "");
ret = -E2BIG;
goto out_put;
}
lsop = kzalloc(sizeof(*lsop) +
sizeof(union nvmefc_ls_requests) +
sizeof(union nvmefc_ls_responses),
GFP_KERNEL);
if (!lsop) {
dev_info(lport->dev,
"RCV %s LS failed: No memory\n",
(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
nvmefc_ls_names[w0->ls_cmd] : "");
ret = -ENOMEM;
goto out_put;
}
lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1];
lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1];
lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf,
sizeof(*lsop->rspbuf),
DMA_TO_DEVICE);
if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) {
dev_info(lport->dev,
"RCV %s LS failed: DMA mapping failure\n",
(w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
nvmefc_ls_names[w0->ls_cmd] : "");
ret = -EFAULT;
goto out_free;
}
lsop->rport = rport;
lsop->lsrsp = lsrsp;
memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len);
lsop->rqstdatalen = lsreqbuf_len;
spin_lock_irqsave(&rport->lock, flags);
if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) {
spin_unlock_irqrestore(&rport->lock, flags);
ret = -ENOTCONN;
goto out_unmap;
}
list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list);
spin_unlock_irqrestore(&rport->lock, flags);
schedule_work(&rport->lsrcv_work);
return 0;
out_unmap:
fc_dma_unmap_single(lport->dev, lsop->rspdma,
sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
out_free:
kfree(lsop);
out_put:
nvme_fc_rport_put(rport);
return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req);
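/*
 * Illustrative sketch (not part of the diff): how an FC LLDD might hand a
 * received LS frame to the transport entry point above. The lldd_exchange
 * structure, its fields and lldd_abort_exchange() are hypothetical; only
 * nvme_fc_rcv_ls_req() and its contract come from the code above.
 */
struct lldd_exchange {
	struct nvme_fc_remote_port *remoteport;
	struct nvmefc_ls_rsp lsrsp;	/* referenced later via ->xmt_ls_rsp() */
};

static void lldd_abort_exchange(struct lldd_exchange *xchg);

static void lldd_recv_nvme_ls(struct lldd_exchange *xchg,
			      void *payload, u32 payload_len)
{
	/*
	 * The transport copies the payload, so the receive buffer may be
	 * reused as soon as this call returns.
	 */
	if (nvme_fc_rcv_ls_req(xchg->remoteport, &xchg->lsrsp,
			       payload, payload_len))
		lldd_abort_exchange(xchg);	/* per the kernel-doc above */
}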
/* *********************** NVME Ctrl Routines **************************** */
static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
static void
__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
struct nvme_fc_fcp_op *op)
@ -1500,7 +1835,7 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
if (opstate != FCPOP_STATE_ACTIVE)
atomic_set(&op->state, opstate);
else if (ctrl->flags & FCCTRL_TERMIO)
else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
ctrl->iocnt++;
spin_unlock_irqrestore(&ctrl->lock, flags);
@ -1537,7 +1872,7 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
if (opstate == FCPOP_STATE_ABORTED) {
spin_lock_irqsave(&ctrl->lock, flags);
if (ctrl->flags & FCCTRL_TERMIO) {
if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
if (!--ctrl->iocnt)
wake_up(&ctrl->ioabort_wait);
}
@ -1771,7 +2106,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
if (res)
return res;
op->op.fcp_req.first_sgl = &op->sgl[0];
op->op.fcp_req.first_sgl = op->sgl;
op->op.fcp_req.private = &op->priv[0];
nvme_req(rq)->ctrl = &ctrl->ctrl;
return res;
@ -1783,15 +2118,17 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
struct nvme_fc_fcp_op *aen_op;
struct nvme_fc_cmd_iu *cmdiu;
struct nvme_command *sqe;
void *private;
void *private = NULL;
int i, ret;
aen_op = ctrl->aen_ops;
for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
if (ctrl->lport->ops->fcprqst_priv_sz) {
private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
GFP_KERNEL);
if (!private)
return -ENOMEM;
if (!private)
return -ENOMEM;
}
cmdiu = &aen_op->cmd_iu;
sqe = &cmdiu->sqe;
@ -1822,9 +2159,6 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
aen_op = ctrl->aen_ops;
for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
if (!aen_op->fcp_req.private)
continue;
__nvme_fc_exit_request(ctrl, aen_op);
kfree(aen_op->fcp_req.private);
@ -2366,16 +2700,9 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
struct nvme_fc_fcp_op *aen_op;
unsigned long flags;
bool terminating = false;
blk_status_t ret;
spin_lock_irqsave(&ctrl->lock, flags);
if (ctrl->flags & FCCTRL_TERMIO)
terminating = true;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (terminating)
if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
return;
aen_op = &ctrl->aen_ops[0];
@ -2584,10 +2911,9 @@ nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl)
struct nvme_fc_rport *rport = ctrl->rport;
u32 cnt;
if (ctrl->assoc_active)
if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags))
return 1;
ctrl->assoc_active = true;
cnt = atomic_inc_return(&rport->act_ctrl_cnt);
if (cnt == 1)
nvme_fc_rport_active_on_lport(rport);
@ -2602,7 +2928,7 @@ nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl)
struct nvme_fc_lport *lport = rport->lport;
u32 cnt;
/* ctrl->assoc_active=false will be set independently */
/* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */
cnt = atomic_dec_return(&rport->act_ctrl_cnt);
if (cnt == 0) {
@ -2622,6 +2948,8 @@ static int
nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
struct nvmefc_ls_rcv_op *disls = NULL;
unsigned long flags;
int ret;
bool changed;
@ -2739,12 +3067,18 @@ out_term_aen_ops:
out_disconnect_admin_queue:
/* send a Disconnect(association) LS to fc-nvme target */
nvme_fc_xmt_disconnect_assoc(ctrl);
spin_lock_irqsave(&ctrl->lock, flags);
ctrl->association_id = 0;
disls = ctrl->rcv_disconn;
ctrl->rcv_disconn = NULL;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (disls)
nvme_fc_xmt_ls_rsp(disls);
out_delete_hw_queue:
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
out_free_queue:
nvme_fc_free_queue(&ctrl->queues[0]);
ctrl->assoc_active = false;
clear_bit(ASSOC_ACTIVE, &ctrl->flags);
nvme_fc_ctlr_inactive_on_rport(ctrl);
return ret;
@ -2759,14 +3093,14 @@ out_free_queue:
static void
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
{
struct nvmefc_ls_rcv_op *disls = NULL;
unsigned long flags;
if (!ctrl->assoc_active)
if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
return;
ctrl->assoc_active = false;
spin_lock_irqsave(&ctrl->lock, flags);
ctrl->flags |= FCCTRL_TERMIO;
set_bit(FCCTRL_TERMIO, &ctrl->flags);
ctrl->iocnt = 0;
spin_unlock_irqrestore(&ctrl->lock, flags);
@ -2817,7 +3151,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
/* wait for all io that had to be aborted */
spin_lock_irq(&ctrl->lock);
wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
ctrl->flags &= ~FCCTRL_TERMIO;
clear_bit(FCCTRL_TERMIO, &ctrl->flags);
spin_unlock_irq(&ctrl->lock);
nvme_fc_term_aen_ops(ctrl);
@ -2831,7 +3165,17 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
if (ctrl->association_id)
nvme_fc_xmt_disconnect_assoc(ctrl);
spin_lock_irqsave(&ctrl->lock, flags);
ctrl->association_id = 0;
disls = ctrl->rcv_disconn;
ctrl->rcv_disconn = NULL;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (disls)
/*
* If a Disconnect Request was waiting for a response, send it
* now that all ABTS's have been issued (and are complete).
*/
nvme_fc_xmt_ls_rsp(disls);
if (ctrl->ctrl.tagset) {
nvme_fc_delete_hw_io_queues(ctrl);
@ -2902,7 +3246,9 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
"while waiting for remoteport connectivity.\n",
ctrl->cnum, portptr->dev_loss_tmo);
ctrl->cnum, min_t(int, portptr->dev_loss_tmo,
(ctrl->ctrl.opts->max_reconnects *
ctrl->ctrl.opts->reconnect_delay)));
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
}
}
@ -3089,7 +3435,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->dev = lport->dev;
ctrl->cnum = idx;
ctrl->ioq_live = false;
ctrl->assoc_active = false;
atomic_set(&ctrl->err_work_active, 0);
init_waitqueue_head(&ctrl->ioabort_wait);

drivers/nvme/host/fc.h (new file, 227 lines)

@ -0,0 +1,227 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2016, Avago Technologies
*/
#ifndef _NVME_FC_TRANSPORT_H
#define _NVME_FC_TRANSPORT_H 1
/*
* Common definitions between the nvme_fc (host) transport and
* nvmet_fc (target) transport implementation.
*/
/*
* ****************** FC-NVME LS HANDLING ******************
*/
union nvmefc_ls_requests {
struct fcnvme_ls_rqst_w0 w0;
struct fcnvme_ls_cr_assoc_rqst rq_cr_assoc;
struct fcnvme_ls_cr_conn_rqst rq_cr_conn;
struct fcnvme_ls_disconnect_assoc_rqst rq_dis_assoc;
struct fcnvme_ls_disconnect_conn_rqst rq_dis_conn;
} __aligned(128); /* alignment for other things alloc'd with */
union nvmefc_ls_responses {
struct fcnvme_ls_rjt rsp_rjt;
struct fcnvme_ls_cr_assoc_acc rsp_cr_assoc;
struct fcnvme_ls_cr_conn_acc rsp_cr_conn;
struct fcnvme_ls_disconnect_assoc_acc rsp_dis_assoc;
struct fcnvme_ls_disconnect_conn_acc rsp_dis_conn;
} __aligned(128); /* alignment for other things alloc'd with */
static inline void
nvme_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd)
{
struct fcnvme_ls_acc_hdr *acc = buf;
acc->w0.ls_cmd = ls_cmd;
acc->desc_list_len = desc_len;
acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST);
acc->rqst.desc_len =
fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst));
acc->rqst.w0.ls_cmd = rqst_ls_cmd;
}
static inline int
nvme_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd,
u8 reason, u8 explanation, u8 vendor)
{
struct fcnvme_ls_rjt *rjt = buf;
nvme_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST,
fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)),
ls_cmd);
rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT);
rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt));
rjt->rjt.reason_code = reason;
rjt->rjt.reason_explanation = explanation;
rjt->rjt.vendor = vendor;
return sizeof(struct fcnvme_ls_rjt);
}
/* Validation Error indexes into the string table below */
enum {
VERR_NO_ERROR = 0,
VERR_CR_ASSOC_LEN = 1,
VERR_CR_ASSOC_RQST_LEN = 2,
VERR_CR_ASSOC_CMD = 3,
VERR_CR_ASSOC_CMD_LEN = 4,
VERR_ERSP_RATIO = 5,
VERR_ASSOC_ALLOC_FAIL = 6,
VERR_QUEUE_ALLOC_FAIL = 7,
VERR_CR_CONN_LEN = 8,
VERR_CR_CONN_RQST_LEN = 9,
VERR_ASSOC_ID = 10,
VERR_ASSOC_ID_LEN = 11,
VERR_NO_ASSOC = 12,
VERR_CONN_ID = 13,
VERR_CONN_ID_LEN = 14,
VERR_INVAL_CONN = 15,
VERR_CR_CONN_CMD = 16,
VERR_CR_CONN_CMD_LEN = 17,
VERR_DISCONN_LEN = 18,
VERR_DISCONN_RQST_LEN = 19,
VERR_DISCONN_CMD = 20,
VERR_DISCONN_CMD_LEN = 21,
VERR_DISCONN_SCOPE = 22,
VERR_RS_LEN = 23,
VERR_RS_RQST_LEN = 24,
VERR_RS_CMD = 25,
VERR_RS_CMD_LEN = 26,
VERR_RS_RCTL = 27,
VERR_RS_RO = 28,
VERR_LSACC = 29,
VERR_LSDESC_RQST = 30,
VERR_LSDESC_RQST_LEN = 31,
VERR_CR_ASSOC = 32,
VERR_CR_ASSOC_ACC_LEN = 33,
VERR_CR_CONN = 34,
VERR_CR_CONN_ACC_LEN = 35,
VERR_DISCONN = 36,
VERR_DISCONN_ACC_LEN = 37,
};
static char *validation_errors[] = {
"OK",
"Bad CR_ASSOC Length",
"Bad CR_ASSOC Rqst Length",
"Not CR_ASSOC Cmd",
"Bad CR_ASSOC Cmd Length",
"Bad Ersp Ratio",
"Association Allocation Failed",
"Queue Allocation Failed",
"Bad CR_CONN Length",
"Bad CR_CONN Rqst Length",
"Not Association ID",
"Bad Association ID Length",
"No Association",
"Not Connection ID",
"Bad Connection ID Length",
"Invalid Connection ID",
"Not CR_CONN Cmd",
"Bad CR_CONN Cmd Length",
"Bad DISCONN Length",
"Bad DISCONN Rqst Length",
"Not DISCONN Cmd",
"Bad DISCONN Cmd Length",
"Bad Disconnect Scope",
"Bad RS Length",
"Bad RS Rqst Length",
"Not RS Cmd",
"Bad RS Cmd Length",
"Bad RS R_CTL",
"Bad RS Relative Offset",
"Not LS_ACC",
"Not LSDESC_RQST",
"Bad LSDESC_RQST Length",
"Not CR_ASSOC Rqst",
"Bad CR_ASSOC ACC Length",
"Not CR_CONN Rqst",
"Bad CR_CONN ACC Length",
"Not Disconnect Rqst",
"Bad Disconnect ACC Length",
};
#define NVME_FC_LAST_LS_CMD_VALUE FCNVME_LS_DISCONNECT_CONN
static char *nvmefc_ls_names[] = {
"Reserved (0)",
"RJT (1)",
"ACC (2)",
"Create Association",
"Create Connection",
"Disconnect Association",
"Disconnect Connection",
};
static inline void
nvmefc_fmt_lsreq_discon_assoc(struct nvmefc_ls_req *lsreq,
struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst,
struct fcnvme_ls_disconnect_assoc_acc *discon_acc,
u64 association_id)
{
lsreq->rqstaddr = discon_rqst;
lsreq->rqstlen = sizeof(*discon_rqst);
lsreq->rspaddr = discon_acc;
lsreq->rsplen = sizeof(*discon_acc);
lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC;
discon_rqst->desc_list_len = cpu_to_be32(
sizeof(struct fcnvme_lsdesc_assoc_id) +
sizeof(struct fcnvme_lsdesc_disconn_cmd));
discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
discon_rqst->associd.desc_len =
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_assoc_id));
discon_rqst->associd.association_id = cpu_to_be64(association_id);
discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
FCNVME_LSDESC_DISCONN_CMD);
discon_rqst->discon_cmd.desc_len =
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_disconn_cmd));
}
static inline int
nvmefc_vldt_lsreq_discon_assoc(u32 rqstlen,
struct fcnvme_ls_disconnect_assoc_rqst *rqst)
{
int ret = 0;
if (rqstlen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst))
ret = VERR_DISCONN_LEN;
else if (rqst->desc_list_len !=
fcnvme_lsdesc_len(
sizeof(struct fcnvme_ls_disconnect_assoc_rqst)))
ret = VERR_DISCONN_RQST_LEN;
else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
ret = VERR_ASSOC_ID;
else if (rqst->associd.desc_len !=
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_assoc_id)))
ret = VERR_ASSOC_ID_LEN;
else if (rqst->discon_cmd.desc_tag !=
cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD))
ret = VERR_DISCONN_CMD;
else if (rqst->discon_cmd.desc_len !=
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_disconn_cmd)))
ret = VERR_DISCONN_CMD_LEN;
/*
* The standard changed for this LS: in the old format these now-reserved
* bytes carried a scope value, so a nonzero value means a scope other
* than Association and the request is rejected.
*/
else if (rqst->discon_cmd.rsvd8[0])
ret = VERR_DISCONN_SCOPE;
return ret;
}
#endif /* _NVME_FC_TRANSPORT_H */

drivers/nvme/host/lightnvm.c

@ -171,7 +171,7 @@ struct nvme_nvm_bb_tbl {
__le32 tdresv;
__le32 thresv;
__le32 rsvd2[8];
__u8 blk[0];
__u8 blk[];
};
struct nvme_nvm_id20_addrf {
@ -961,7 +961,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
geo = &dev->geo;
geo->csecs = 1 << ns->lba_shift;
geo->sos = ns->ms;
geo->ext = ns->ext;
if (ns->features & NVME_NS_EXT_LBAS)
geo->ext = true;
else
geo->ext = false;
geo->mdts = ns->ctrl->max_hw_sectors;
dev->q = q;

drivers/nvme/host/multipath.c

@ -3,6 +3,7 @@
* Copyright (c) 2017-2018 Christoph Hellwig.
*/
#include <linux/backing-dev.h>
#include <linux/moduleparam.h>
#include <trace/events/block.h>
#include "nvme.h"
@ -293,7 +294,7 @@ static bool nvme_available_path(struct nvme_ns_head *head)
static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
struct bio *bio)
{
struct nvme_ns_head *head = q->queuedata;
struct nvme_ns_head *head = bio->bi_disk->private_data;
struct device *dev = disk_to_dev(head->disk);
struct nvme_ns *ns;
blk_qc_t ret = BLK_QC_T_NONE;
@ -371,13 +372,12 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
* We also do this for private namespaces as the namespace sharing data could
* change after a rescan.
*/
if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
return 0;
q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node);
if (!q)
goto out;
q->queuedata = head;
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* set to a default value for 512 until disk is validated */
blk_queue_logical_block_size(q, 512);
@ -666,6 +666,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
nvme_mpath_set_live(ns);
mutex_unlock(&ns->head->lock);
}
if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
struct backing_dev_info *info =
ns->head->disk->queue->backing_dev_info;
info->capabilities |= BDI_CAP_STABLE_WRITES;
}
}
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@ -687,7 +694,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
int error;
/* check if multipath is enabled and we have the capability */
if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3)))
if (!multipath || !ctrl->subsys ||
!(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
return 0;
ctrl->anacap = id->anacap;

drivers/nvme/host/nvme.h

@ -16,6 +16,7 @@
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
#include <linux/wait.h>
#include <linux/t10-pi.h>
#include <trace/events/block.h>
@ -30,8 +31,10 @@ extern unsigned int admin_timeout;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define NVME_INLINE_SG_CNT 0
#define NVME_INLINE_METADATA_SG_CNT 0
#else
#define NVME_INLINE_SG_CNT 2
#define NVME_INLINE_METADATA_SG_CNT 1
#endif
extern struct workqueue_struct *nvme_wq;
@ -228,6 +231,7 @@ struct nvme_ctrl {
u32 page_size;
u32 max_hw_sectors;
u32 max_segments;
u32 max_integrity_segments;
u16 crdt[3];
u16 oncs;
u16 oacs;
@ -352,6 +356,7 @@ struct nvme_ns_head {
struct nvme_ns_ids ids;
struct list_head entry;
struct kref ref;
bool shared;
int instance;
#ifdef CONFIG_NVME_MULTIPATH
struct gendisk *disk;
@ -363,6 +368,11 @@ struct nvme_ns_head {
#endif
};
enum nvme_ns_features {
NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
};
struct nvme_ns {
struct list_head list;
@ -382,18 +392,23 @@ struct nvme_ns {
u16 ms;
u16 sgs;
u32 sws;
bool ext;
u8 pi_type;
unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
u16 noiob;
struct nvme_fault_inject fault_inject;
};
/* NVMe ns supports metadata actions by the controller (generate/strip) */
static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
{
return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
}
struct nvme_ctrl_ops {
const char *name;
struct module *module;
@ -449,6 +464,14 @@ static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
return lba << (ns->lba_shift - SECTOR_SHIFT);
}
/*
* Convert byte length to nvme's 0-based num dwords
*/
static inline u32 nvme_bytes_to_numd(size_t len)
{
return (len >> 2) - 1;
}
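/*
 * Worked example (illustrative): a 4096-byte Get Log Page buffer is
 * 4096 / 4 = 1024 dwords, and NVMe encodes the count 0-based, so
 * nvme_bytes_to_numd(4096) == 1023.
 */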
static inline void nvme_end_request(struct request *req, __le16 status,
union nvme_result result)
{
@ -489,7 +512,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);

drivers/nvme/host/pci.c

@ -68,14 +68,30 @@ static int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
static int io_queue_count_set(const char *val, const struct kernel_param *kp)
{
unsigned int n;
int ret;
ret = kstrtouint(val, 10, &n);
if (ret != 0 || n > num_possible_cpus())
return -EINVAL;
return param_set_uint(val, kp);
}
static const struct kernel_param_ops io_queue_count_ops = {
.set = io_queue_count_set,
.get = param_get_uint,
};
static unsigned int write_queues;
module_param(write_queues, uint, 0644);
module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644);
MODULE_PARM_DESC(write_queues,
"Number of queues to use for writes. If not set, reads and writes "
"will share a queue set.");
static unsigned int poll_queues;
module_param(poll_queues, uint, 0644);
module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
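/*
 * Usage note (illustrative, not part of the patch): with the callback ops
 * above, writing a value larger than num_possible_cpus() to write_queues
 * or poll_queues now fails with -EINVAL when the parameter is set, instead
 * of being silently clamped in nvme_init() by the min() calls removed
 * further down.
 */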
struct nvme_dev;
@ -128,6 +144,9 @@ struct nvme_dev {
dma_addr_t host_mem_descs_dma;
struct nvme_host_mem_buf_desc *host_mem_descs;
void **host_mem_desc_bufs;
unsigned int nr_allocated_queues;
unsigned int nr_write_queues;
unsigned int nr_poll_queues;
};
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
@ -166,14 +185,13 @@ struct nvme_queue {
void *sq_cmds;
/* only used for poll queues: */
spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
volatile struct nvme_completion *cqes;
struct nvme_completion *cqes;
dma_addr_t sq_dma_addr;
dma_addr_t cq_dma_addr;
u32 __iomem *q_db;
u16 q_depth;
u16 cq_vector;
u16 sq_tail;
u16 last_sq_tail;
u16 cq_head;
u16 qid;
u8 cq_phase;
@ -209,25 +227,14 @@ struct nvme_iod {
struct scatterlist *sg;
};
static unsigned int max_io_queues(void)
static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
{
return num_possible_cpus() + write_queues + poll_queues;
}
static unsigned int max_queue_count(void)
{
/* IO queues + admin queue */
return 1 + max_io_queues();
}
static inline unsigned int nvme_dbbuf_size(u32 stride)
{
return (max_queue_count() * 8 * stride);
return dev->nr_allocated_queues * 8 * dev->db_stride;
}
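/*
 * Worked example (illustrative): each queue contributes an SQ and a CQ
 * entry of 4 bytes each, scaled by the doorbell stride, so a controller
 * with 17 allocated queues (1 admin + 16 I/O) and db_stride == 1 needs
 * 17 * 8 * 1 = 136 bytes for the shadow doorbell buffer and the same
 * again for the event-index buffer.
 */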
static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
{
unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
unsigned int mem_size = nvme_dbbuf_size(dev);
if (dev->dbbuf_dbs)
return 0;
@ -252,7 +259,7 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
{
unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
unsigned int mem_size = nvme_dbbuf_size(dev);
if (dev->dbbuf_dbs) {
dma_free_coherent(dev->dev, mem_size,
@ -446,24 +453,11 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
return 0;
}
/*
* Write sq tail if we are asked to, or if the next command would wrap.
*/
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq)
{
if (!write_sq) {
u16 next_tail = nvmeq->sq_tail + 1;
if (next_tail == nvmeq->q_depth)
next_tail = 0;
if (next_tail != nvmeq->last_sq_tail)
return;
}
if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
writel(nvmeq->sq_tail, nvmeq->q_db);
nvmeq->last_sq_tail = nvmeq->sq_tail;
}
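/*
 * Descriptive note (not part of the patch): nvme_write_sq_db() no longer
 * decides when to ring the doorbell itself; the caller does, via the
 * write_sq flag in nvme_submit_cmd() or via nvme_commit_rqs(), so the
 * last_sq_tail bookkeeping removed here and in nvme_init_queue() becomes
 * unnecessary.
 */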
/**
@ -480,7 +474,8 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
cmd, sizeof(*cmd));
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
nvme_write_sq_db(nvmeq, write_sq);
if (write_sq)
nvme_write_sq_db(nvmeq);
spin_unlock(&nvmeq->sq_lock);
}
@ -489,8 +484,7 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
struct nvme_queue *nvmeq = hctx->driver_data;
spin_lock(&nvmeq->sq_lock);
if (nvmeq->sq_tail != nvmeq->last_sq_tail)
nvme_write_sq_db(nvmeq, true);
nvme_write_sq_db(nvmeq);
spin_unlock(&nvmeq->sq_lock);
}
@ -922,8 +916,9 @@ static void nvme_pci_complete_rq(struct request *req)
/* We read the CQE phase first to check if the rest of the entry is valid */
static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
{
return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
nvmeq->cq_phase;
struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head];
return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase;
}
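/*
 * Descriptive note (not part of the patch): dropping "volatile" from the
 * CQE array works because the only field that must be re-read from memory
 * on every poll is the phase bit in the status word, and the READ_ONCE()
 * above provides exactly that guarantee while letting the rest of the
 * completion entry be accessed with ordinary loads.
 */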
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
@ -944,7 +939,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
struct nvme_completion *cqe = &nvmeq->cqes[idx];
struct request *req;
if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
@ -1501,7 +1496,6 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
struct nvme_dev *dev = nvmeq->dev;
nvmeq->sq_tail = 0;
nvmeq->last_sq_tail = 0;
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
@ -2003,7 +1997,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
{
struct nvme_dev *dev = affd->priv;
unsigned int nr_read_queues;
unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues;
/*
* If there is no interrupt available for queues, ensure that
@ -2019,12 +2013,12 @@ static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
if (!nrirqs) {
nrirqs = 1;
nr_read_queues = 0;
} else if (nrirqs == 1 || !write_queues) {
} else if (nrirqs == 1 || !nr_write_queues) {
nr_read_queues = 0;
} else if (write_queues >= nrirqs) {
} else if (nr_write_queues >= nrirqs) {
nr_read_queues = 1;
} else {
nr_read_queues = nrirqs - write_queues;
nr_read_queues = nrirqs - nr_write_queues;
}
dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
@ -2048,7 +2042,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
* Poll queues don't need interrupts, but we need at least one IO
* queue left over for non-polled IO.
*/
this_p_queues = poll_queues;
this_p_queues = dev->nr_poll_queues;
if (this_p_queues >= nr_io_queues) {
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
@ -2078,14 +2072,25 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
__nvme_disable_io_queues(dev, nvme_admin_delete_cq);
}
static unsigned int nvme_max_io_queues(struct nvme_dev *dev)
{
return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues;
}
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = &dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
int result, nr_io_queues;
unsigned int nr_io_queues;
unsigned long size;
int result;
nr_io_queues = max_io_queues();
/*
* Sample the module parameters once at reset time so that we have
* stable values to work with.
*/
dev->nr_write_queues = write_queues;
dev->nr_poll_queues = poll_queues;
/*
* If tags are shared with admin queue (Apple bug), then
@ -2093,6 +2098,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
*/
if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
nr_io_queues = 1;
else
nr_io_queues = min(nvme_max_io_queues(dev),
dev->nr_allocated_queues - 1);
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0)
@ -2565,6 +2573,12 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
/*
* We do not support an SGL for metadata (yet), so we are limited to a
* single integrity segment for the separate metadata pointer.
*/
dev->ctrl.max_integrity_segments = 1;
result = nvme_init_identify(&dev->ctrl);
if (result)
goto out;
@ -2767,8 +2781,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!dev)
return -ENOMEM;
dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue),
GFP_KERNEL, node);
dev->nr_write_queues = write_queues;
dev->nr_poll_queues = poll_queues;
dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1;
dev->queues = kcalloc_node(dev->nr_allocated_queues,
sizeof(struct nvme_queue), GFP_KERNEL, node);
if (!dev->queues)
goto free;
@ -3131,8 +3148,6 @@ static int __init nvme_init(void)
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
write_queues = min(write_queues, num_possible_cpus());
poll_queues = min(poll_queues, num_possible_cpus());
return pci_register_driver(&nvme_driver);
}

drivers/nvme/host/rdma.c

@ -34,6 +34,11 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 4
#define NVME_RDMA_DATA_SGL_SIZE \
(sizeof(struct scatterlist) * NVME_INLINE_SG_CNT)
#define NVME_RDMA_METADATA_SGL_SIZE \
(sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT)
struct nvme_rdma_device {
struct ib_device *dev;
struct ib_pd *pd;
@ -48,6 +53,11 @@ struct nvme_rdma_qe {
u64 dma;
};
struct nvme_rdma_sgl {
int nents;
struct sg_table sg_table;
};
struct nvme_rdma_queue;
struct nvme_rdma_request {
struct nvme_request req;
@ -58,12 +68,12 @@ struct nvme_rdma_request {
refcount_t ref;
struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
u32 num_sge;
int nents;
struct ib_reg_wr reg_wr;
struct ib_cqe reg_cqe;
struct nvme_rdma_queue *queue;
struct sg_table sg_table;
struct scatterlist first_sgl[];
struct nvme_rdma_sgl data_sgl;
struct nvme_rdma_sgl *metadata_sgl;
bool use_sig_mr;
};
enum nvme_rdma_queue_flags {
@ -85,6 +95,7 @@ struct nvme_rdma_queue {
struct rdma_cm_id *cm_id;
int cm_error;
struct completion cm_done;
bool pi_support;
};
struct nvme_rdma_ctrl {
@ -261,6 +272,8 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
init_attr.qp_type = IB_QPT_RC;
init_attr.send_cq = queue->ib_cq;
init_attr.recv_cq = queue->ib_cq;
if (queue->pi_support)
init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
@ -290,6 +303,12 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
if (!req->sqe.data)
return -ENOMEM;
/* metadata nvme_rdma_sgl struct is located after command's data SGL */
if (queue->pi_support)
req->metadata_sgl = (void *)nvme_req(rq) +
sizeof(struct nvme_rdma_request) +
NVME_RDMA_DATA_SGL_SIZE;
req->queue = queue;
return 0;
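/*
 * Layout sketch (illustrative): with PI enabled, the per-request PDU sized
 * by cmd_size in nvme_rdma_alloc_tagset() is laid out as
 *
 *   struct nvme_rdma_request        (embeds data_sgl)
 *   NVME_RDMA_DATA_SGL_SIZE         inline data scatterlist
 *   struct nvme_rdma_sgl            metadata_sgl header
 *   NVME_RDMA_METADATA_SGL_SIZE     inline metadata scatterlist
 *
 * which is why metadata_sgl is located here by skipping the request
 * structure plus the data SGL area.
 */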
@ -400,6 +419,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
dev = queue->device;
ibdev = dev->dev;
if (queue->pi_support)
ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs);
ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
/*
@ -416,10 +437,16 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
nvme_rdma_dev_put(dev);
}
static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
{
return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
ibdev->attrs.max_fast_reg_page_list_len - 1);
u32 max_page_list_len;
if (pi_support)
max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len;
else
max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len;
return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
}
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
@ -476,7 +503,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
* misaligned we'll end up using two entries for a single data page,
* so one additional entry is required.
*/
pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1;
pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1;
ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
queue->queue_size,
IB_MR_TYPE_MEM_REG,
@ -488,10 +515,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
goto out_destroy_ring;
}
if (queue->pi_support) {
ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs,
queue->queue_size, IB_MR_TYPE_INTEGRITY,
pages_per_mr, pages_per_mr);
if (ret) {
dev_err(queue->ctrl->ctrl.device,
"failed to initialize PI MR pool sized %d for QID %d\n",
queue->queue_size, idx);
goto out_destroy_mr_pool;
}
}
set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
return 0;
out_destroy_mr_pool:
ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
out_destroy_ring:
nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@ -513,6 +554,10 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
queue = &ctrl->queues[idx];
queue->ctrl = ctrl;
if (idx && ctrl->ctrl.max_integrity_segments)
queue->pi_support = true;
else
queue->pi_support = false;
init_completion(&queue->cm_done);
if (idx > 0)
@ -723,7 +768,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = nctrl->numa_node;
set->cmd_size = sizeof(struct nvme_rdma_request) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
NVME_RDMA_DATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
@ -737,7 +782,10 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
NVME_RDMA_DATA_SGL_SIZE;
if (nctrl->max_integrity_segments)
set->cmd_size += sizeof(struct nvme_rdma_sgl) +
NVME_RDMA_METADATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
@ -770,6 +818,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool new)
{
bool pi_capable = false;
int error;
error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
@ -779,7 +828,13 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->device = ctrl->queues[0].device;
ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
/* T10-PI support */
if (ctrl->device->dev->attrs.device_cap_flags &
IB_DEVICE_INTEGRITY_HANDOVER)
pi_capable = true;
ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
pi_capable);
/*
* Bind the async event SQE DMA mapping to the admin queue lifetime.
@ -821,6 +876,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->ctrl.max_segments = ctrl->max_fr_pages;
ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
if (pi_capable)
ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
else
ctrl->ctrl.max_integrity_segments = 0;
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@ -1149,17 +1208,29 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
struct list_head *pool = &queue->qp->rdma_mrs;
if (!blk_rq_nr_phys_segments(rq))
return;
if (blk_integrity_rq(rq)) {
ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
req->metadata_sgl->nents, rq_dma_dir(rq));
sg_free_table_chained(&req->metadata_sgl->sg_table,
NVME_INLINE_METADATA_SG_CNT);
}
if (req->use_sig_mr)
pool = &queue->qp->sig_mrs;
if (req->mr) {
ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
ib_mr_pool_put(queue->qp, pool, req->mr);
req->mr = NULL;
}
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
rq_dma_dir(rq));
sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
}
static int nvme_rdma_set_sg_null(struct nvme_command *c)
@ -1178,7 +1249,7 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
int count)
{
struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
struct scatterlist *sgl = req->sg_table.sgl;
struct scatterlist *sgl = req->data_sgl.sg_table.sgl;
struct ib_sge *sge = &req->sge[1];
u32 len = 0;
int i;
@ -1203,8 +1274,8 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
{
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl));
put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length);
sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl));
put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length);
put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key);
sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
return 0;
@ -1225,7 +1296,8 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
* Align the MR to a 4K page size to match the ctrl page size and
* the block virtual boundary.
*/
nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL,
SZ_4K);
if (unlikely(nr < count)) {
ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
req->mr = NULL;
@ -1256,12 +1328,125 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
return 0;
}
static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
struct nvme_command *cmd, struct ib_sig_domain *domain,
u16 control, u8 pi_type)
{
domain->sig_type = IB_SIG_TYPE_T10_DIF;
domain->sig.dif.bg_type = IB_T10DIF_CRC;
domain->sig.dif.pi_interval = 1 << bi->interval_exp;
domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
if (control & NVME_RW_PRINFO_PRCHK_REF)
domain->sig.dif.ref_remap = true;
domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
domain->sig.dif.app_escape = true;
if (pi_type == NVME_NS_DPS_PI_TYPE3)
domain->sig.dif.ref_escape = true;
}
static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi,
struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs,
u8 pi_type)
{
u16 control = le16_to_cpu(cmd->rw.control);
memset(sig_attrs, 0, sizeof(*sig_attrs));
if (control & NVME_RW_PRINFO_PRACT) {
/* for WRITE_INSERT/READ_STRIP no memory domain */
sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
pi_type);
/* Clear the PRACT bit since HCA will generate/verify the PI */
control &= ~NVME_RW_PRINFO_PRACT;
cmd->rw.control = cpu_to_le16(control);
} else {
/* for WRITE_PASS/READ_PASS both wire/memory domains exist */
nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
pi_type);
nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
pi_type);
}
}
static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask)
{
*mask = 0;
if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF)
*mask |= IB_SIG_CHECK_REFTAG;
if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD)
*mask |= IB_SIG_CHECK_GUARD;
}
static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc)
{
if (unlikely(wc->status != IB_WC_SUCCESS))
nvme_rdma_wr_error(cq, wc, "SIG");
}
static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req, struct nvme_command *c,
int count, int pi_count)
{
struct nvme_rdma_sgl *sgl = &req->data_sgl;
struct ib_reg_wr *wr = &req->reg_wr;
struct request *rq = blk_mq_rq_from_pdu(req);
struct nvme_ns *ns = rq->q->queuedata;
struct bio *bio = rq->bio;
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
int nr;
req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
if (WARN_ON_ONCE(!req->mr))
return -EAGAIN;
nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL,
req->metadata_sgl->sg_table.sgl, pi_count, NULL,
SZ_4K);
if (unlikely(nr))
goto mr_put;
nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c,
req->mr->sig_attrs, ns->pi_type);
nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
req->reg_cqe.done = nvme_rdma_sig_done;
memset(wr, 0, sizeof(*wr));
wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
wr->wr.wr_cqe = &req->reg_cqe;
wr->wr.num_sge = 0;
wr->wr.send_flags = 0;
wr->mr = req->mr;
wr->key = req->mr->rkey;
wr->access = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
sg->addr = cpu_to_le64(req->mr->iova);
put_unaligned_le24(req->mr->length, sg->length);
put_unaligned_le32(req->mr->rkey, sg->key);
sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
return 0;
mr_put:
ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr);
req->mr = NULL;
if (nr < 0)
return nr;
return -EINVAL;
}
static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
struct request *rq, struct nvme_command *c)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
int pi_count = 0;
int count, ret;
req->num_sge = 1;
@ -1272,22 +1457,52 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
if (!blk_rq_nr_phys_segments(rq))
return nvme_rdma_set_sg_null(c);
req->sg_table.sgl = req->first_sgl;
ret = sg_alloc_table_chained(&req->sg_table,
blk_rq_nr_phys_segments(rq), req->sg_table.sgl,
req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl,
NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
req->data_sgl.sg_table.sgl);
count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
rq_dma_dir(rq));
count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
req->data_sgl.nents, rq_dma_dir(rq));
if (unlikely(count <= 0)) {
ret = -EIO;
goto out_free_table;
}
if (blk_integrity_rq(rq)) {
req->metadata_sgl->sg_table.sgl =
(struct scatterlist *)(req->metadata_sgl + 1);
ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
blk_rq_count_integrity_sg(rq->q, rq->bio),
req->metadata_sgl->sg_table.sgl,
NVME_INLINE_METADATA_SG_CNT);
if (unlikely(ret)) {
ret = -ENOMEM;
goto out_unmap_sg;
}
req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
rq->bio, req->metadata_sgl->sg_table.sgl);
pi_count = ib_dma_map_sg(ibdev,
req->metadata_sgl->sg_table.sgl,
req->metadata_sgl->nents,
rq_dma_dir(rq));
if (unlikely(pi_count <= 0)) {
ret = -EIO;
goto out_free_pi_table;
}
}
if (req->use_sig_mr) {
ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count);
goto out;
}
if (count <= dev->num_inline_segments) {
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
queue->ctrl->use_inline_data &&
@ -1306,14 +1521,23 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
ret = nvme_rdma_map_sg_fr(queue, req, c, count);
out:
if (unlikely(ret))
goto out_unmap_sg;
goto out_unmap_pi_sg;
return 0;
out_unmap_pi_sg:
if (blk_integrity_rq(rq))
ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
req->metadata_sgl->nents, rq_dma_dir(rq));
out_free_pi_table:
if (blk_integrity_rq(rq))
sg_free_table_chained(&req->metadata_sgl->sg_table,
NVME_INLINE_METADATA_SG_CNT);
out_unmap_sg:
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
rq_dma_dir(rq));
out_free_table:
sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
return ret;
}
@ -1761,6 +1985,15 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
queue->pi_support &&
(c->common.opcode == nvme_cmd_write ||
c->common.opcode == nvme_cmd_read) &&
nvme_ns_has_pi(ns))
req->use_sig_mr = true;
else
req->use_sig_mr = false;
err = nvme_rdma_map_data(queue, rq, c);
if (unlikely(err < 0)) {
dev_err(queue->ctrl->ctrl.device,
@ -1801,12 +2034,46 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
return ib_process_cq_direct(queue->ib_cq, -1);
}
static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req)
{
struct request *rq = blk_mq_rq_from_pdu(req);
struct ib_mr_status mr_status;
int ret;
ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
if (ret) {
pr_err("ib_check_mr_status failed, ret %d\n", ret);
nvme_req(rq)->status = NVME_SC_INVALID_PI;
return;
}
if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
switch (mr_status.sig_err.err_type) {
case IB_SIG_BAD_GUARD:
nvme_req(rq)->status = NVME_SC_GUARD_CHECK;
break;
case IB_SIG_BAD_REFTAG:
nvme_req(rq)->status = NVME_SC_REFTAG_CHECK;
break;
case IB_SIG_BAD_APPTAG:
nvme_req(rq)->status = NVME_SC_APPTAG_CHECK;
break;
}
pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
mr_status.sig_err.err_type, mr_status.sig_err.expected,
mr_status.sig_err.actual);
}
}
static void nvme_rdma_complete_rq(struct request *rq)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct ib_device *ibdev = queue->device->dev;
if (req->use_sig_mr)
nvme_rdma_check_pi_status(req);
nvme_rdma_unmap_data(queue, rq);
ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
DMA_TO_DEVICE);
@ -1926,7 +2193,7 @@ out_fail:
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.name = "rdma",
.module = THIS_MODULE,
.flags = NVME_F_FABRICS,
.flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,

drivers/nvme/host/tcp.c

@ -60,6 +60,7 @@ struct nvme_tcp_request {
enum nvme_tcp_queue_flags {
NVME_TCP_Q_ALLOCATED = 0,
NVME_TCP_Q_LIVE = 1,
NVME_TCP_Q_POLLING = 2,
};
enum nvme_tcp_recv_state {
@ -75,6 +76,7 @@ struct nvme_tcp_queue {
int io_cpu;
spinlock_t lock;
struct mutex send_mutex;
struct list_head send_list;
/* recv state */
@ -131,6 +133,7 @@ static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
static struct workqueue_struct *nvme_tcp_wq;
static struct blk_mq_ops nvme_tcp_mq_ops;
static struct blk_mq_ops nvme_tcp_admin_mq_ops;
static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
{
@ -257,15 +260,29 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
}
}
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req)
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
bool sync)
{
struct nvme_tcp_queue *queue = req->queue;
bool empty;
spin_lock(&queue->lock);
empty = list_empty(&queue->send_list) && !queue->request;
list_add_tail(&req->entry, &queue->send_list);
spin_unlock(&queue->lock);
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
/*
* If we are first on the send_list and the caller allows it, try to send
* directly; otherwise queue io_work. Only send directly when we are on
* the same CPU as io_work, so we don't introduce contention.
*/
if (queue->io_cpu == smp_processor_id() &&
sync && empty && mutex_trylock(&queue->send_mutex)) {
nvme_tcp_try_send(queue);
mutex_unlock(&queue->send_mutex);
} else {
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}
}
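/*
 * Descriptive note (not part of the patch): the direct-send fast path only
 * fires when the caller allows it (sync == true), the submitting CPU is
 * queue->io_cpu, the send_list was empty and send_mutex is uncontended;
 * otherwise the request stays on send_list and io_work picks it up under
 * the same mutex, preserving ordering with nvme_tcp_io_work().
 */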
static inline struct nvme_tcp_request *
@ -578,7 +595,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
req->state = NVME_TCP_SEND_H2C_PDU;
req->offset = 0;
nvme_tcp_queue_request(req);
nvme_tcp_queue_request(req, false);
return 0;
}
@ -794,11 +811,12 @@ static void nvme_tcp_data_ready(struct sock *sk)
{
struct nvme_tcp_queue *queue;
read_lock(&sk->sk_callback_lock);
read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (likely(queue && queue->rd_enabled))
if (likely(queue && queue->rd_enabled) &&
!test_bit(NVME_TCP_Q_POLLING, &queue->flags))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
read_unlock(&sk->sk_callback_lock);
read_unlock_bh(&sk->sk_callback_lock);
}
static void nvme_tcp_write_space(struct sock *sk)
@ -867,7 +885,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
if (last && !queue->data_digest)
flags |= MSG_EOR;
else
flags |= MSG_MORE;
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
/* can't zcopy slab pages */
if (unlikely(PageSlab(page))) {
@ -906,11 +924,16 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
struct nvme_tcp_queue *queue = req->queue;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
bool inline_data = nvme_tcp_has_inline_data(req);
int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR);
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) + hdgst - req->offset;
int flags = MSG_DONTWAIT;
int ret;
if (inline_data)
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
@ -949,7 +972,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
offset_in_page(pdu) + req->offset, len,
MSG_DONTWAIT | MSG_MORE);
MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (unlikely(ret <= 0))
return ret;
@ -1063,11 +1086,14 @@ static void nvme_tcp_io_work(struct work_struct *w)
bool pending = false;
int result;
result = nvme_tcp_try_send(queue);
if (result > 0)
pending = true;
else if (unlikely(result < 0))
break;
if (mutex_trylock(&queue->send_mutex)) {
result = nvme_tcp_try_send(queue);
mutex_unlock(&queue->send_mutex);
if (result > 0)
pending = true;
else if (unlikely(result < 0))
break;
}
result = nvme_tcp_try_recv(queue);
if (result > 0)
@ -1319,6 +1345,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->ctrl = ctrl;
INIT_LIST_HEAD(&queue->send_list);
spin_lock_init(&queue->lock);
mutex_init(&queue->send_mutex);
INIT_WORK(&queue->io_work, nvme_tcp_io_work);
queue->queue_size = queue_size;
@ -1543,6 +1570,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = NUMA_NO_NODE;
set->flags = BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
set->nr_hw_queues = 1;
@ -1554,7 +1582,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->queue_depth = nctrl->sqsize + 1;
set->reserved_tags = 1; /* fabric connect */
set->numa_node = NUMA_NO_NODE;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
@ -2113,7 +2141,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
ctrl->async_req.curr_bio = NULL;
ctrl->async_req.data_len = 0;
nvme_tcp_queue_request(&ctrl->async_req);
nvme_tcp_queue_request(&ctrl->async_req, true);
}
static enum blk_eh_timer_return
@ -2244,7 +2272,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
nvme_tcp_queue_request(req);
nvme_tcp_queue_request(req, true);
return BLK_STS_OK;
}
@ -2302,9 +2330,11 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
return 0;
set_bit(NVME_TCP_Q_POLLING, &queue->flags);
if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
sk_busy_loop(sk, true);
nvme_tcp_try_recv(queue);
clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
return queue->nr_cqe;
}

drivers/nvme/target/Kconfig

@ -4,6 +4,7 @@ config NVME_TARGET
tristate "NVMe Target support"
depends on BLOCK
depends on CONFIGFS_FS
select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
select SGL_ALLOC
help
This enabled target side support for the NVMe protocol, that is

drivers/nvme/target/admin-cmd.c

@ -295,7 +295,7 @@ out:
static void nvmet_execute_get_log_page(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd)))
if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd)))
return;
switch (req->cmd->get_log_page.lid) {
@ -341,6 +341,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
u32 cmd_capsule_size;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@ -433,9 +434,15 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
/* Max command capsule size is sqe + single page of in-capsule data */
id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
req->port->inline_data_size) / 16);
/*
* Max command capsule size is sqe + in-capsule data size.
* Disable in-capsule data for Metadata capable controllers.
*/
cmd_capsule_size = sizeof(struct nvme_command);
if (!ctrl->pi_support)
cmd_capsule_size += req->port->inline_data_size;
id->ioccsz = cpu_to_le32(cmd_capsule_size / 16);
/* Max response capsule size is cqe */
id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
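/*
 * Worked example (illustrative): sizeof(struct nvme_command) is 64 bytes,
 * so a port configured with an 8192-byte inline_data_size reports
 * IOCCSZ = (64 + 8192) / 16 = 516 16-byte units, while a metadata-capable
 * (PI) controller reports only 64 / 16 = 4, i.e. in-capsule data is
 * effectively disabled.
 */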
@ -465,6 +472,7 @@ out:
static void nvmet_execute_identify_ns(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
struct nvme_id_ns *id;
u16 status = 0;
@ -482,10 +490,12 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
}
/* return an all zeroed buffer if we can't find an active namespace */
ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid);
if (!ns)
goto done;
nvmet_ns_revalidate(ns);
/*
* nuse = ncap = nsze isn't always true, but we have no way to find
* that out from the underlying device.
@ -521,6 +531,16 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
id->lbaf[0].ds = ns->blksize_shift;
if (ctrl->pi_support && nvmet_ns_has_pi(ns)) {
id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST |
NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 |
NVME_NS_DPC_PI_TYPE3;
id->mc = NVME_MC_EXTENDED_LBA;
id->dps = ns->pi_type;
id->flbas = NVME_NS_FLBAS_META_EXT;
id->lbaf[0].ms = cpu_to_le16(ns->metadata_size);
}
if (ns->readonly)
id->nsattr |= (1 << 0);
nvmet_put_namespace(ns);
@ -625,7 +645,7 @@ out:
static void nvmet_execute_identify(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
switch (req->cmd->identify.cns) {
@ -654,7 +674,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
*/
static void nvmet_execute_abort(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
nvmet_set_result(req, 1);
nvmet_req_complete(req, 0);
@ -743,7 +763,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
u16 nsqr;
u16 ncqr;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
@ -815,7 +835,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10)))
if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10)))
return;
switch (cdw10 & 0xff) {
@ -882,7 +902,7 @@ void nvmet_execute_async_event(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
mutex_lock(&ctrl->lock);
@ -901,7 +921,7 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
pr_debug("ctrl %d update keep-alive timer for %d secs\n",

drivers/nvme/target/configfs.c

@ -20,61 +20,71 @@ static const struct config_item_type nvmet_subsys_type;
static LIST_HEAD(nvmet_ports_list);
struct list_head *nvmet_ports = &nvmet_ports_list;
static const struct nvmet_transport_name {
struct nvmet_type_name_map {
u8 type;
const char *name;
} nvmet_transport_names[] = {
};
static struct nvmet_type_name_map nvmet_transport[] = {
{ NVMF_TRTYPE_RDMA, "rdma" },
{ NVMF_TRTYPE_FC, "fc" },
{ NVMF_TRTYPE_TCP, "tcp" },
{ NVMF_TRTYPE_LOOP, "loop" },
};
static const struct nvmet_type_name_map nvmet_addr_family[] = {
{ NVMF_ADDR_FAMILY_PCI, "pcie" },
{ NVMF_ADDR_FAMILY_IP4, "ipv4" },
{ NVMF_ADDR_FAMILY_IP6, "ipv6" },
{ NVMF_ADDR_FAMILY_IB, "ib" },
{ NVMF_ADDR_FAMILY_FC, "fc" },
{ NVMF_ADDR_FAMILY_LOOP, "loop" },
};
static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller)
{
if (p->enabled)
pr_err("Disable port '%u' before changing attribute in %s\n",
le16_to_cpu(p->disc_addr.portid), caller);
return p->enabled;
}
/*
* nvmet_port Generic ConfigFS definitions.
* Used in any place in the ConfigFS tree that refers to an address.
*/
static ssize_t nvmet_addr_adrfam_show(struct config_item *item,
char *page)
static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page)
{
switch (to_nvmet_port(item)->disc_addr.adrfam) {
case NVMF_ADDR_FAMILY_IP4:
return sprintf(page, "ipv4\n");
case NVMF_ADDR_FAMILY_IP6:
return sprintf(page, "ipv6\n");
case NVMF_ADDR_FAMILY_IB:
return sprintf(page, "ib\n");
case NVMF_ADDR_FAMILY_FC:
return sprintf(page, "fc\n");
default:
return sprintf(page, "\n");
u8 adrfam = to_nvmet_port(item)->disc_addr.adrfam;
int i;
for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) {
if (nvmet_addr_family[i].type == adrfam)
return sprintf(page, "%s\n", nvmet_addr_family[i].name);
}
return sprintf(page, "\n");
}
static ssize_t nvmet_addr_adrfam_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
int i;
if (port->enabled) {
pr_err("Cannot modify address while enabled\n");
pr_err("Disable the address before modifying\n");
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) {
if (sysfs_streq(page, nvmet_addr_family[i].name))
goto found;
}
if (sysfs_streq(page, "ipv4")) {
port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP4;
} else if (sysfs_streq(page, "ipv6")) {
port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP6;
} else if (sysfs_streq(page, "ib")) {
port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IB;
} else if (sysfs_streq(page, "fc")) {
port->disc_addr.adrfam = NVMF_ADDR_FAMILY_FC;
} else {
pr_err("Invalid value '%s' for adrfam\n", page);
return -EINVAL;
}
pr_err("Invalid value '%s' for adrfam\n", page);
return -EINVAL;
found:
port->disc_addr.adrfam = nvmet_addr_family[i].type;
return count;
}
@ -100,11 +110,9 @@ static ssize_t nvmet_addr_portid_store(struct config_item *item,
return -EINVAL;
}
if (port->enabled) {
pr_err("Cannot modify address while enabled\n");
pr_err("Disable the address before modifying\n");
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
}
port->disc_addr.portid = cpu_to_le16(portid);
return count;
}
@ -130,11 +138,8 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item,
return -EINVAL;
}
if (port->enabled) {
pr_err("Cannot modify address while enabled\n");
pr_err("Disable the address before modifying\n");
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
}
if (sscanf(page, "%s\n", port->disc_addr.traddr) != 1)
return -EINVAL;
@ -143,20 +148,24 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, addr_traddr);
static ssize_t nvmet_addr_treq_show(struct config_item *item,
char *page)
static const struct nvmet_type_name_map nvmet_addr_treq[] = {
{ NVMF_TREQ_NOT_SPECIFIED, "not specified" },
{ NVMF_TREQ_REQUIRED, "required" },
{ NVMF_TREQ_NOT_REQUIRED, "not required" },
};
static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page)
{
switch (to_nvmet_port(item)->disc_addr.treq &
NVME_TREQ_SECURE_CHANNEL_MASK) {
case NVMF_TREQ_NOT_SPECIFIED:
return sprintf(page, "not specified\n");
case NVMF_TREQ_REQUIRED:
return sprintf(page, "required\n");
case NVMF_TREQ_NOT_REQUIRED:
return sprintf(page, "not required\n");
default:
return sprintf(page, "\n");
u8 treq = to_nvmet_port(item)->disc_addr.treq &
NVME_TREQ_SECURE_CHANNEL_MASK;
int i;
for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
if (treq == nvmet_addr_treq[i].type)
return sprintf(page, "%s\n", nvmet_addr_treq[i].name);
}
return sprintf(page, "\n");
}
static ssize_t nvmet_addr_treq_store(struct config_item *item,
@ -164,25 +173,22 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item,
{
struct nvmet_port *port = to_nvmet_port(item);
u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK;
int i;
if (port->enabled) {
pr_err("Cannot modify address while enabled\n");
pr_err("Disable the address before modifying\n");
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
if (sysfs_streq(page, nvmet_addr_treq[i].name))
goto found;
}
if (sysfs_streq(page, "not specified")) {
treq |= NVMF_TREQ_NOT_SPECIFIED;
} else if (sysfs_streq(page, "required")) {
treq |= NVMF_TREQ_REQUIRED;
} else if (sysfs_streq(page, "not required")) {
treq |= NVMF_TREQ_NOT_REQUIRED;
} else {
pr_err("Invalid value '%s' for treq\n", page);
return -EINVAL;
}
pr_err("Invalid value '%s' for treq\n", page);
return -EINVAL;
found:
treq |= nvmet_addr_treq[i].type;
port->disc_addr.treq = treq;
return count;
}
@ -206,11 +212,8 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item,
pr_err("Invalid value '%s' for trsvcid\n", page);
return -EINVAL;
}
if (port->enabled) {
pr_err("Cannot modify address while enabled\n");
pr_err("Disable the address before modifying\n");
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
}
if (sscanf(page, "%s\n", port->disc_addr.trsvcid) != 1)
return -EINVAL;
@ -233,11 +236,8 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
struct nvmet_port *port = to_nvmet_port(item);
int ret;
if (port->enabled) {
pr_err("Cannot modify inline_data_size while port enabled\n");
pr_err("Disable the port before modifying\n");
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
}
ret = kstrtoint(page, 0, &port->inline_data_size);
if (ret) {
pr_err("Invalid value '%s' for inline_data_size\n", page);
@ -248,16 +248,45 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, param_inline_data_size);
#ifdef CONFIG_BLK_DEV_INTEGRITY
static ssize_t nvmet_param_pi_enable_show(struct config_item *item,
char *page)
{
struct nvmet_port *port = to_nvmet_port(item);
return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable);
}
static ssize_t nvmet_param_pi_enable_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
bool val;
if (strtobool(page, &val))
return -EINVAL;
if (port->enabled) {
pr_err("Disable port before setting pi_enable value.\n");
return -EACCES;
}
port->pi_enable = val;
return count;
}
CONFIGFS_ATTR(nvmet_, param_pi_enable);
#endif
static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page)
{
struct nvmet_port *port = to_nvmet_port(item);
int i;
for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
if (port->disc_addr.trtype != nvmet_transport_names[i].type)
continue;
return sprintf(page, "%s\n", nvmet_transport_names[i].name);
for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) {
if (port->disc_addr.trtype == nvmet_transport[i].type)
return sprintf(page, "%s\n", nvmet_transport[i].name);
}
return sprintf(page, "\n");
@ -276,22 +305,20 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item,
struct nvmet_port *port = to_nvmet_port(item);
int i;
if (port->enabled) {
pr_err("Cannot modify address while enabled\n");
pr_err("Disable the address before modifying\n");
if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
}
for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
if (sysfs_streq(page, nvmet_transport_names[i].name))
for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) {
if (sysfs_streq(page, nvmet_transport[i].name))
goto found;
}
pr_err("Invalid value '%s' for trtype\n", page);
return -EINVAL;
found:
memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
port->disc_addr.trtype = nvmet_transport_names[i].type;
port->disc_addr.trtype = nvmet_transport[i].type;
if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA)
nvmet_port_init_tsas_rdma(port);
return count;
@ -327,7 +354,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item,
kfree(ns->device_path);
ret = -ENOMEM;
ns->device_path = kstrndup(page, len, GFP_KERNEL);
ns->device_path = kmemdup_nul(page, len, GFP_KERNEL);
if (!ns->device_path)
goto out_unlock;
@ -543,6 +570,31 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_ns_, buffered_io);
static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_ns *ns = to_nvmet_ns(item);
bool val;
if (strtobool(page, &val))
return -EINVAL;
if (!val)
return -EINVAL;
mutex_lock(&ns->subsys->lock);
if (!ns->enabled) {
pr_err("enable ns before revalidate.\n");
mutex_unlock(&ns->subsys->lock);
return -EINVAL;
}
nvmet_ns_revalidate(ns);
mutex_unlock(&ns->subsys->lock);
return count;
}
CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
@ -550,6 +602,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
&nvmet_ns_attr_revalidate_size,
#ifdef CONFIG_PCI_P2PDMA
&nvmet_ns_attr_p2pmem,
#endif
@ -963,7 +1016,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
return -EINVAL;
}
new_model_number = kstrndup(page, len, GFP_KERNEL);
new_model_number = kmemdup_nul(page, len, GFP_KERNEL);
if (!new_model_number)
return -ENOMEM;
@ -987,6 +1040,28 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);
#ifdef CONFIG_BLK_DEV_INTEGRITY
static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
char *page)
{
return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support);
}
static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_subsys *subsys = to_subsys(item);
bool pi_enable;
if (strtobool(page, &pi_enable))
return -EINVAL;
subsys->pi_support = pi_enable;
return count;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable);
#endif
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
&nvmet_subsys_attr_attr_version,
@ -994,6 +1069,9 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_cntlid_min,
&nvmet_subsys_attr_attr_cntlid_max,
&nvmet_subsys_attr_attr_model,
#ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_subsys_attr_attr_pi_enable,
#endif
NULL,
};
@ -1149,10 +1227,7 @@ static const struct config_item_type nvmet_referrals_type = {
.ct_group_ops = &nvmet_referral_group_ops,
};
static struct {
enum nvme_ana_state state;
const char *name;
} nvmet_ana_state_names[] = {
static struct nvmet_type_name_map nvmet_ana_state[] = {
{ NVME_ANA_OPTIMIZED, "optimized" },
{ NVME_ANA_NONOPTIMIZED, "non-optimized" },
{ NVME_ANA_INACCESSIBLE, "inaccessible" },
@ -1167,10 +1242,9 @@ static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item,
enum nvme_ana_state state = grp->port->ana_state[grp->grpid];
int i;
for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
if (state != nvmet_ana_state_names[i].state)
continue;
return sprintf(page, "%s\n", nvmet_ana_state_names[i].name);
for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) {
if (state == nvmet_ana_state[i].type)
return sprintf(page, "%s\n", nvmet_ana_state[i].name);
}
return sprintf(page, "\n");
@ -1180,10 +1254,11 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_ana_group *grp = to_ana_group(item);
enum nvme_ana_state *ana_state = grp->port->ana_state;
int i;
for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
if (sysfs_streq(page, nvmet_ana_state_names[i].name))
for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) {
if (sysfs_streq(page, nvmet_ana_state[i].name))
goto found;
}
@ -1192,10 +1267,9 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
found:
down_write(&nvmet_ana_sem);
grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state;
ana_state[grp->grpid] = (enum nvme_ana_state) nvmet_ana_state[i].type;
nvmet_ana_chgcnt++;
up_write(&nvmet_ana_sem);
nvmet_port_send_ana_event(grp->port);
return count;
}
@ -1297,6 +1371,9 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype,
&nvmet_attr_param_inline_data_size,
#ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_attr_param_pi_enable,
#endif
NULL,
};
@ -1346,6 +1423,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
port->inline_data_size = -1; /* < 0 == let the transport choose */
port->disc_addr.portid = cpu_to_le16(portid);
port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX;
port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW;
config_group_init_type_name(&port->group, name, &nvmet_port_type);
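
Editorial note (not part of the patch): the adrfam, treq, trtype, and ANA-state handlers above were all converted from switch statements to loops over small type/name tables. A minimal standalone C sketch of that lookup pattern, using hypothetical names — the patch itself open-codes the loop in each handler rather than adding a shared helper:

#include <stddef.h>

/* Hypothetical helper illustrating the table-lookup pattern; not a
 * symbol added by the patch. */
struct type_name_map {
	unsigned char type;
	const char *name;
};

static const char *type_to_name(const struct type_name_map *map,
				size_t nr, unsigned char type)
{
	size_t i;

	for (i = 0; i < nr; i++)
		if (map[i].type == type)
			return map[i].name;
	return "";	/* unknown values show up as an empty line */
}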


@ -134,15 +134,10 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
struct nvmet_async_event *aen;
struct nvmet_req *req;
while (1) {
mutex_lock(&ctrl->lock);
aen = list_first_entry_or_null(&ctrl->async_events,
struct nvmet_async_event, entry);
if (!aen || !ctrl->nr_async_event_cmds) {
mutex_unlock(&ctrl->lock);
break;
}
mutex_lock(&ctrl->lock);
while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
aen = list_first_entry(&ctrl->async_events,
struct nvmet_async_event, entry);
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
if (status == 0)
nvmet_set_result(req, nvmet_async_event_result(aen));
@ -151,20 +146,21 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
kfree(aen);
mutex_unlock(&ctrl->lock);
trace_nvmet_async_event(ctrl, req->cqe->result.u32);
nvmet_req_complete(req, status);
mutex_lock(&ctrl->lock);
}
mutex_unlock(&ctrl->lock);
}
static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
struct nvmet_req *req;
struct nvmet_async_event *aen, *tmp;
mutex_lock(&ctrl->lock);
while (ctrl->nr_async_event_cmds) {
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
mutex_unlock(&ctrl->lock);
nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
mutex_lock(&ctrl->lock);
list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
list_del(&aen->entry);
kfree(aen);
}
mutex_unlock(&ctrl->lock);
}
@ -322,12 +318,21 @@ int nvmet_enable_port(struct nvmet_port *port)
if (!try_module_get(ops->owner))
return -EINVAL;
ret = ops->add_port(port);
if (ret) {
module_put(ops->owner);
return ret;
/*
* If the user requested PI support and the transport isn't pi capable,
* don't enable the port.
*/
if (port->pi_enable && !ops->metadata_support) {
pr_err("T10-PI is not supported by transport type %d\n",
port->disc_addr.trtype);
ret = -EINVAL;
goto out_put;
}
ret = ops->add_port(port);
if (ret)
goto out_put;
/* If the transport didn't set inline_data_size, then disable it. */
if (port->inline_data_size < 0)
port->inline_data_size = 0;
@ -335,6 +340,10 @@ int nvmet_enable_port(struct nvmet_port *port)
port->enabled = true;
port->tr_ops = ops;
return 0;
out_put:
module_put(ops->owner);
return ret;
}
void nvmet_disable_port(struct nvmet_port *port)
@ -514,6 +523,19 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
ns->nsid);
}
void nvmet_ns_revalidate(struct nvmet_ns *ns)
{
loff_t oldsize = ns->size;
if (ns->bdev)
nvmet_bdev_ns_revalidate(ns);
else
nvmet_file_ns_revalidate(ns);
if (oldsize != ns->size)
nvmet_ns_changed(ns->subsys, ns->nsid);
}
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
@ -764,10 +786,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
* If this is the admin queue, complete all AERs so that our
* queue doesn't have outstanding requests on it.
*/
if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) {
if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
nvmet_async_events_process(ctrl, status);
nvmet_async_events_free(ctrl);
}
percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
@ -873,8 +893,11 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->sq = sq;
req->ops = ops;
req->sg = NULL;
req->metadata_sg = NULL;
req->sg_cnt = 0;
req->metadata_sg_cnt = 0;
req->transfer_len = 0;
req->metadata_len = 0;
req->cqe->status = 0;
req->cqe->sq_head = 0;
req->ns = NULL;
@ -936,9 +959,9 @@ void nvmet_req_uninit(struct nvmet_req *req)
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);
bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
if (unlikely(data_len != req->transfer_len)) {
if (unlikely(len != req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
return false;
@ -946,7 +969,7 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
return true;
}
EXPORT_SYMBOL_GPL(nvmet_check_data_len);
EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
@ -959,50 +982,90 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
return true;
}
int nvmet_req_alloc_sgl(struct nvmet_req *req)
static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
{
struct pci_dev *p2p_dev = NULL;
return req->transfer_len - req->metadata_len;
}
if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
if (req->sq->ctrl && req->ns)
p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
req->ns->nsid);
static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req)
{
req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt,
nvmet_data_transfer_len(req));
if (!req->sg)
goto out_err;
req->p2p_dev = NULL;
if (req->sq->qid && p2p_dev) {
req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
req->transfer_len);
if (req->sg) {
req->p2p_dev = p2p_dev;
return 0;
}
}
if (req->metadata_len) {
req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev,
&req->metadata_sg_cnt, req->metadata_len);
if (!req->metadata_sg)
goto out_free_sg;
}
return 0;
out_free_sg:
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
out_err:
return -ENOMEM;
}
/*
* If no P2P memory was available we fallback to using
* regular memory
*/
static bool nvmet_req_find_p2p_dev(struct nvmet_req *req)
{
if (!IS_ENABLED(CONFIG_PCI_P2PDMA))
return false;
if (req->sq->ctrl && req->sq->qid && req->ns) {
req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
req->ns->nsid);
if (req->p2p_dev)
return true;
}
req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
req->p2p_dev = NULL;
return false;
}
int nvmet_req_alloc_sgls(struct nvmet_req *req)
{
if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req))
return 0;
req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
&req->sg_cnt);
if (unlikely(!req->sg))
return -ENOMEM;
goto out;
if (req->metadata_len) {
req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
&req->metadata_sg_cnt);
if (unlikely(!req->metadata_sg))
goto out_free;
}
return 0;
out_free:
sgl_free(req->sg);
out:
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);
void nvmet_req_free_sgl(struct nvmet_req *req)
void nvmet_req_free_sgls(struct nvmet_req *req)
{
if (req->p2p_dev)
if (req->p2p_dev) {
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
else
if (req->metadata_sg)
pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
} else {
sgl_free(req->sg);
if (req->metadata_sg)
sgl_free(req->metadata_sg);
}
req->sg = NULL;
req->metadata_sg = NULL;
req->sg_cnt = 0;
req->metadata_sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);
static inline bool nvmet_cc_en(u32 cc)
{
@ -1357,6 +1420,7 @@ static void nvmet_ctrl_free(struct kref *ref)
ida_simple_remove(&cntlid_ida, ctrl->cntlid);
nvmet_async_events_free(ctrl);
kfree(ctrl->sqs);
kfree(ctrl->cqs);
kfree(ctrl->changed_ns_list);
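
For context, a rough sketch of how a fabrics transport pairs the renamed SGL helpers once command parsing has filled in req->transfer_len and, for PI-enabled namespaces, req->metadata_len. The example_* names are placeholders; only nvmet_req_alloc_sgls() and nvmet_req_free_sgls() are symbols from this patch:

#include "nvmet.h"

/* Hypothetical transport glue, for illustration only. */
static int example_map_data(struct nvmet_req *req)
{
	int ret;

	/* Builds req->sg and, when req->metadata_len != 0, req->metadata_sg
	 * as well; P2P memory is used transparently when available. */
	ret = nvmet_req_alloc_sgls(req);
	if (ret < 0)
		return ret;

	/* ... DMA-map req->sg / req->metadata_sg and post the transfer ... */
	return 0;
}

static void example_release_data(struct nvmet_req *req)
{
	/* Frees both scatterlists (P2P-aware) and resets the counters. */
	nvmet_req_free_sgls(req);
}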


@ -171,7 +171,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
u16 status = 0;
void *buffer;
if (!nvmet_check_data_len(req, data_len))
if (!nvmet_check_transfer_len(req, data_len))
return;
if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
@ -244,7 +244,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
const char model[] = "Linux";
u16 status = 0;
if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
@ -298,7 +298,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
@ -324,7 +324,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat = 0;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {


@ -12,7 +12,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
u64 val = le64_to_cpu(req->cmd->prop_set.value);
u16 status = 0;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
if (req->cmd->prop_set.attrib & 1) {
@ -41,7 +41,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
u16 status = 0;
u64 val = 0;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
if (req->cmd->prop_get.attrib & 1) {
@ -156,7 +156,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
struct nvmet_ctrl *ctrl = NULL;
u16 status = 0;
if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
d = kmalloc(sizeof(*d), GFP_KERNEL);
@ -197,6 +197,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
uuid_copy(&ctrl->hostid, &d->hostid);
status = nvmet_install_queue(ctrl, req);
@ -205,8 +207,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
pr_info("creating controller %d for subsystem %s for NQN %s.\n",
ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
pr_info("creating controller %d for subsystem %s for NQN %s%s.\n",
ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
ctrl->pi_support ? " T10-PI is enabled" : "");
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
out:
@ -223,7 +226,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 status = 0;
if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
d = kmalloc(sizeof(*d), GFP_KERNEL);

Diff not shown because of its large size.


@ -208,10 +208,13 @@ struct fcloop_rport {
};
struct fcloop_tport {
struct nvmet_fc_target_port *targetport;
struct nvme_fc_remote_port *remoteport;
struct fcloop_nport *nport;
struct fcloop_lport *lport;
struct nvmet_fc_target_port *targetport;
struct nvme_fc_remote_port *remoteport;
struct fcloop_nport *nport;
struct fcloop_lport *lport;
spinlock_t lock;
struct list_head ls_list;
struct work_struct ls_work;
};
struct fcloop_nport {
@ -228,7 +231,8 @@ struct fcloop_nport {
struct fcloop_lsreq {
struct nvmefc_ls_req *lsreq;
struct nvmefc_tgt_ls_req tgt_ls_req;
struct nvmefc_ls_rsp ls_rsp;
int lsdir; /* H2T or T2H */
int status;
struct list_head ls_list; /* fcloop_rport->ls_list */
};
@ -267,9 +271,9 @@ struct fcloop_ini_fcpreq {
};
static inline struct fcloop_lsreq *
tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq)
ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp)
{
return container_of(tgt_lsreq, struct fcloop_lsreq, tgt_ls_req);
return container_of(lsrsp, struct fcloop_lsreq, ls_rsp);
}
static inline struct fcloop_fcpreq *
@ -323,7 +327,7 @@ fcloop_rport_lsrqst_work(struct work_struct *work)
}
static int
fcloop_ls_req(struct nvme_fc_local_port *localport,
fcloop_h2t_ls_req(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_req *lsreq)
{
@ -344,27 +348,28 @@ fcloop_ls_req(struct nvme_fc_local_port *localport,
}
tls_req->status = 0;
ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req,
lsreq->rqstaddr, lsreq->rqstlen);
ret = nvmet_fc_rcv_ls_req(rport->targetport, rport,
&tls_req->ls_rsp,
lsreq->rqstaddr, lsreq->rqstlen);
return ret;
}
static int
fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
struct nvmefc_tgt_ls_req *tgt_lsreq)
fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
struct nvmefc_ls_rsp *lsrsp)
{
struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq);
struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp);
struct nvmefc_ls_req *lsreq = tls_req->lsreq;
struct fcloop_tport *tport = targetport->private;
struct nvme_fc_remote_port *remoteport = tport->remoteport;
struct fcloop_rport *rport;
memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf,
((lsreq->rsplen < tgt_lsreq->rsplen) ?
lsreq->rsplen : tgt_lsreq->rsplen));
memcpy(lsreq->rspaddr, lsrsp->rspbuf,
((lsreq->rsplen < lsrsp->rsplen) ?
lsreq->rsplen : lsrsp->rsplen));
tgt_lsreq->done(tgt_lsreq);
lsrsp->done(lsrsp);
if (remoteport) {
rport = remoteport->private;
@ -377,6 +382,99 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
return 0;
}
static void
fcloop_tport_lsrqst_work(struct work_struct *work)
{
struct fcloop_tport *tport =
container_of(work, struct fcloop_tport, ls_work);
struct fcloop_lsreq *tls_req;
spin_lock(&tport->lock);
for (;;) {
tls_req = list_first_entry_or_null(&tport->ls_list,
struct fcloop_lsreq, ls_list);
if (!tls_req)
break;
list_del(&tls_req->ls_list);
spin_unlock(&tport->lock);
tls_req->lsreq->done(tls_req->lsreq, tls_req->status);
/*
* callee may free memory containing tls_req.
* do not reference lsreq after this.
*/
spin_lock(&tport->lock);
}
spin_unlock(&tport->lock);
}
static int
fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
struct nvmefc_ls_req *lsreq)
{
struct fcloop_lsreq *tls_req = lsreq->private;
struct fcloop_tport *tport = targetport->private;
int ret = 0;
/*
* hosthandle should be the dst.rport value.
* hosthandle ignored as fcloop currently is
* 1:1 tgtport vs remoteport
*/
tls_req->lsreq = lsreq;
INIT_LIST_HEAD(&tls_req->ls_list);
if (!tport->remoteport) {
tls_req->status = -ECONNREFUSED;
spin_lock(&tport->lock);
list_add_tail(&tport->ls_list, &tls_req->ls_list);
spin_unlock(&tport->lock);
schedule_work(&tport->ls_work);
return ret;
}
tls_req->status = 0;
ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp,
lsreq->rqstaddr, lsreq->rqstlen);
return ret;
}
static int
fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_rsp *lsrsp)
{
struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp);
struct nvmefc_ls_req *lsreq = tls_req->lsreq;
struct fcloop_rport *rport = remoteport->private;
struct nvmet_fc_target_port *targetport = rport->targetport;
struct fcloop_tport *tport;
memcpy(lsreq->rspaddr, lsrsp->rspbuf,
((lsreq->rsplen < lsrsp->rsplen) ?
lsreq->rsplen : lsrsp->rsplen));
lsrsp->done(lsrsp);
if (targetport) {
tport = targetport->private;
spin_lock(&tport->lock);
list_add_tail(&tport->ls_list, &tls_req->ls_list);
spin_unlock(&tport->lock);
schedule_work(&tport->ls_work);
}
return 0;
}
static void
fcloop_t2h_host_release(void *hosthandle)
{
/* host handle ignored for now */
}
/*
* Simulate reception of RSCN and converting it to a initiator transport
* call to rescan a remote port.
@ -762,12 +860,18 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport,
}
static void
fcloop_ls_abort(struct nvme_fc_local_port *localport,
fcloop_h2t_ls_abort(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_req *lsreq)
{
}
static void
fcloop_t2h_ls_abort(struct nvmet_fc_target_port *targetport,
void *hosthandle, struct nvmefc_ls_req *lsreq)
{
}
static void
fcloop_fcp_abort(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
@ -867,6 +971,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
{
struct fcloop_tport *tport = targetport->private;
flush_work(&tport->ls_work);
fcloop_nport_put(tport->nport);
}
@ -879,10 +984,11 @@ static struct nvme_fc_port_template fctemplate = {
.remoteport_delete = fcloop_remoteport_delete,
.create_queue = fcloop_create_queue,
.delete_queue = fcloop_delete_queue,
.ls_req = fcloop_ls_req,
.ls_req = fcloop_h2t_ls_req,
.fcp_io = fcloop_fcp_req,
.ls_abort = fcloop_ls_abort,
.ls_abort = fcloop_h2t_ls_abort,
.fcp_abort = fcloop_fcp_abort,
.xmt_ls_rsp = fcloop_t2h_xmt_ls_rsp,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
@ -896,11 +1002,14 @@ static struct nvme_fc_port_template fctemplate = {
static struct nvmet_fc_target_template tgttemplate = {
.targetport_delete = fcloop_targetport_delete,
.xmt_ls_rsp = fcloop_xmt_ls_rsp,
.xmt_ls_rsp = fcloop_h2t_xmt_ls_rsp,
.fcp_op = fcloop_fcp_op,
.fcp_abort = fcloop_tgt_fcp_abort,
.fcp_req_release = fcloop_fcp_req_release,
.discovery_event = fcloop_tgt_discovery_evt,
.ls_req = fcloop_t2h_ls_req,
.ls_abort = fcloop_t2h_ls_abort,
.host_release = fcloop_t2h_host_release,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
@ -909,6 +1018,7 @@ static struct nvmet_fc_target_template tgttemplate = {
.target_features = 0,
/* sizes of additional private data for data structures */
.target_priv_sz = sizeof(struct fcloop_tport),
.lsrqst_priv_sz = sizeof(struct fcloop_lsreq),
};
static ssize_t
@ -1258,6 +1368,9 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr,
tport->nport = nport;
tport->lport = nport->lport;
nport->tport = tport;
spin_lock_init(&tport->lock);
INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work);
INIT_LIST_HEAD(&tport->ls_list);
return count;
}


@ -47,6 +47,22 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
id->nows = to0based(ql->io_opt / ql->logical_block_size);
}
static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
struct blk_integrity *bi = bdev_get_integrity(ns->bdev);
if (bi) {
ns->metadata_size = bi->tuple_size;
if (bi->profile == &t10_pi_type1_crc)
ns->pi_type = NVME_NS_DPS_PI_TYPE1;
else if (bi->profile == &t10_pi_type3_crc)
ns->pi_type = NVME_NS_DPS_PI_TYPE3;
else
/* Unsupported metadata type */
ns->metadata_size = 0;
}
}
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
int ret;
@ -64,6 +80,12 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
}
ns->size = i_size_read(ns->bdev->bd_inode);
ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
ns->pi_type = 0;
ns->metadata_size = 0;
if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
nvmet_bdev_ns_enable_integrity(ns);
return 0;
}
@ -75,6 +97,11 @@ void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
}
}
void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
ns->size = i_size_read(ns->bdev->bd_inode);
}
static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
u16 status = NVME_SC_SUCCESS;
@ -142,6 +169,61 @@ static void nvmet_bio_done(struct bio *bio)
bio_put(bio);
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
struct sg_mapping_iter *miter)
{
struct blk_integrity *bi;
struct bio_integrity_payload *bip;
struct block_device *bdev = req->ns->bdev;
int rc;
size_t resid, len;
bi = bdev_get_integrity(bdev);
if (unlikely(!bi)) {
pr_err("Unable to locate bio_integrity\n");
return -ENODEV;
}
bip = bio_integrity_alloc(bio, GFP_NOIO,
min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES));
if (IS_ERR(bip)) {
pr_err("Unable to allocate bio_integrity_payload\n");
return PTR_ERR(bip);
}
bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
/* virtual start sector must be in integrity interval units */
bip_set_seed(bip, bio->bi_iter.bi_sector >>
(bi->interval_exp - SECTOR_SHIFT));
resid = bip->bip_iter.bi_size;
while (resid > 0 && sg_miter_next(miter)) {
len = min_t(size_t, miter->length, resid);
rc = bio_integrity_add_page(bio, miter->page, len,
offset_in_page(miter->addr));
if (unlikely(rc != len)) {
pr_err("bio_integrity_add_page() failed; %d\n", rc);
sg_miter_stop(miter);
return -ENOMEM;
}
resid -= len;
if (len < miter->length)
miter->consumed -= miter->length - len;
}
sg_miter_stop(miter);
return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
struct sg_mapping_iter *miter)
{
return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
@ -149,9 +231,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
struct scatterlist *sg;
struct blk_plug plug;
sector_t sector;
int op, i;
int op, i, rc;
struct sg_mapping_iter prot_miter;
unsigned int iter_flags;
unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;
if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
if (!nvmet_check_transfer_len(req, total_len))
return;
if (!req->sg_cnt) {
@ -163,8 +248,10 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
op |= REQ_FUA;
iter_flags = SG_MITER_TO_SG;
} else {
op = REQ_OP_READ;
iter_flags = SG_MITER_FROM_SG;
}
if (is_pci_p2pdma_page(sg_page(req->sg)))
@ -186,11 +273,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio->bi_opf = op;
blk_start_plug(&plug);
if (req->metadata_len)
sg_miter_start(&prot_miter, req->metadata_sg,
req->metadata_sg_cnt, iter_flags);
for_each_sg(req->sg, sg, req->sg_cnt, i) {
while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
!= sg->length) {
struct bio *prev = bio;
if (req->metadata_len) {
rc = nvmet_bdev_alloc_bip(req, bio,
&prot_miter);
if (unlikely(rc)) {
bio_io_error(bio);
return;
}
}
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
@ -204,6 +304,14 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
sg_cnt--;
}
if (req->metadata_len) {
rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
if (unlikely(rc)) {
bio_io_error(bio);
return;
}
}
submit_bio(bio);
blk_finish_plug(&plug);
}
@ -212,7 +320,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
struct bio *bio = &req->b.inline_bio;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
@ -304,7 +412,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
sector_t nr_sector;
int ret;
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
sector = le64_to_cpu(write_zeroes->slba) <<
@ -331,6 +439,8 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_bdev_execute_rw;
if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
req->metadata_len = nvmet_rw_metadata_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_bdev_execute_flush;
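
A worked example of the integrity-payload sizing in nvmet_bdev_alloc_bip() above, with assumed values (4096-byte protection interval, 8-byte tuples, a bio covering eight 512-byte sectors); this is standalone arithmetic, not code from the patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned interval_exp = 12;   /* 4096-byte protection interval */
	const unsigned tuple_size = 8;      /* bytes of PI per interval */
	const unsigned sector_shift = 9;    /* 512-byte sectors */
	uint64_t bi_sector = 4096;          /* start LBA in 512-byte units */
	unsigned bio_sectors = 8;           /* 4 KiB of data in the bio */

	/* mirrors bio_integrity_bytes(): intervals covered * tuple size */
	unsigned intervals = bio_sectors >> (interval_exp - sector_shift);
	unsigned bip_bytes = intervals * tuple_size;
	/* mirrors the bip_set_seed() argument above */
	uint64_t seed = bi_sector >> (interval_exp - sector_shift);

	printf("intervals=%u bip_bytes=%u seed=%llu\n",
	       intervals, bip_bytes, (unsigned long long)seed);
	return 0;
}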


@ -13,6 +13,18 @@
#define NVMET_MAX_MPOOL_BVEC 16
#define NVMET_MIN_MPOOL_OBJ 16
int nvmet_file_ns_revalidate(struct nvmet_ns *ns)
{
struct kstat stat;
int ret;
ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE,
AT_STATX_FORCE_SYNC);
if (!ret)
ns->size = stat.size;
return ret;
}
void nvmet_file_ns_disable(struct nvmet_ns *ns)
{
if (ns->file) {
@ -30,7 +42,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
int flags = O_RDWR | O_LARGEFILE;
struct kstat stat;
int ret;
if (!ns->buffered_io)
@ -43,12 +54,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
return PTR_ERR(ns->file);
}
ret = vfs_getattr(&ns->file->f_path,
&stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
ret = nvmet_file_ns_revalidate(ns);
if (ret)
goto err;
ns->size = stat.size;
/*
* i_blkbits can be greater than the universally accepted upper bound,
* so make sure we export a sane namespace lba_shift.
@ -232,7 +241,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
{
ssize_t nr_bvec = req->sg_cnt;
if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
return;
if (!req->sg_cnt || !nr_bvec) {
@ -276,7 +285,7 @@ static void nvmet_file_flush_work(struct work_struct *w)
static void nvmet_file_execute_flush(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
INIT_WORK(&req->f.work, nvmet_file_flush_work);
schedule_work(&req->f.work);
@ -366,7 +375,7 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
if (!nvmet_check_data_len(req, 0))
if (!nvmet_check_transfer_len(req, 0))
return;
INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
schedule_work(&req->f.work);


@ -19,6 +19,7 @@
#include <linux/rcupdate.h>
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/t10-pi.h>
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
@ -77,6 +78,8 @@ struct nvmet_ns {
int use_p2pmem;
struct pci_dev *p2p_dev;
int pi_type;
int metadata_size;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@ -142,6 +145,7 @@ struct nvmet_port {
bool enabled;
int inline_data_size;
const struct nvmet_fabrics_ops *tr_ops;
bool pi_enable;
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@ -201,6 +205,7 @@ struct nvmet_ctrl {
spinlock_t error_lock;
u64 err_counter;
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
bool pi_support;
};
struct nvmet_subsys_model {
@ -230,6 +235,7 @@ struct nvmet_subsys {
u64 ver;
u64 serial;
char *subsysnqn;
bool pi_support;
struct config_group group;
@ -281,6 +287,7 @@ struct nvmet_fabrics_ops {
unsigned int type;
unsigned int msdbd;
bool has_keyed_sgls : 1;
bool metadata_support : 1;
void (*queue_response)(struct nvmet_req *req);
int (*add_port)(struct nvmet_port *port);
void (*remove_port)(struct nvmet_port *port);
@ -302,6 +309,7 @@ struct nvmet_req {
struct nvmet_cq *cq;
struct nvmet_ns *ns;
struct scatterlist *sg;
struct scatterlist *metadata_sg;
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
union {
struct {
@ -315,8 +323,10 @@ struct nvmet_req {
} f;
};
int sg_cnt;
int metadata_sg_cnt;
/* data length as parsed from the SGL descriptor: */
size_t transfer_len;
size_t metadata_len;
struct nvmet_port *port;
@ -384,11 +394,11 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len);
bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
int nvmet_req_alloc_sgl(struct nvmet_req *req);
void nvmet_req_free_sgl(struct nvmet_req *req);
int nvmet_req_alloc_sgls(struct nvmet_req *req);
void nvmet_req_free_sgls(struct nvmet_req *req);
void nvmet_execute_keep_alive(struct nvmet_req *req);
@ -498,13 +508,24 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns);
u16 nvmet_bdev_flush(struct nvmet_req *req);
u16 nvmet_file_flush(struct nvmet_req *req);
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
void nvmet_ns_revalidate(struct nvmet_ns *ns);
static inline u32 nvmet_rw_len(struct nvmet_req *req)
static inline u32 nvmet_rw_data_len(struct nvmet_req *req)
{
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req)
{
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
return 0;
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) *
req->ns->metadata_size;
}
static inline u32 nvmet_dsm_len(struct nvmet_req *req)
{
return (le32_to_cpu(req->cmd->dsm.nr) + 1) *
@ -519,4 +540,11 @@ static inline __le16 to0based(u32 a)
return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
}
static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns)
{
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
return false;
return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple);
}
#endif /* _NVMET_H */
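
To make the zero-based length math in nvmet_rw_data_len() and nvmet_rw_metadata_len() concrete, a standalone example with assumed values (rw.length = 7, i.e. eight logical blocks; 4096-byte blocks; 8-byte T10 PI tuples):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t rw_length = 7;         /* NVMe rw.length is zero-based */
	uint32_t blksize_shift = 12;    /* 4096-byte logical blocks */
	uint32_t metadata_size = 8;     /* sizeof(struct t10_pi_tuple) */

	uint32_t data_len = ((uint32_t)rw_length + 1) << blksize_shift;
	uint32_t md_len = ((uint32_t)rw_length + 1) * metadata_size;

	/* 32768 data bytes + 64 metadata bytes = 32832 total transfer */
	printf("data=%u metadata=%u transfer=%u\n",
	       data_len, md_len, data_len + md_len);
	return 0;
}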


@ -33,6 +33,9 @@
/* Assume mpsmin == device_page_size == 4KB */
#define NVMET_RDMA_MAX_MDTS 8
#define NVMET_RDMA_MAX_METADATA_MDTS 5
struct nvmet_rdma_srq;
struct nvmet_rdma_cmd {
struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
@ -41,6 +44,7 @@ struct nvmet_rdma_cmd {
struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
struct nvme_command *nvme_cmd;
struct nvmet_rdma_queue *queue;
struct nvmet_rdma_srq *nsrq;
};
enum {
@ -57,6 +61,7 @@ struct nvmet_rdma_rsp {
struct nvmet_rdma_queue *queue;
struct ib_cqe read_cqe;
struct ib_cqe write_cqe;
struct rdma_rw_ctx rw;
struct nvmet_req req;
@ -83,6 +88,7 @@ struct nvmet_rdma_queue {
struct ib_cq *cq;
atomic_t sq_wr_avail;
struct nvmet_rdma_device *dev;
struct nvmet_rdma_srq *nsrq;
spinlock_t state_lock;
enum nvmet_rdma_queue_state state;
struct nvmet_cq nvme_cq;
@ -100,6 +106,7 @@ struct nvmet_rdma_queue {
int idx;
int host_qid;
int comp_vector;
int recv_queue_size;
int send_queue_size;
@ -113,11 +120,17 @@ struct nvmet_rdma_port {
struct delayed_work repair_work;
};
struct nvmet_rdma_srq {
struct ib_srq *srq;
struct nvmet_rdma_cmd *cmds;
struct nvmet_rdma_device *ndev;
};
struct nvmet_rdma_device {
struct ib_device *device;
struct ib_pd *pd;
struct ib_srq *srq;
struct nvmet_rdma_cmd *srq_cmds;
struct nvmet_rdma_srq **srqs;
int srq_count;
size_t srq_size;
struct kref ref;
struct list_head entry;
@ -129,6 +142,16 @@ static bool nvmet_rdma_use_srq;
module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
static int srq_size_set(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops srq_size_ops = {
.set = srq_size_set,
.get = param_get_int,
};
static int nvmet_rdma_srq_size = 1024;
module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644);
MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)");
static DEFINE_IDA(nvmet_rdma_queue_ida);
static LIST_HEAD(nvmet_rdma_queue_list);
static DEFINE_MUTEX(nvmet_rdma_queue_mutex);
@ -140,6 +163,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
@ -149,6 +173,17 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
static const struct nvmet_fabrics_ops nvmet_rdma_ops;
static int srq_size_set(const char *val, const struct kernel_param *kp)
{
int n = 0, ret;
ret = kstrtoint(val, 10, &n);
if (ret != 0 || n < 256)
return -EINVAL;
return param_set_int(val, kp);
}
static int num_pages(int len)
{
return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
@ -391,6 +426,9 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
/* Data In / RDMA READ */
r->read_cqe.done = nvmet_rdma_read_data_done;
/* Data Out / RDMA WRITE */
r->write_cqe.done = nvmet_rdma_write_data_done;
return 0;
out_free_rsp:
@ -466,8 +504,8 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
cmd->sge[0].addr, cmd->sge[0].length,
DMA_FROM_DEVICE);
if (ndev->srq)
ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
if (cmd->nsrq)
ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL);
else
ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
@ -500,6 +538,129 @@ static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
spin_unlock(&queue->rsp_wr_wait_lock);
}
static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr)
{
struct ib_mr_status mr_status;
int ret;
u16 status = 0;
ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
if (ret) {
pr_err("ib_check_mr_status failed, ret %d\n", ret);
return NVME_SC_INVALID_PI;
}
if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
switch (mr_status.sig_err.err_type) {
case IB_SIG_BAD_GUARD:
status = NVME_SC_GUARD_CHECK;
break;
case IB_SIG_BAD_REFTAG:
status = NVME_SC_REFTAG_CHECK;
break;
case IB_SIG_BAD_APPTAG:
status = NVME_SC_APPTAG_CHECK;
break;
}
pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
mr_status.sig_err.err_type,
mr_status.sig_err.expected,
mr_status.sig_err.actual);
}
return status;
}
static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
struct nvme_command *cmd, struct ib_sig_domain *domain,
u16 control, u8 pi_type)
{
domain->sig_type = IB_SIG_TYPE_T10_DIF;
domain->sig.dif.bg_type = IB_T10DIF_CRC;
domain->sig.dif.pi_interval = 1 << bi->interval_exp;
domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
if (control & NVME_RW_PRINFO_PRCHK_REF)
domain->sig.dif.ref_remap = true;
domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
domain->sig.dif.app_escape = true;
if (pi_type == NVME_NS_DPS_PI_TYPE3)
domain->sig.dif.ref_escape = true;
}
static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req,
struct ib_sig_attrs *sig_attrs)
{
struct nvme_command *cmd = req->cmd;
u16 control = le16_to_cpu(cmd->rw.control);
u8 pi_type = req->ns->pi_type;
struct blk_integrity *bi;
bi = bdev_get_integrity(req->ns->bdev);
memset(sig_attrs, 0, sizeof(*sig_attrs));
if (control & NVME_RW_PRINFO_PRACT) {
/* for WRITE_INSERT/READ_STRIP no wire domain */
sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
pi_type);
/* Clear the PRACT bit since HCA will generate/verify the PI */
control &= ~NVME_RW_PRINFO_PRACT;
cmd->rw.control = cpu_to_le16(control);
/* PI is added by the HW */
req->transfer_len += req->metadata_len;
} else {
/* for WRITE_PASS/READ_PASS both wire/memory domains exist */
nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
pi_type);
nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
pi_type);
}
if (control & NVME_RW_PRINFO_PRCHK_REF)
sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
if (control & NVME_RW_PRINFO_PRCHK_GUARD)
sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
if (control & NVME_RW_PRINFO_PRCHK_APP)
sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
}
static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key,
struct ib_sig_attrs *sig_attrs)
{
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
struct nvmet_req *req = &rsp->req;
int ret;
if (req->metadata_len)
ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
cm_id->port_num, req->sg, req->sg_cnt,
req->metadata_sg, req->metadata_sg_cnt, sig_attrs,
addr, key, nvmet_data_dir(req));
else
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
req->sg, req->sg_cnt, 0, addr, key,
nvmet_data_dir(req));
return ret;
}
static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp)
{
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
struct nvmet_req *req = &rsp->req;
if (req->metadata_len)
rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp,
cm_id->port_num, req->sg, req->sg_cnt,
req->metadata_sg, req->metadata_sg_cnt,
nvmet_data_dir(req));
else
rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num,
req->sg, req->sg_cnt, nvmet_data_dir(req));
}
static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
{
@ -507,14 +668,11 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
if (rsp->n_rdma) {
rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
queue->cm_id->port_num, rsp->req.sg,
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
}
if (rsp->n_rdma)
nvmet_rdma_rw_ctx_destroy(rsp);
if (rsp->req.sg != rsp->cmd->inline_sg)
nvmet_req_free_sgl(&rsp->req);
nvmet_req_free_sgls(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
nvmet_rdma_process_wr_wait_list(queue);
@ -566,11 +724,16 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
rsp->send_wr.opcode = IB_WR_SEND;
}
if (nvmet_rdma_need_data_out(rsp))
first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
cm_id->port_num, NULL, &rsp->send_wr);
else
if (nvmet_rdma_need_data_out(rsp)) {
if (rsp->req.metadata_len)
first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
cm_id->port_num, &rsp->write_cqe, NULL);
else
first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
cm_id->port_num, NULL, &rsp->send_wr);
} else {
first_wr = &rsp->send_wr;
}
nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
@ -589,15 +752,14 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
struct nvmet_rdma_queue *queue = cq->cq_context;
u16 status = 0;
WARN_ON(rsp->n_rdma <= 0);
atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
queue->cm_id->port_num, rsp->req.sg,
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
rsp->n_rdma = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
nvmet_rdma_rw_ctx_destroy(rsp);
nvmet_req_uninit(&rsp->req);
nvmet_rdma_release_rsp(rsp);
if (wc->status != IB_WC_WR_FLUSH_ERR) {
@ -608,7 +770,58 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
rsp->req.execute(&rsp->req);
if (rsp->req.metadata_len)
status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
nvmet_rdma_rw_ctx_destroy(rsp);
if (unlikely(status))
nvmet_req_complete(&rsp->req, status);
else
rsp->req.execute(&rsp->req);
}
static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
struct nvmet_rdma_queue *queue = cq->cq_context;
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u16 status;
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
return;
WARN_ON(rsp->n_rdma <= 0);
atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
rsp->n_rdma = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
nvmet_rdma_rw_ctx_destroy(rsp);
nvmet_req_uninit(&rsp->req);
nvmet_rdma_release_rsp(rsp);
if (wc->status != IB_WC_WR_FLUSH_ERR) {
pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n",
wc->wr_cqe, ib_wc_status_msg(wc->status),
wc->status);
nvmet_rdma_error_comp(queue);
}
return;
}
/*
* Upon RDMA completion check the signature status
* - if succeeded send good NVMe response
* - if failed send bad NVMe response with appropriate error
*/
status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
if (unlikely(status))
rsp->req.cqe->status = cpu_to_le16(status << 1);
nvmet_rdma_rw_ctx_destroy(rsp);
if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) {
pr_err("sending cmd response failed\n");
nvmet_rdma_release_rsp(rsp);
}
}
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
@ -665,9 +878,9 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
struct nvme_keyed_sgl_desc *sgl, bool invalidate)
{
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u64 addr = le64_to_cpu(sgl->addr);
u32 key = get_unaligned_le32(sgl->key);
struct ib_sig_attrs sig_attrs;
int ret;
rsp->req.transfer_len = get_unaligned_le24(sgl->length);
@ -676,13 +889,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
if (!rsp->req.transfer_len)
return 0;
ret = nvmet_req_alloc_sgl(&rsp->req);
if (rsp->req.metadata_len)
nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs);
ret = nvmet_req_alloc_sgls(&rsp->req);
if (unlikely(ret < 0))
goto error_out;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
nvmet_data_dir(&rsp->req));
ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs);
if (unlikely(ret < 0))
goto error_out;
rsp->n_rdma += ret;
@ -845,23 +1059,40 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
nvmet_rdma_handle_command(queue, rsp);
}
static void nvmet_rdma_destroy_srq(struct nvmet_rdma_device *ndev)
static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq)
{
if (!ndev->srq)
return;
nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size,
false);
ib_destroy_srq(nsrq->srq);
nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
ib_destroy_srq(ndev->srq);
kfree(nsrq);
}
static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev)
{
int i;
if (!ndev->srqs)
return;
for (i = 0; i < ndev->srq_count; i++)
nvmet_rdma_destroy_srq(ndev->srqs[i]);
kfree(ndev->srqs);
}
static struct nvmet_rdma_srq *
nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
{
struct ib_srq_init_attr srq_attr = { NULL, };
size_t srq_size = ndev->srq_size;
struct nvmet_rdma_srq *nsrq;
struct ib_srq *srq;
size_t srq_size;
int ret, i;
srq_size = 4095; /* XXX: tune */
nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL);
if (!nsrq)
return ERR_PTR(-ENOMEM);
srq_attr.attr.max_wr = srq_size;
srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
@ -869,6 +1100,42 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
srq_attr.srq_type = IB_SRQT_BASIC;
srq = ib_create_srq(ndev->pd, &srq_attr);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
goto out_free;
}
nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
if (IS_ERR(nsrq->cmds)) {
ret = PTR_ERR(nsrq->cmds);
goto out_destroy_srq;
}
nsrq->srq = srq;
nsrq->ndev = ndev;
for (i = 0; i < srq_size; i++) {
nsrq->cmds[i].nsrq = nsrq;
ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]);
if (ret)
goto out_free_cmds;
}
return nsrq;
out_free_cmds:
nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false);
out_destroy_srq:
ib_destroy_srq(srq);
out_free:
kfree(nsrq);
return ERR_PTR(ret);
}
static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev)
{
int i, ret;
if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) {
/*
* If SRQs aren't supported we just go ahead and use normal
* non-shared receive queues.
@ -877,27 +1144,29 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
return 0;
}
ndev->srq_cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
if (IS_ERR(ndev->srq_cmds)) {
ret = PTR_ERR(ndev->srq_cmds);
goto out_destroy_srq;
}
ndev->srq_size = min(ndev->device->attrs.max_srq_wr,
nvmet_rdma_srq_size);
ndev->srq_count = min(ndev->device->num_comp_vectors,
ndev->device->attrs.max_srq);
ndev->srq = srq;
ndev->srq_size = srq_size;
ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL);
if (!ndev->srqs)
return -ENOMEM;
for (i = 0; i < srq_size; i++) {
ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
if (ret)
goto out_free_cmds;
for (i = 0; i < ndev->srq_count; i++) {
ndev->srqs[i] = nvmet_rdma_init_srq(ndev);
if (IS_ERR(ndev->srqs[i])) {
ret = PTR_ERR(ndev->srqs[i]);
goto err_srq;
}
}
return 0;
out_free_cmds:
nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
out_destroy_srq:
ib_destroy_srq(srq);
err_srq:
while (--i >= 0)
nvmet_rdma_destroy_srq(ndev->srqs[i]);
kfree(ndev->srqs);
return ret;
}
@ -910,7 +1179,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
list_del(&ndev->entry);
mutex_unlock(&device_list_mutex);
nvmet_rdma_destroy_srq(ndev);
nvmet_rdma_destroy_srqs(ndev);
ib_dealloc_pd(ndev->pd);
kfree(ndev);
@ -957,7 +1226,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
goto out_free_dev;
if (nvmet_rdma_use_srq) {
ret = nvmet_rdma_init_srq(ndev);
ret = nvmet_rdma_init_srqs(ndev);
if (ret)
goto out_free_pd;
}
@ -981,14 +1250,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
{
struct ib_qp_init_attr qp_attr;
struct nvmet_rdma_device *ndev = queue->dev;
int comp_vector, nr_cqe, ret, i, factor;
/*
* Spread the io queues across completion vectors,
* but still keep all admin queues on vector 0.
*/
comp_vector = !queue->host_qid ? 0 :
queue->idx % ndev->device->num_comp_vectors;
int nr_cqe, ret, i, factor;
/*
* Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND.
@ -996,7 +1258,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size;
queue->cq = ib_alloc_cq(ndev->device, queue,
nr_cqe + 1, comp_vector,
nr_cqe + 1, queue->comp_vector,
IB_POLL_WORKQUEUE);
if (IS_ERR(queue->cq)) {
ret = PTR_ERR(queue->cq);
@ -1020,14 +1282,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
ndev->device->attrs.max_send_sge);
if (ndev->srq) {
qp_attr.srq = ndev->srq;
if (queue->nsrq) {
qp_attr.srq = queue->nsrq->srq;
} else {
/* +1 for drain */
qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
}
if (queue->port->pi_enable && queue->host_qid)
qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
if (ret) {
pr_err("failed to create_qp ret= %d\n", ret);
@ -1041,7 +1306,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
__func__, queue->cq->cqe, qp_attr.cap.max_send_sge,
qp_attr.cap.max_send_wr, queue->cm_id);
if (!ndev->srq) {
if (!queue->nsrq) {
for (i = 0; i < queue->recv_queue_size; i++) {
queue->cmds[i].queue = queue;
ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
@ -1076,7 +1341,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
nvmet_sq_destroy(&queue->nvme_sq);
nvmet_rdma_destroy_queue_ib(queue);
if (!queue->dev->srq) {
if (!queue->nsrq) {
nvmet_rdma_free_cmds(queue->dev, queue->cmds,
queue->recv_queue_size,
!queue->host_qid);
@ -1146,6 +1411,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_queue *queue;
int ret;
@ -1172,6 +1438,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
queue->dev = ndev;
queue->cm_id = cm_id;
queue->port = port->nport;
spin_lock_init(&queue->state_lock);
queue->state = NVMET_RDMA_Q_CONNECTING;
@ -1188,13 +1455,23 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
goto out_destroy_sq;
}
/*
* Spread the io queues across completion vectors,
* but still keep all admin queues on vector 0.
*/
queue->comp_vector = !queue->host_qid ? 0 :
queue->idx % ndev->device->num_comp_vectors;
ret = nvmet_rdma_alloc_rsps(queue);
if (ret) {
ret = NVME_RDMA_CM_NO_RSC;
goto out_ida_remove;
}
if (!ndev->srq) {
if (ndev->srqs) {
queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count];
} else {
queue->cmds = nvmet_rdma_alloc_cmds(ndev,
queue->recv_queue_size,
!queue->host_qid);
@ -1215,7 +1492,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
return queue;
out_free_cmds:
if (!ndev->srq) {
if (!queue->nsrq) {
nvmet_rdma_free_cmds(queue->dev, queue->cmds,
queue->recv_queue_size,
!queue->host_qid);
@ -1241,6 +1518,10 @@ static void nvmet_rdma_qp_event(struct ib_event *event, void *priv)
case IB_EVENT_COMM_EST:
rdma_notify(queue->cm_id, event->event);
break;
case IB_EVENT_QP_LAST_WQE_REACHED:
pr_debug("received last WQE reached event for queue=0x%p\n",
queue);
break;
default:
pr_err("received IB QP event: %s (%d)\n",
ib_event_msg(event->event), event->event);
@ -1275,7 +1556,6 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_device *ndev;
struct nvmet_rdma_queue *queue;
int ret = -EINVAL;
@ -1291,7 +1571,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ret = -ENOMEM;
goto put_device;
}
queue->port = port->nport;
if (queue->host_qid == 0) {
/* Let inflight controller teardown complete */
@ -1563,6 +1842,14 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
goto out_destroy_id;
}
if (port->nport->pi_enable &&
!(cm_id->device->attrs.device_cap_flags &
IB_DEVICE_INTEGRITY_HANDOVER)) {
pr_err("T10-PI is not supported for %pISpcs\n", addr);
ret = -EINVAL;
goto out_destroy_id;
}
port->cm_id = cm_id;
return 0;
@ -1672,6 +1959,8 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
{
if (ctrl->pi_support)
return NVMET_RDMA_MAX_METADATA_MDTS;
return NVMET_RDMA_MAX_MDTS;
}
@ -1680,6 +1969,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.type = NVMF_TRTYPE_RDMA,
.msdbd = 1,
.has_keyed_sgls = 1,
.metadata_support = 1,
.add_port = nvmet_rdma_add_port,
.remove_port = nvmet_rdma_remove_port,
.queue_response = nvmet_rdma_queue_response,
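
The hunks above move completion-vector selection into nvmet_rdma_alloc_queue() and pick one SRQ per vector, implementing the "SRQ per completion vector" item from the pull summary. A minimal standalone sketch of that mapping, with invented names purely for illustration (this is not driver code):

/*
 * Illustrative only: admin queues (host_qid == 0) stay on completion
 * vector 0; I/O queues are spread round-robin across the device's
 * vectors, and each queue then shares the SRQ tied to its vector.
 */
static int example_pick_comp_vector(int host_qid, int queue_idx,
				    int num_comp_vectors)
{
	return host_qid ? queue_idx % num_comp_vectors : 0;
}

static int example_pick_srq_index(int comp_vector, int srq_count)
{
	return comp_vector % srq_count;
}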


@ -325,6 +325,14 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
}
static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status)
{
if (status == -EPIPE || status == -ECONNRESET)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
else
nvmet_tcp_fatal_error(queue);
}
static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
{
struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
@ -510,7 +518,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
offset_in_page(cmd->data_pdu) + cmd->offset,
left, MSG_DONTWAIT | MSG_MORE);
left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (ret <= 0)
return ret;
@ -538,7 +546,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
if ((!last_in_batch && cmd->queue->send_list_len) ||
cmd->wbytes_done + left < cmd->req.transfer_len ||
queue->data_digest || !queue->nvme_sq.sqhd_disabled)
flags |= MSG_MORE;
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
left, flags);
@ -585,7 +593,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
flags |= MSG_MORE;
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@ -614,7 +622,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
flags |= MSG_MORE;
flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@ -644,6 +652,8 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
if (!last_in_batch && cmd->queue->send_list_len)
msg.msg_flags |= MSG_MORE;
else
msg.msg_flags |= MSG_EOR;
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0))
@ -716,11 +726,15 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
if (ret <= 0)
if (unlikely(ret < 0)) {
nvmet_tcp_socket_error(queue, ret);
goto done;
} else if (ret == 0) {
break;
}
(*sends)++;
}
done:
return ret;
}
@ -1157,11 +1171,15 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_recv_one(queue);
if (ret <= 0)
if (unlikely(ret < 0)) {
nvmet_tcp_socket_error(queue, ret);
goto done;
} else if (ret == 0) {
break;
}
(*recvs)++;
}
done:
return ret;
}
@ -1186,27 +1204,16 @@ static void nvmet_tcp_io_work(struct work_struct *w)
pending = false;
ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
if (ret > 0) {
if (ret > 0)
pending = true;
} else if (ret < 0) {
if (ret == -EPIPE || ret == -ECONNRESET)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
else
nvmet_tcp_fatal_error(queue);
else if (ret < 0)
return;
}
ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
if (ret > 0) {
/* transmitted message/data */
if (ret > 0)
pending = true;
} else if (ret < 0) {
if (ret == -EPIPE || ret == -ECONNRESET)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
else
nvmet_tcp_fatal_error(queue);
else if (ret < 0)
return;
}
} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);


@ -130,6 +130,34 @@ TRACE_EVENT(nvmet_req_complete,
);
#define aer_name(aer) { aer, #aer }
TRACE_EVENT(nvmet_async_event,
TP_PROTO(struct nvmet_ctrl *ctrl, __le32 result),
TP_ARGS(ctrl, result),
TP_STRUCT__entry(
__field(int, ctrl_id)
__field(u32, result)
),
TP_fast_assign(
__entry->ctrl_id = ctrl->cntlid;
__entry->result = (le32_to_cpu(result) & 0xff00) >> 8;
),
TP_printk("nvmet%d: NVME_AEN=%#08x [%s]",
__entry->ctrl_id, __entry->result,
__print_symbolic(__entry->result,
aer_name(NVME_AER_NOTICE_NS_CHANGED),
aer_name(NVME_AER_NOTICE_ANA),
aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
aer_name(NVME_AER_NOTICE_DISC_CHANGED),
aer_name(NVME_AER_ERROR),
aer_name(NVME_AER_SMART),
aer_name(NVME_AER_CSS),
aer_name(NVME_AER_VS))
)
);
#undef aer_name
#endif /* _TRACE_NVMET_H */
#undef TRACE_INCLUDE_PATH


@ -22,6 +22,7 @@
#include <asm/schid.h>
#include <asm/cmb.h>
#include <linux/uaccess.h>
#include <linux/dasd_mod.h>
/* This is ugly... */
#define PRINTK_HEADER "dasd_ioctl:"
@ -457,10 +458,9 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
/*
* Return dasd information. Used for BIODASDINFO and BIODASDINFO2.
*/
static int dasd_ioctl_information(struct dasd_block *block,
unsigned int cmd, void __user *argp)
static int __dasd_ioctl_information(struct dasd_block *block,
struct dasd_information2_t *dasd_info)
{
struct dasd_information2_t *dasd_info;
struct subchannel_id sch_id;
struct ccw_dev_id dev_id;
struct dasd_device *base;
@ -473,15 +473,9 @@ static int dasd_ioctl_information(struct dasd_block *block,
if (!base->discipline || !base->discipline->fill_info)
return -EINVAL;
dasd_info = kzalloc(sizeof(struct dasd_information2_t), GFP_KERNEL);
if (dasd_info == NULL)
return -ENOMEM;
rc = base->discipline->fill_info(base, dasd_info);
if (rc) {
kfree(dasd_info);
if (rc)
return rc;
}
cdev = base->cdev;
ccw_device_get_id(cdev, &dev_id);
@ -520,15 +514,24 @@ static int dasd_ioctl_information(struct dasd_block *block,
list_for_each(l, &base->ccw_queue)
dasd_info->chanq_len++;
spin_unlock_irqrestore(&block->queue_lock, flags);
return 0;
}
rc = 0;
if (copy_to_user(argp, dasd_info,
((cmd == (unsigned int) BIODASDINFO2) ?
sizeof(struct dasd_information2_t) :
sizeof(struct dasd_information_t))))
rc = -EFAULT;
static int dasd_ioctl_information(struct dasd_block *block, void __user *argp,
size_t copy_size)
{
struct dasd_information2_t *dasd_info;
int error;
dasd_info = kzalloc(sizeof(*dasd_info), GFP_KERNEL);
if (!dasd_info)
return -ENOMEM;
error = __dasd_ioctl_information(block, dasd_info);
if (!error && copy_to_user(argp, dasd_info, copy_size))
error = -EFAULT;
kfree(dasd_info);
return rc;
return error;
}
/*
@ -622,10 +625,12 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode,
rc = dasd_ioctl_check_format(bdev, argp);
break;
case BIODASDINFO:
rc = dasd_ioctl_information(block, cmd, argp);
rc = dasd_ioctl_information(block, argp,
sizeof(struct dasd_information_t));
break;
case BIODASDINFO2:
rc = dasd_ioctl_information(block, cmd, argp);
rc = dasd_ioctl_information(block, argp,
sizeof(struct dasd_information2_t));
break;
case BIODASDPRRD:
rc = dasd_ioctl_read_profile(block, argp);
@ -660,3 +665,36 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode,
dasd_put_device(base);
return rc;
}
/**
* dasd_biodasdinfo() - fill out the dasd information structure
* @disk [in]: pointer to gendisk structure that references a DASD
* @info [out]: pointer to the dasd_information2_t structure
*
* Provide access to DASD specific information.
* The gendisk structure is checked to see whether it belongs to the DASD
* driver by comparing the gendisk->fops pointer.
* If it does not belong to the DASD driver, -EINVAL is returned.
* Otherwise the provided dasd_information2_t structure is filled out.
*
* Returns:
* %0 on success and a negative error value on failure.
*/
int dasd_biodasdinfo(struct gendisk *disk, struct dasd_information2_t *info)
{
struct dasd_device *base;
int error;
if (disk->fops != &dasd_device_operations)
return -EINVAL;
base = dasd_device_from_gendisk(disk);
if (!base)
return -ENODEV;
error = __dasd_ioctl_information(base->block, info);
dasd_put_device(base);
return error;
}
/* export that symbol_get in partition detection is possible */
EXPORT_SYMBOL_GPL(dasd_biodasdinfo);
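
Since dasd_biodasdinfo() is exported specifically so that symbol_get() works from partition detection, a caller outside the DASD driver would look roughly like the sketch below. Only dasd_biodasdinfo() and the dasd_information2_t type come from the code above; the function and variable names here are invented for illustration.

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/dasd_mod.h>

/* Hypothetical consumer: query DASD information without a hard dependency
 * on dasd_mod. symbol_get() returns NULL if the module is not loaded. */
static int example_query_dasd_info(struct gendisk *disk)
{
	int (*fn)(struct gendisk *disk, dasd_information2_t *info);
	dasd_information2_t *info;
	int rc;

	fn = symbol_get(dasd_biodasdinfo);
	if (!fn)
		return -ENODEV;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		rc = -ENOMEM;
		goto out_put;
	}

	rc = fn(disk, info);	/* -EINVAL if the disk is not a DASD */
	/* ... use the returned fields as needed ... */
	kfree(info);
out_put:
	symbol_put(dasd_biodasdinfo);
	return rc;
}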


@ -143,7 +143,7 @@ struct lpfc_dmabuf {
struct lpfc_nvmet_ctxbuf {
struct list_head list;
struct lpfc_nvmet_rcv_ctx *context;
struct lpfc_async_xchg_ctx *context;
struct lpfc_iocbq *iocbq;
struct lpfc_sglq *sglq;
struct work_struct defer_work;


@ -37,8 +37,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
#include <linux/nvme-fc-driver.h>
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@ -48,7 +46,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_version.h"
#include "lpfc_compat.h"


@ -24,7 +24,6 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *);
struct fc_rport;
struct fc_frame_header;
struct lpfc_nvmet_rcv_ctx;
void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
void lpfc_sli_read_link_ste(struct lpfc_hba *);
void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t);
@ -564,12 +563,16 @@ void lpfc_nvme_update_localport(struct lpfc_vport *vport);
int lpfc_nvmet_create_targetport(struct lpfc_hba *phba);
int lpfc_nvmet_update_targetport(struct lpfc_hba *phba);
void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba);
void lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba,
struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb);
int lpfc_nvme_handle_lsreq(struct lpfc_hba *phba,
struct lpfc_async_xchg_ctx *axchg);
int lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba,
struct lpfc_async_xchg_ctx *axchg);
void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint32_t idx,
struct rqb_dmabuf *nvmebuf, uint64_t isr_ts,
uint8_t cqflag);
void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba);
void lpfc_nvmet_invalidate_host(struct lpfc_hba *phba,
struct lpfc_nodelist *ndlp);
void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdiocb,
struct lpfc_wcqe_complete *abts_cmpl);


@ -44,7 +44,6 @@
#include "lpfc_disc.h"
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_version.h"


@ -39,8 +39,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
#include <linux/nvme-fc-driver.h>
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@ -50,7 +48,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_vport.h"
@ -1035,7 +1032,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
{
struct lpfc_hba *phba = vport->phba;
struct lpfc_nvmet_tgtport *tgtp;
struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
struct lpfc_async_xchg_ctx *ctxp, *next_ctxp;
struct nvme_fc_local_port *localport;
struct lpfc_fc4_ctrl_stat *cstat;
struct lpfc_nvme_lport *lport;


@ -36,8 +36,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
#include <linux/nvme-fc-driver.h>
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_nl.h"
@ -825,6 +823,12 @@ lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove)
if ((phba->sli_rev < LPFC_SLI_REV4) &&
(!remove && ndlp->nlp_type & NLP_FABRIC))
continue;
/* Notify transport of connectivity loss to trigger cleanup. */
if (phba->nvmet_support &&
ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)
lpfc_nvmet_invalidate_host(phba, ndlp);
lpfc_disc_state_machine(vport, ndlp, NULL,
remove
? NLP_EVT_DEVICE_RM


@ -50,8 +50,6 @@
#include <scsi/scsi_tcq.h>
#include <scsi/fc/fc_fs.h>
#include <linux/nvme-fc-driver.h>
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@ -61,7 +59,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_vport.h"
@ -1032,7 +1029,7 @@ static int
lpfc_hba_down_post_s4(struct lpfc_hba *phba)
{
struct lpfc_io_buf *psb, *psb_next;
struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next;
struct lpfc_async_xchg_ctx *ctxp, *ctxp_next;
struct lpfc_sli4_hdw_queue *qp;
LIST_HEAD(aborts);
LIST_HEAD(nvme_aborts);
@ -1099,7 +1096,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
&nvmet_aborts);
spin_unlock_irq(&phba->sli4_hba.abts_nvmet_buf_list_lock);
list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) {
ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP);
ctxp->flag &= ~(LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP);
lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
}
}


@ -31,8 +31,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
#include <linux/nvme-fc-driver.h>
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@ -41,8 +39,6 @@
#include "lpfc_disc.h"
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
#include "lpfc_nvmet.h"
#include "lpfc_crtn.h"
#include "lpfc_logmsg.h"


@ -32,8 +32,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
#include <linux/nvme-fc-driver.h>
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@ -491,6 +489,11 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
(unsigned long long)
wwn_to_u64(sp->portName.u.wwn));
/* Notify transport of connectivity loss to trigger cleanup. */
if (phba->nvmet_support &&
ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)
lpfc_nvmet_invalidate_host(phba, ndlp);
ndlp->nlp_prev_state = ndlp->nlp_state;
/* rport needs to be unregistered first */
lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
@ -841,6 +844,12 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL);
else
lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL);
/* Notify transport of connectivity loss to trigger cleanup. */
if (phba->nvmet_support &&
ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)
lpfc_nvmet_invalidate_host(phba, ndlp);
if (ndlp->nlp_DID == Fabric_DID) {
if (vport->port_state <= LPFC_FDISC)
goto out;


@ -36,9 +36,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
#include <linux/nvme.h>
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
#include "lpfc_version.h"
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
@ -396,18 +393,131 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport)
return;
}
/**
* lpfc_nvme_handle_lsreq - Process an unsolicited NVME LS request
* @phba: pointer to lpfc hba data structure.
* @axchg: pointer to exchange context for the NVME LS request
*
* This routine is used for processing an asynchronously received NVME LS
* request. Any remaining validation is done and the LS is then forwarded
* to the nvme-fc transport via nvme_fc_rcv_ls_req().
*
* The calling sequence should be: nvme_fc_rcv_ls_req() -> (processing)
* -> lpfc_nvme_xmt_ls_rsp/cmp -> req->done.
* __lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg.
*
* Returns 0 if LS was handled and delivered to the transport
* Returns 1 if LS failed to be handled and should be dropped
*/
int
lpfc_nvme_handle_lsreq(struct lpfc_hba *phba,
struct lpfc_async_xchg_ctx *axchg)
{
#if (IS_ENABLED(CONFIG_NVME_FC))
struct lpfc_vport *vport;
struct lpfc_nvme_rport *lpfc_rport;
struct nvme_fc_remote_port *remoteport;
struct lpfc_nvme_lport *lport;
uint32_t *payload = axchg->payload;
int rc;
vport = axchg->ndlp->vport;
lpfc_rport = axchg->ndlp->nrport;
if (!lpfc_rport)
return -EINVAL;
remoteport = lpfc_rport->remoteport;
if (!vport->localport)
return -EINVAL;
lport = vport->localport->private;
if (!lport)
return -EINVAL;
rc = nvme_fc_rcv_ls_req(remoteport, &axchg->ls_rsp, axchg->payload,
axchg->size);
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
"6205 NVME Unsol rcv: sz %d rc %d: %08x %08x %08x "
"%08x %08x %08x\n",
axchg->size, rc,
*payload, *(payload+1), *(payload+2),
*(payload+3), *(payload+4), *(payload+5));
if (!rc)
return 0;
#endif
return 1;
}
/**
* __lpfc_nvme_ls_req_cmp - Generic completion handler for a NVME
* LS request.
* @phba: Pointer to HBA context object
* @vport: The local port that issued the LS
* @cmdwqe: Pointer to driver command WQE object.
* @wcqe: Pointer to driver response CQE object.
*
* This function is the generic completion handler for NVME LS requests.
* The function updates any states and statistics, calls the transport
* ls_req done() routine, then tears down the command and buffers used
* for the LS request.
**/
void
__lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport,
struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe)
{
struct nvmefc_ls_req *pnvme_lsreq;
struct lpfc_dmabuf *buf_ptr;
struct lpfc_nodelist *ndlp;
uint32_t status;
pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2;
ndlp = (struct lpfc_nodelist *)cmdwqe->context1;
status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
"6047 NVMEx LS REQ %px cmpl DID %x Xri: %x "
"status %x reason x%x cmd:x%px lsreg:x%px bmp:x%px "
"ndlp:x%px\n",
pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
cmdwqe->sli4_xritag, status,
(wcqe->parameter & 0xffff),
cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp);
lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x parm x%x\n",
cmdwqe->sli4_xritag, status, wcqe->parameter);
if (cmdwqe->context3) {
buf_ptr = (struct lpfc_dmabuf *)cmdwqe->context3;
lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
kfree(buf_ptr);
cmdwqe->context3 = NULL;
}
if (pnvme_lsreq->done)
pnvme_lsreq->done(pnvme_lsreq, status);
else
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
"6046 NVMEx cmpl without done call back? "
"Data %px DID %x Xri: %x status %x\n",
pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
cmdwqe->sli4_xritag, status);
if (ndlp) {
lpfc_nlp_put(ndlp);
cmdwqe->context1 = NULL;
}
lpfc_sli_release_iocbq(phba, cmdwqe);
}
static void
lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe)
{
struct lpfc_vport *vport = cmdwqe->vport;
struct lpfc_nvme_lport *lport;
uint32_t status;
struct nvmefc_ls_req *pnvme_lsreq;
struct lpfc_dmabuf *buf_ptr;
struct lpfc_nodelist *ndlp;
pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2;
status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
if (vport->localport) {
@ -422,38 +532,7 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
}
}
ndlp = (struct lpfc_nodelist *)cmdwqe->context1;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
"6047 nvme cmpl Enter "
"Data %px DID %x Xri: %x status %x reason x%x "
"cmd:x%px lsreg:x%px bmp:x%px ndlp:x%px\n",
pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
cmdwqe->sli4_xritag, status,
(wcqe->parameter & 0xffff),
cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp);
lpfc_nvmeio_data(phba, "NVME LS CMPL: xri x%x stat x%x parm x%x\n",
cmdwqe->sli4_xritag, status, wcqe->parameter);
if (cmdwqe->context3) {
buf_ptr = (struct lpfc_dmabuf *)cmdwqe->context3;
lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
kfree(buf_ptr);
cmdwqe->context3 = NULL;
}
if (pnvme_lsreq->done)
pnvme_lsreq->done(pnvme_lsreq, status);
else
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
"6046 nvme cmpl without done call back? "
"Data %px DID %x Xri: %x status %x\n",
pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
cmdwqe->sli4_xritag, status);
if (ndlp) {
lpfc_nlp_put(ndlp);
cmdwqe->context1 = NULL;
}
lpfc_sli_release_iocbq(phba, cmdwqe);
__lpfc_nvme_ls_req_cmp(phba, vport, cmdwqe, wcqe);
}
static int
@ -557,13 +636,6 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
/* Issue GEN REQ WQE for NPORT <did> */
lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
"6050 Issue GEN REQ WQE to NPORT x%x "
"Data: x%x x%x wq:x%px lsreq:x%px bmp:x%px "
"xmit:%d 1st:%d\n",
ndlp->nlp_DID, genwqe->iotag,
vport->port_state,
genwqe, pnvme_lsreq, bmp, xmit_len, first_len);
genwqe->wqe_cmpl = cmpl;
genwqe->iocb_cmpl = NULL;
genwqe->drvrTimeout = tmo + LPFC_DRVR_TIMEOUT;
@ -575,105 +647,108 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
rc = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], genwqe);
if (rc) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC | LOG_ELS,
"6045 Issue GEN REQ WQE to NPORT x%x "
"Data: x%x x%x\n",
"Data: x%x x%x rc x%x\n",
ndlp->nlp_DID, genwqe->iotag,
vport->port_state);
vport->port_state, rc);
lpfc_sli_release_iocbq(phba, genwqe);
return 1;
}
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_ELS,
"6050 Issue GEN REQ WQE to NPORT x%x "
"Data: oxid: x%x state: x%x wq:x%px lsreq:x%px "
"bmp:x%px xmit:%d 1st:%d\n",
ndlp->nlp_DID, genwqe->sli4_xritag,
vport->port_state,
genwqe, pnvme_lsreq, bmp, xmit_len, first_len);
return 0;
}
/**
* lpfc_nvme_ls_req - Issue an Link Service request
* @lpfc_pnvme: Pointer to the driver's nvme instance data
* @lpfc_nvme_lport: Pointer to the driver's local port data
* @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
* __lpfc_nvme_ls_req - Generic service routine to issue an NVME LS request
* @vport: The local port issuing the LS
* @ndlp: The remote port to send the LS to
* @pnvme_lsreq: Pointer to LS request structure from the transport
*
* Driver registers this routine to handle any link service request
* from the nvme_fc transport to a remote nvme-aware port.
* Routine validates the ndlp, builds buffers and sends a GEN_REQUEST
* WQE to perform the LS operation.
*
* Return value :
* 0 - Success
* TODO: What are the failure codes.
* non-zero: various error codes, in form of -Exxx
**/
static int
lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
struct nvme_fc_remote_port *pnvme_rport,
struct nvmefc_ls_req *pnvme_lsreq)
int
__lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
struct nvmefc_ls_req *pnvme_lsreq,
void (*gen_req_cmp)(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe))
{
int ret = 0;
struct lpfc_nvme_lport *lport;
struct lpfc_nvme_rport *rport;
struct lpfc_vport *vport;
struct lpfc_nodelist *ndlp;
struct ulp_bde64 *bpl;
struct lpfc_dmabuf *bmp;
struct ulp_bde64 *bpl;
int ret;
uint16_t ntype, nstate;
/* there are two dma buf in the request, actually there is one and
* the second one is just the start address + cmd size.
* Before calling lpfc_nvme_gen_req these buffers need to be wrapped
* in a lpfc_dmabuf struct. When freeing we just free the wrapper
* because the nvme layer owns the data bufs.
* We do not have to break these packets open, we don't care what is in
* them. And we do not have to look at the response data, we only care
* that we got a response. All of the caring is going to happen in the
* nvme-fc layer.
*/
lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
rport = (struct lpfc_nvme_rport *)pnvme_rport->private;
if (unlikely(!lport) || unlikely(!rport))
return -EINVAL;
vport = lport->vport;
if (vport->load_flag & FC_UNLOADING)
return -ENODEV;
/* Need the ndlp. It is stored in the driver's rport. */
ndlp = rport->ndlp;
if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
"6051 Remoteport x%px, rport has invalid ndlp. "
"Failing LS Req\n", pnvme_rport);
lpfc_printf_vlog(vport, KERN_ERR,
LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR,
"6051 NVMEx LS REQ: Bad NDLP x%px, Failing "
"LS Req\n",
ndlp);
return -ENODEV;
}
/* The remote node has to be a mapped nvme target or an
* unmapped nvme initiator or it's an error.
*/
ntype = ndlp->nlp_type;
nstate = ndlp->nlp_state;
if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) ||
(ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
"6088 DID x%06x not ready for "
"IO. State x%x, Type x%x\n",
pnvme_rport->port_id,
ndlp->nlp_state, ndlp->nlp_type);
lpfc_printf_vlog(vport, KERN_ERR,
LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR,
"6088 NVMEx LS REQ: Fail DID x%06x not "
"ready for IO. Type x%x, State x%x\n",
ndlp->nlp_DID, ntype, nstate);
return -ENODEV;
}
bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
/*
* there are two dma buf in the request, actually there is one and
* the second one is just the start address + cmd size.
* Before calling lpfc_nvme_gen_req these buffers need to be wrapped
* in a lpfc_dmabuf struct. When freeing we just free the wrapper
* because the nvme layer owns the data bufs.
* We do not have to break these packets open, we don't care what is
* in them. And we do not have to look at the response data, we only
* care that we got a response. All of the caring is going to happen
* in the nvme-fc layer.
*/
bmp = kmalloc(sizeof(*bmp), GFP_KERNEL);
if (!bmp) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
"6044 Could not find node for DID %x\n",
pnvme_rport->port_id);
return 2;
lpfc_printf_vlog(vport, KERN_ERR,
LOG_NVME_DISC | LOG_NVME_IOERR,
"6044 NVMEx LS REQ: Could not alloc LS buf "
"for DID %x\n",
ndlp->nlp_DID);
return -ENOMEM;
}
INIT_LIST_HEAD(&bmp->list);
bmp->virt = lpfc_mbuf_alloc(vport->phba, MEM_PRI, &(bmp->phys));
if (!bmp->virt) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
"6042 Could not find node for DID %x\n",
pnvme_rport->port_id);
lpfc_printf_vlog(vport, KERN_ERR,
LOG_NVME_DISC | LOG_NVME_IOERR,
"6042 NVMEx LS REQ: Could not alloc mbuf "
"for DID %x\n",
ndlp->nlp_DID);
kfree(bmp);
return 3;
return -ENOMEM;
}
INIT_LIST_HEAD(&bmp->list);
bpl = (struct ulp_bde64 *)bmp->virt;
bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rqstdma));
bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rqstdma));
@ -688,52 +763,180 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
bpl->tus.f.bdeSize = pnvme_lsreq->rsplen;
bpl->tus.w = le32_to_cpu(bpl->tus.w);
/* Expand print to include key fields. */
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
"6149 Issue LS Req to DID 0x%06x lport x%px, "
"rport x%px lsreq x%px rqstlen:%d rsplen:%d "
"%pad %pad\n",
ndlp->nlp_DID, pnvme_lport, pnvme_rport,
pnvme_lsreq, pnvme_lsreq->rqstlen,
pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
&pnvme_lsreq->rspdma);
"6149 NVMEx LS REQ: Issue to DID 0x%06x lsreq x%px, "
"rqstlen:%d rsplen:%d %pad %pad\n",
ndlp->nlp_DID, pnvme_lsreq, pnvme_lsreq->rqstlen,
pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
&pnvme_lsreq->rspdma);
atomic_inc(&lport->fc4NvmeLsRequests);
/* Hardcode the wait to 30 seconds. Connections are failing otherwise.
* This code allows it all to work.
*/
ret = lpfc_nvme_gen_req(vport, bmp, pnvme_lsreq->rqstaddr,
pnvme_lsreq, lpfc_nvme_cmpl_gen_req,
ndlp, 2, 30, 0);
pnvme_lsreq, gen_req_cmp, ndlp, 2,
LPFC_NVME_LS_TIMEOUT, 0);
if (ret != WQE_SUCCESS) {
atomic_inc(&lport->xmt_ls_err);
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
"6052 EXIT. issue ls wqe failed lport x%px, "
"rport x%px lsreq x%px Status %x DID %x\n",
pnvme_lport, pnvme_rport, pnvme_lsreq,
ret, ndlp->nlp_DID);
lpfc_printf_vlog(vport, KERN_ERR,
LOG_NVME_DISC | LOG_NVME_IOERR,
"6052 NVMEx REQ: EXIT. issue ls wqe failed "
"lsreq x%px Status %x DID %x\n",
pnvme_lsreq, ret, ndlp->nlp_DID);
lpfc_mbuf_free(vport->phba, bmp->virt, bmp->phys);
kfree(bmp);
return ret;
return -EIO;
}
/* Stub in routine and return 0 for now. */
return ret;
return 0;
}
/**
* lpfc_nvme_ls_abort - Issue an Link Service request
* @lpfc_pnvme: Pointer to the driver's nvme instance data
* @lpfc_nvme_lport: Pointer to the driver's local port data
* @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
* lpfc_nvme_ls_req - Issue an NVME Link Service request
* @lpfc_nvme_lport: Transport localport that LS is to be issued from.
* @lpfc_nvme_rport: Transport remoteport that LS is to be sent to.
* @pnvme_lsreq - the transport nvme_ls_req structure for the LS
*
* Driver registers this routine to handle any link service request
* from the nvme_fc transport to a remote nvme-aware port.
*
* Return value :
* 0 - Success
* TODO: What are the failure codes.
* non-zero: various error codes, in form of -Exxx
**/
static int
lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
struct nvme_fc_remote_port *pnvme_rport,
struct nvmefc_ls_req *pnvme_lsreq)
{
struct lpfc_nvme_lport *lport;
struct lpfc_nvme_rport *rport;
struct lpfc_vport *vport;
int ret;
lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
rport = (struct lpfc_nvme_rport *)pnvme_rport->private;
if (unlikely(!lport) || unlikely(!rport))
return -EINVAL;
vport = lport->vport;
if (vport->load_flag & FC_UNLOADING)
return -ENODEV;
atomic_inc(&lport->fc4NvmeLsRequests);
ret = __lpfc_nvme_ls_req(vport, rport->ndlp, pnvme_lsreq,
lpfc_nvme_ls_req_cmp);
if (ret)
atomic_inc(&lport->xmt_ls_err);
return ret;
}
/**
* __lpfc_nvme_ls_abort - Generic service routine to abort a prior
* NVME LS request
* @vport: The local port that issued the LS
* @ndlp: The remote port the LS was sent to
* @pnvme_lsreq: Pointer to LS request structure from the transport
*
* The driver validates the ndlp, looks for the LS, and aborts the
* LS if found.
*
* Returns:
* 0 : if LS found and aborted
* non-zero: various error conditions in form -Exxx
**/
int
__lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
struct nvmefc_ls_req *pnvme_lsreq)
{
struct lpfc_hba *phba = vport->phba;
struct lpfc_sli_ring *pring;
struct lpfc_iocbq *wqe, *next_wqe;
bool foundit = false;
if (!ndlp) {
lpfc_printf_log(phba, KERN_ERR,
LOG_NVME_DISC | LOG_NODE |
LOG_NVME_IOERR | LOG_NVME_ABTS,
"6049 NVMEx LS REQ Abort: Bad NDLP x%px DID "
"x%06x, Failing LS Req\n",
ndlp, ndlp ? ndlp->nlp_DID : 0);
return -EINVAL;
}
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS,
"6040 NVMEx LS REQ Abort: Issue LS_ABORT for lsreq "
"x%p rqstlen:%d rsplen:%d %pad %pad\n",
pnvme_lsreq, pnvme_lsreq->rqstlen,
pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
&pnvme_lsreq->rspdma);
/*
* Lock the ELS ring txcmplq and look for the wqe that matches
* this ELS. If found, issue an abort on the wqe.
*/
pring = phba->sli4_hba.nvmels_wq->pring;
spin_lock_irq(&phba->hbalock);
spin_lock(&pring->ring_lock);
list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) {
if (wqe->context2 == pnvme_lsreq) {
wqe->iocb_flag |= LPFC_DRIVER_ABORTED;
foundit = true;
break;
}
}
spin_unlock(&pring->ring_lock);
if (foundit)
lpfc_sli_issue_abort_iotag(phba, pring, wqe);
spin_unlock_irq(&phba->hbalock);
if (foundit)
return 0;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS,
"6213 NVMEx LS REQ Abort: Unable to locate req x%p\n",
pnvme_lsreq);
return -EINVAL;
}
static int
lpfc_nvme_xmt_ls_rsp(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_rsp *ls_rsp)
{
struct lpfc_async_xchg_ctx *axchg =
container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp);
struct lpfc_nvme_lport *lport;
int rc;
if (axchg->phba->pport->load_flag & FC_UNLOADING)
return -ENODEV;
lport = (struct lpfc_nvme_lport *)localport->private;
rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, __lpfc_nvme_xmt_ls_rsp_cmp);
if (rc) {
/*
* unless the failure is due to having already sent
* the response, an abort will be generated for the
* exchange if the rsp can't be sent.
*/
if (rc != -EALREADY)
atomic_inc(&lport->xmt_ls_abort);
return rc;
}
return 0;
}
/**
* lpfc_nvme_ls_abort - Abort a prior NVME LS request
* @lpfc_nvme_lport: Transport localport that LS is to be issued from.
* @lpfc_nvme_rport: Transport remoteport that LS is to be sent to.
* @pnvme_lsreq - the transport nvme_ls_req structure for the LS
*
* Driver registers this routine to abort a NVME LS request that is
* in progress (from the transport's perspective).
**/
static void
lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
@ -744,9 +947,7 @@ lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
struct lpfc_vport *vport;
struct lpfc_hba *phba;
struct lpfc_nodelist *ndlp;
LIST_HEAD(abort_list);
struct lpfc_sli_ring *pring;
struct lpfc_iocbq *wqe, *next_wqe;
int ret;
lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
if (unlikely(!lport))
@ -758,48 +959,10 @@ lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
return;
ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id);
if (!ndlp) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
"6049 Could not find node for DID %x\n",
pnvme_rport->port_id);
return;
}
/* Expand print to include key fields. */
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
"6040 ENTER. lport x%px, rport x%px lsreq x%px rqstlen:%d "
"rsplen:%d %pad %pad\n",
pnvme_lport, pnvme_rport,
pnvme_lsreq, pnvme_lsreq->rqstlen,
pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
&pnvme_lsreq->rspdma);
/*
* Lock the ELS ring txcmplq and build a local list of all ELS IOs
* that need an ABTS. The IOs need to stay on the txcmplq so that
* the abort operation completes them successfully.
*/
pring = phba->sli4_hba.nvmels_wq->pring;
spin_lock_irq(&phba->hbalock);
spin_lock(&pring->ring_lock);
list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) {
/* Add to abort_list on on NDLP match. */
if (lpfc_check_sli_ndlp(phba, pring, wqe, ndlp)) {
wqe->iocb_flag |= LPFC_DRIVER_ABORTED;
list_add_tail(&wqe->dlist, &abort_list);
}
}
spin_unlock(&pring->ring_lock);
spin_unlock_irq(&phba->hbalock);
/* Abort the targeted IOs and remove them from the abort list. */
list_for_each_entry_safe(wqe, next_wqe, &abort_list, dlist) {
ret = __lpfc_nvme_ls_abort(vport, ndlp, pnvme_lsreq);
if (!ret)
atomic_inc(&lport->xmt_ls_abort);
spin_lock_irq(&phba->hbalock);
list_del_init(&wqe->dlist);
lpfc_sli_issue_abort_iotag(phba, pring, wqe);
spin_unlock_irq(&phba->hbalock);
}
}
/* Fix up the existing sgls for NVME IO. */
@ -1911,6 +2074,7 @@ static struct nvme_fc_port_template lpfc_nvme_template = {
.fcp_io = lpfc_nvme_fcp_io_submit,
.ls_abort = lpfc_nvme_ls_abort,
.fcp_abort = lpfc_nvme_fcp_abort,
.xmt_ls_rsp = lpfc_nvme_xmt_ls_rsp,
.max_hw_queues = 1,
.max_sgl_segments = LPFC_NVME_DEFAULT_SEGS,
@ -2106,6 +2270,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
atomic_set(&lport->cmpl_fcp_err, 0);
atomic_set(&lport->cmpl_ls_xb, 0);
atomic_set(&lport->cmpl_ls_err, 0);
atomic_set(&lport->fc4NvmeLsRequests, 0);
atomic_set(&lport->fc4NvmeLsCmpls, 0);
}


@ -21,6 +21,10 @@
* included with this package. *
********************************************************************/
#include <linux/nvme.h>
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
#define LPFC_NVME_DEFAULT_SEGS (64 + 1) /* 256K IOs */
#define LPFC_NVME_ERSP_LEN 0x20
@ -74,3 +78,179 @@ struct lpfc_nvme_rport {
struct lpfc_nvme_fcpreq_priv {
struct lpfc_io_buf *nvme_buf;
};
/*
* set NVME LS request timeouts to 30s. It is larger than the 2*R_A_TOV
* set by the spec, which appears to have issues with some devices.
*/
#define LPFC_NVME_LS_TIMEOUT 30
#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */
#define LPFC_NVMET_RQE_MIN_POST 128
#define LPFC_NVMET_RQE_DEF_POST 512
#define LPFC_NVMET_RQE_DEF_COUNT 2048
#define LPFC_NVMET_SUCCESS_LEN 12
#define LPFC_NVMET_MRQ_AUTO 0
#define LPFC_NVMET_MRQ_MAX 16
#define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC)
/* Used for NVME Target */
#define LPFC_NVMET_INV_HOST_ACTIVE 1
struct lpfc_nvmet_tgtport {
struct lpfc_hba *phba;
struct completion *tport_unreg_cmp;
atomic_t state; /* tracks nvmet hosthandle invalidation */
/* Stats counters - lpfc_nvmet_unsol_ls_buffer */
atomic_t rcv_ls_req_in;
atomic_t rcv_ls_req_out;
atomic_t rcv_ls_req_drop;
atomic_t xmt_ls_abort;
atomic_t xmt_ls_abort_cmpl;
/* Stats counters - lpfc_nvmet_xmt_ls_rsp */
atomic_t xmt_ls_rsp;
atomic_t xmt_ls_drop;
/* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */
atomic_t xmt_ls_rsp_error;
atomic_t xmt_ls_rsp_aborted;
atomic_t xmt_ls_rsp_xb_set;
atomic_t xmt_ls_rsp_cmpl;
/* Stats counters - lpfc_nvmet_unsol_fcp_buffer */
atomic_t rcv_fcp_cmd_in;
atomic_t rcv_fcp_cmd_out;
atomic_t rcv_fcp_cmd_drop;
atomic_t rcv_fcp_cmd_defer;
atomic_t xmt_fcp_release;
/* Stats counters - lpfc_nvmet_xmt_fcp_op */
atomic_t xmt_fcp_drop;
atomic_t xmt_fcp_read_rsp;
atomic_t xmt_fcp_read;
atomic_t xmt_fcp_write;
atomic_t xmt_fcp_rsp;
/* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */
atomic_t xmt_fcp_rsp_xb_set;
atomic_t xmt_fcp_rsp_cmpl;
atomic_t xmt_fcp_rsp_error;
atomic_t xmt_fcp_rsp_aborted;
atomic_t xmt_fcp_rsp_drop;
/* Stats counters - lpfc_nvmet_xmt_fcp_abort */
atomic_t xmt_fcp_xri_abort_cqe;
atomic_t xmt_fcp_abort;
atomic_t xmt_fcp_abort_cmpl;
atomic_t xmt_abort_sol;
atomic_t xmt_abort_unsol;
atomic_t xmt_abort_rsp;
atomic_t xmt_abort_rsp_error;
/* Stats counters - defer IO */
atomic_t defer_ctx;
atomic_t defer_fod;
atomic_t defer_wqfull;
};
struct lpfc_nvmet_ctx_info {
struct list_head nvmet_ctx_list;
spinlock_t nvmet_ctx_list_lock; /* lock per CPU */
struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu;
struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu;
uint16_t nvmet_ctx_list_cnt;
char pad[16]; /* pad to a cache-line */
};
/* This retrieves the context info associated with the specified cpu / mrq */
#define lpfc_get_ctx_list(phba, cpu, mrq) \
(phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq))
/* Values for state field of struct lpfc_async_xchg_ctx */
#define LPFC_NVME_STE_LS_RCV 1
#define LPFC_NVME_STE_LS_ABORT 2
#define LPFC_NVME_STE_LS_RSP 3
#define LPFC_NVME_STE_RCV 4
#define LPFC_NVME_STE_DATA 5
#define LPFC_NVME_STE_ABORT 6
#define LPFC_NVME_STE_DONE 7
#define LPFC_NVME_STE_FREE 0xff
/* Values for flag field of struct lpfc_async_xchg_ctx */
#define LPFC_NVME_IO_INP 0x1 /* IO is in progress on exchange */
#define LPFC_NVME_ABORT_OP 0x2 /* Abort WQE issued on exchange */
#define LPFC_NVME_XBUSY 0x4 /* XB bit set on IO cmpl */
#define LPFC_NVME_CTX_RLS 0x8 /* ctx free requested */
#define LPFC_NVME_ABTS_RCV 0x10 /* ABTS received on exchange */
#define LPFC_NVME_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */
#define LPFC_NVME_DEFER_WQFULL 0x40 /* Waiting on a free WQE */
#define LPFC_NVME_TNOTIFY 0x80 /* notify transport of abts */
struct lpfc_async_xchg_ctx {
union {
struct nvmefc_tgt_fcp_req fcp_req;
} hdlrctx;
struct list_head list;
struct lpfc_hba *phba;
struct lpfc_nodelist *ndlp;
struct nvmefc_ls_req *ls_req;
struct nvmefc_ls_rsp ls_rsp;
struct lpfc_iocbq *wqeq;
struct lpfc_iocbq *abort_wqeq;
spinlock_t ctxlock; /* protect flag access */
uint32_t sid;
uint32_t offset;
uint16_t oxid;
uint16_t size;
uint16_t entry_cnt;
uint16_t cpu;
uint16_t idx;
uint16_t state;
uint16_t flag;
void *payload;
struct rqb_dmabuf *rqb_buffer;
struct lpfc_nvmet_ctxbuf *ctxbuf;
struct lpfc_sli4_hdw_queue *hdwq;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
uint64_t ts_isr_cmd;
uint64_t ts_cmd_nvme;
uint64_t ts_nvme_data;
uint64_t ts_data_wqput;
uint64_t ts_isr_data;
uint64_t ts_data_nvme;
uint64_t ts_nvme_status;
uint64_t ts_status_wqput;
uint64_t ts_isr_status;
uint64_t ts_status_nvme;
#endif
};
/* routines found in lpfc_nvme.c */
int __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
struct nvmefc_ls_req *pnvme_lsreq,
void (*gen_req_cmp)(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe));
void __lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport,
struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe);
int __lpfc_nvme_ls_abort(struct lpfc_vport *vport,
struct lpfc_nodelist *ndlp, struct nvmefc_ls_req *pnvme_lsreq);
/* routines found in lpfc_nvmet.c */
int lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
struct lpfc_async_xchg_ctx *ctxp, uint32_t sid,
uint16_t xri);
int __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg,
struct nvmefc_ls_rsp *ls_rsp,
void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe));
void __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe);

Diff for this file is not shown because of its large size.


@ -1,158 +0,0 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
* Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
* Broadcom refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2004-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
* www.broadcom.com *
* Portions Copyright (C) 2004-2005 Christoph Hellwig *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of version 2 of the GNU General *
* Public License as published by the Free Software Foundation. *
* This program is distributed in the hope that it will be useful. *
* ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
* WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
* DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
* TO BE LEGALLY INVALID. See the GNU General Public License for *
* more details, a copy of which can be found in the file COPYING *
* included with this package. *
********************************************************************/
#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */
#define LPFC_NVMET_RQE_MIN_POST 128
#define LPFC_NVMET_RQE_DEF_POST 512
#define LPFC_NVMET_RQE_DEF_COUNT 2048
#define LPFC_NVMET_SUCCESS_LEN 12
#define LPFC_NVMET_MRQ_AUTO 0
#define LPFC_NVMET_MRQ_MAX 16
#define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC)
/* Used for NVME Target */
struct lpfc_nvmet_tgtport {
struct lpfc_hba *phba;
struct completion *tport_unreg_cmp;
/* Stats counters - lpfc_nvmet_unsol_ls_buffer */
atomic_t rcv_ls_req_in;
atomic_t rcv_ls_req_out;
atomic_t rcv_ls_req_drop;
atomic_t xmt_ls_abort;
atomic_t xmt_ls_abort_cmpl;
/* Stats counters - lpfc_nvmet_xmt_ls_rsp */
atomic_t xmt_ls_rsp;
atomic_t xmt_ls_drop;
/* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */
atomic_t xmt_ls_rsp_error;
atomic_t xmt_ls_rsp_aborted;
atomic_t xmt_ls_rsp_xb_set;
atomic_t xmt_ls_rsp_cmpl;
/* Stats counters - lpfc_nvmet_unsol_fcp_buffer */
atomic_t rcv_fcp_cmd_in;
atomic_t rcv_fcp_cmd_out;
atomic_t rcv_fcp_cmd_drop;
atomic_t rcv_fcp_cmd_defer;
atomic_t xmt_fcp_release;
/* Stats counters - lpfc_nvmet_xmt_fcp_op */
atomic_t xmt_fcp_drop;
atomic_t xmt_fcp_read_rsp;
atomic_t xmt_fcp_read;
atomic_t xmt_fcp_write;
atomic_t xmt_fcp_rsp;
/* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */
atomic_t xmt_fcp_rsp_xb_set;
atomic_t xmt_fcp_rsp_cmpl;
atomic_t xmt_fcp_rsp_error;
atomic_t xmt_fcp_rsp_aborted;
atomic_t xmt_fcp_rsp_drop;
/* Stats counters - lpfc_nvmet_xmt_fcp_abort */
atomic_t xmt_fcp_xri_abort_cqe;
atomic_t xmt_fcp_abort;
atomic_t xmt_fcp_abort_cmpl;
atomic_t xmt_abort_sol;
atomic_t xmt_abort_unsol;
atomic_t xmt_abort_rsp;
atomic_t xmt_abort_rsp_error;
/* Stats counters - defer IO */
atomic_t defer_ctx;
atomic_t defer_fod;
atomic_t defer_wqfull;
};
struct lpfc_nvmet_ctx_info {
struct list_head nvmet_ctx_list;
spinlock_t nvmet_ctx_list_lock; /* lock per CPU */
struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu;
struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu;
uint16_t nvmet_ctx_list_cnt;
char pad[16]; /* pad to a cache-line */
};
/* This retrieves the context info associated with the specified cpu / mrq */
#define lpfc_get_ctx_list(phba, cpu, mrq) \
(phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq))
struct lpfc_nvmet_rcv_ctx {
union {
struct nvmefc_tgt_ls_req ls_req;
struct nvmefc_tgt_fcp_req fcp_req;
} ctx;
struct list_head list;
struct lpfc_hba *phba;
struct lpfc_iocbq *wqeq;
struct lpfc_iocbq *abort_wqeq;
spinlock_t ctxlock; /* protect flag access */
uint32_t sid;
uint32_t offset;
uint16_t oxid;
uint16_t size;
uint16_t entry_cnt;
uint16_t cpu;
uint16_t idx;
uint16_t state;
/* States */
#define LPFC_NVMET_STE_LS_RCV 1
#define LPFC_NVMET_STE_LS_ABORT 2
#define LPFC_NVMET_STE_LS_RSP 3
#define LPFC_NVMET_STE_RCV 4
#define LPFC_NVMET_STE_DATA 5
#define LPFC_NVMET_STE_ABORT 6
#define LPFC_NVMET_STE_DONE 7
#define LPFC_NVMET_STE_FREE 0xff
uint16_t flag;
#define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */
#define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */
#define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */
#define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */
#define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */
#define LPFC_NVMET_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */
#define LPFC_NVMET_DEFER_WQFULL 0x40 /* Waiting on a free WQE */
#define LPFC_NVMET_TNOTIFY 0x80 /* notify transport of abts */
struct rqb_dmabuf *rqb_buffer;
struct lpfc_nvmet_ctxbuf *ctxbuf;
struct lpfc_sli4_hdw_queue *hdwq;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
uint64_t ts_isr_cmd;
uint64_t ts_cmd_nvme;
uint64_t ts_nvme_data;
uint64_t ts_data_wqput;
uint64_t ts_isr_data;
uint64_t ts_data_nvme;
uint64_t ts_nvme_status;
uint64_t ts_status_wqput;
uint64_t ts_isr_status;
uint64_t ts_status_nvme;
#endif
};


@ -39,8 +39,6 @@
#include <asm/set_memory.h>
#endif
#include <linux/nvme-fc-driver.h>
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@ -50,7 +48,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
#include "lpfc_nvmet.h"
#include "lpfc_crtn.h"
#include "lpfc_logmsg.h"
#include "lpfc_compat.h"
@ -2795,6 +2792,123 @@ lpfc_sli_get_buff(struct lpfc_hba *phba,
return &hbq_entry->dbuf;
}
/**
* lpfc_nvme_unsol_ls_handler - Process an unsolicited event data buffer
* containing a NVME LS request.
* @phba: pointer to lpfc hba data structure.
* @piocb: pointer to the iocbq struct representing the sequence starting
* frame.
*
* This routine initially validates the NVME LS, validates there is a login
* with the port that sent the LS, and then calls the appropriate nvme host
* or target LS request handler.
**/
static void
lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
{
struct lpfc_nodelist *ndlp;
struct lpfc_dmabuf *d_buf;
struct hbq_dmabuf *nvmebuf;
struct fc_frame_header *fc_hdr;
struct lpfc_async_xchg_ctx *axchg = NULL;
char *failwhy = NULL;
uint32_t oxid, sid, did, fctl, size;
int ret = 1;
d_buf = piocb->context2;
nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf);
fc_hdr = nvmebuf->hbuf.virt;
oxid = be16_to_cpu(fc_hdr->fh_ox_id);
sid = sli4_sid_from_fc_hdr(fc_hdr);
did = sli4_did_from_fc_hdr(fc_hdr);
fctl = (fc_hdr->fh_f_ctl[0] << 16 |
fc_hdr->fh_f_ctl[1] << 8 |
fc_hdr->fh_f_ctl[2]);
size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl);
lpfc_nvmeio_data(phba, "NVME LS RCV: xri x%x sz %d from %06x\n",
oxid, size, sid);
if (phba->pport->load_flag & FC_UNLOADING) {
failwhy = "Driver Unloading";
} else if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) {
failwhy = "NVME FC4 Disabled";
} else if (!phba->nvmet_support && !phba->pport->localport) {
failwhy = "No Localport";
} else if (phba->nvmet_support && !phba->targetport) {
failwhy = "No Targetport";
} else if (unlikely(fc_hdr->fh_r_ctl != FC_RCTL_ELS4_REQ)) {
failwhy = "Bad NVME LS R_CTL";
} else if (unlikely((fctl & 0x00FF0000) !=
(FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT))) {
failwhy = "Bad NVME LS F_CTL";
} else {
axchg = kzalloc(sizeof(*axchg), GFP_ATOMIC);
if (!axchg)
failwhy = "No CTX memory";
}
if (unlikely(failwhy)) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR,
"6154 Drop NVME LS: SID %06X OXID x%X: %s\n",
sid, oxid, failwhy);
goto out_fail;
}
/* validate the source of the LS is logged in */
ndlp = lpfc_findnode_did(phba->pport, sid);
if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
(ndlp->nlp_state != NLP_STE_MAPPED_NODE))) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
"6216 NVME Unsol rcv: No ndlp: "
"NPort_ID x%x oxid x%x\n",
sid, oxid);
goto out_fail;
}
axchg->phba = phba;
axchg->ndlp = ndlp;
axchg->size = size;
axchg->oxid = oxid;
axchg->sid = sid;
axchg->wqeq = NULL;
axchg->state = LPFC_NVME_STE_LS_RCV;
axchg->entry_cnt = 1;
axchg->rqb_buffer = (void *)nvmebuf;
axchg->hdwq = &phba->sli4_hba.hdwq[0];
axchg->payload = nvmebuf->dbuf.virt;
INIT_LIST_HEAD(&axchg->list);
if (phba->nvmet_support)
ret = lpfc_nvmet_handle_lsreq(phba, axchg);
else
ret = lpfc_nvme_handle_lsreq(phba, axchg);
/* if zero, LS was successfully handled. If non-zero, LS not handled */
if (!ret)
return;
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR,
"6155 Drop NVME LS from DID %06X: SID %06X OXID x%X "
"NVMe%s handler failed %d\n",
did, sid, oxid,
(phba->nvmet_support) ? "T" : "I", ret);
out_fail:
/* recycle receive buffer */
lpfc_in_buf_free(phba, &nvmebuf->dbuf);
/* If start of new exchange, abort it */
if (axchg && (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX)))
ret = lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid);
if (ret)
kfree(axchg);
}
/**
* lpfc_complete_unsol_iocb - Complete an unsolicited sequence
* @phba: Pointer to HBA context object.
@ -2816,7 +2930,7 @@ lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
switch (fch_type) {
case FC_TYPE_NVME:
lpfc_nvmet_unsol_ls_event(phba, pring, saveq);
lpfc_nvme_unsol_ls_handler(phba, saveq);
return 1;
default:
break;
@ -13981,8 +14095,8 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
/* Just some basic sanity checks on FCP Command frame */
fctl = (fc_hdr->fh_f_ctl[0] << 16 |
fc_hdr->fh_f_ctl[1] << 8 |
fc_hdr->fh_f_ctl[2]);
fc_hdr->fh_f_ctl[1] << 8 |
fc_hdr->fh_f_ctl[2]);
if (((fctl &
(FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) !=
(FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) ||
@ -19891,7 +20005,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
struct lpfc_iocbq *pwqe)
{
union lpfc_wqe128 *wqe = &pwqe->wqe;
struct lpfc_nvmet_rcv_ctx *ctxp;
struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_queue *wq;
struct lpfc_sglq *sglq;
struct lpfc_sli_ring *pring;


@ -2164,18 +2164,6 @@ const struct file_operations def_blk_fops = {
.fallocate = blkdev_fallocate,
};
int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
int res;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
res = blkdev_ioctl(bdev, 0, cmd, arg);
set_fs(old_fs);
return res;
}
EXPORT_SYMBOL(ioctl_by_bdev);
/**
* lookup_bdev - lookup a struct block_device by name
* @pathname: special file representing the block device

include/linux/dasd_mod.h (new file)

@ -0,0 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef DASD_MOD_H
#define DASD_MOD_H
#include <asm/dasd.h>
extern int dasd_biodasdinfo(struct gendisk *disk, dasd_information2_t *info);
#endif


@ -2646,7 +2646,6 @@ extern int sync_filesystem(struct super_block *);
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
#ifdef CONFIG_BLOCK
extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);


@ -159,8 +159,6 @@ struct disk_part_tbl {
struct disk_events;
struct badblocks;
#if defined(CONFIG_BLK_DEV_INTEGRITY)
struct blk_integrity {
const struct blk_integrity_profile *profile;
unsigned char flags;
@ -169,8 +167,6 @@ struct blk_integrity {
unsigned char tag_size;
};
#endif /* CONFIG_BLK_DEV_INTEGRITY */
struct gendisk {
/* major, first_minor and minors are input parameters only,
* don't use directly. Use disk_devt() and disk_max_parts().


@ -9,6 +9,124 @@
#include <linux/scatterlist.h>
/*
* ********************** FC-NVME LS API ********************
*
* Data structures used by both FC-NVME hosts and FC-NVME
* targets to perform FC-NVME LS requests or transmit
* responses.
*
* ***********************************************************
*/
/**
* struct nvmefc_ls_req - Request structure passed from the transport
* to the LLDD to perform a NVME-FC LS request and obtain
* a response.
* Used by nvme-fc transport (host) to send LS's such as
* Create Association, Create Connection and Disconnect
* Association.
* Used by the nvmet-fc transport (controller) to send
* LS's such as Disconnect Association.
*
* Values set by the requestor prior to calling the LLDD ls_req entrypoint:
* @rqstaddr: pointer to request buffer
* @rqstdma: PCI DMA address of request buffer
* @rqstlen: Length, in bytes, of request buffer
* @rspaddr: pointer to response buffer
* @rspdma: PCI DMA address of response buffer
* @rsplen: Length, in bytes, of response buffer
* @timeout: Maximum amount of time, in seconds, to wait for the LS response.
* If timeout exceeded, LLDD to abort LS exchange and complete
* LS request with error status.
* @private: pointer to memory allocated alongside the ls request structure
* that is specifically for the LLDD to use while processing the
* request. The length of the buffer corresponds to the
* lsrqst_priv_sz value specified in the xxx_template supplied
* by the LLDD.
* @done: The callback routine the LLDD is to invoke upon completion of
* the LS request. req argument is the pointer to the original LS
* request structure. Status argument must be 0 upon success, a
* negative errno on failure (example: -ENXIO).
*/
struct nvmefc_ls_req {
void *rqstaddr;
dma_addr_t rqstdma;
u32 rqstlen;
void *rspaddr;
dma_addr_t rspdma;
u32 rsplen;
u32 timeout;
void *private;
void (*done)(struct nvmefc_ls_req *req, int status);
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
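
As a rough illustration of the contract documented above, a hypothetical LLDD ("foo") ls_req entrypoint might look like the sketch below. foo_hw_send_ls() and the foo_* names are placeholders, not a real API; only struct nvmefc_ls_req and its fields come from the header.

#include <linux/nvme-fc-driver.h>

struct foo_ls_op {			/* carved out of lsreq->private,
					 * sized by lsrqst_priv_sz */
	struct nvmefc_ls_req *lsreq;
};

/* Placeholder for the driver's hardware transmit path; it must honor
 * lsreq->timeout (seconds) and abort the exchange if it expires. */
static int foo_hw_send_ls(struct nvme_fc_remote_port *rport,
			  struct foo_ls_op *op);

static int foo_ls_req(struct nvme_fc_local_port *lport,
		      struct nvme_fc_remote_port *rport,
		      struct nvmefc_ls_req *lsreq)
{
	struct foo_ls_op *op = lsreq->private;

	op->lsreq = lsreq;
	/* rqstdma/rspdma are already DMA-mapped by the transport. */
	return foo_hw_send_ls(rport, op);
}

/* Hardware completion path: 0 on success, negative errno on failure. */
static void foo_ls_req_complete(struct foo_ls_op *op, int status)
{
	op->lsreq->done(op->lsreq, status);
}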
/**
* struct nvmefc_ls_rsp - Structure passed from the transport to the LLDD
* to request the transmit the NVME-FC LS response to a
* NVME-FC LS request. The structure originates in the LLDD
* and is given to the transport via the xxx_rcv_ls_req()
* transport routine. As such, the structure represents the
* FC exchange context for the NVME-FC LS request that was
* received and which the response is to be sent for.
* Used by the LLDD to pass the nvmet-fc transport (controller)
* received LS's such as Create Association, Create Connection
* and Disconnect Association.
* Used by the LLDD to pass the nvme-fc transport (host)
* received LS's such as Disconnect Association or Disconnect
* Connection.
*
* The structure is allocated by the LLDD whenever a LS Request is received
* from the FC link. The address of the structure is passed to the nvmet-fc
* or nvme-fc layer via the xxx_rcv_ls_req() transport routines.
*
* The address of the structure is to be passed back to the LLDD
* when the response is to be transmitted. The LLDD will use the address to
* map back to the LLDD exchange structure which maintains information such
* as the remote N_Port that sent the LS as well as any FC exchange context.
* Upon completion of the LS response transmit, the LLDD will pass the
* address of the structure back to the transport LS rsp done() routine,
* allowing the transport to release dma resources. Upon completion of
* the done() routine, no further access to the structure will be made by
* the transport and the LLDD can de-allocate the structure.
*
* Field initialization:
* At the time of the xxx_rcv_ls_req() call, there is no content that
* is valid in the structure.
*
* When the structure is used for the LLDD->xmt_ls_rsp() call, the
* transport layer will fully set the fields in order to specify the
* response payload buffer and its length as well as the done routine
* to be called upon completion of the transmit. The transport layer
* will also set a private pointer for its own use in the done routine.
*
* Values set by the transport layer prior to calling the LLDD xmt_ls_rsp
* entrypoint:
* @rspbuf: pointer to the LS response buffer
* @rspdma: PCI DMA address of the LS response buffer
* @rsplen: Length, in bytes, of the LS response buffer
* @done: The callback routine the LLDD is to invoke upon completion of
* transmitting the LS response. req argument is the pointer to
* the original ls request.
* @nvme_fc_private: pointer to an internal transport-specific structure
* used as part of the transport done() processing. The LLDD is
* not to access this pointer.
*/
struct nvmefc_ls_rsp {
void *rspbuf;
dma_addr_t rspdma;
u16 rsplen;
void (*done)(struct nvmefc_ls_rsp *rsp);
void *nvme_fc_private; /* LLDD is not to access !! */
};
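/*
 * Illustrative sketch only (not part of the header): how an LLDD might
 * service xmt_ls_rsp() with the structure above. struct lldd_ls_exchange,
 * lldd_hw_xmt() and lldd_free_exchange() are hypothetical; only the
 * nvmefc_ls_rsp fields and the done() contract come from this header.
 */
struct lldd_ls_exchange {
	struct nvmefc_ls_rsp	*ls_rsp;	/* saved at xmt_ls_rsp() time */
	/* ... FC exchange / hardware context ... */
};
extern int lldd_hw_xmt(struct lldd_ls_exchange *exch, dma_addr_t buf, u16 len);
extern void lldd_free_exchange(struct lldd_ls_exchange *exch);

static int lldd_example_xmt_ls_rsp(struct lldd_ls_exchange *exch,
				   struct nvmefc_ls_rsp *ls_rsp)
{
	exch->ls_rsp = ls_rsp;
	/* send rsplen bytes from the transport-owned, DMA-mapped rspdma */
	return lldd_hw_xmt(exch, ls_rsp->rspdma, ls_rsp->rsplen);
}

/* called from the LLDD's transmit-complete path */
static void lldd_example_xmt_ls_rsp_done(struct lldd_ls_exchange *exch)
{
	exch->ls_rsp->done(exch->ls_rsp);	/* transport releases its DMA */
	lldd_free_exchange(exch);		/* no further transport access */
}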
/*
* ********************** LLDD FC-NVME Host API ********************
*
@ -18,7 +136,6 @@
*/
/**
* struct nvme_fc_port_info - port-specific ids and FC connection-specific
* data element used during NVME Host role
@ -43,49 +160,6 @@ struct nvme_fc_port_info {
u32 dev_loss_tmo;
};
/**
* struct nvmefc_ls_req - Request structure passed from NVME-FC transport
* to LLDD in order to perform a NVME FC-4 LS
* request and obtain a response.
*
* Values set by the NVME-FC layer prior to calling the LLDD ls_req
* entrypoint.
* @rqstaddr: pointer to request buffer
* @rqstdma: PCI DMA address of request buffer
* @rqstlen: Length, in bytes, of request buffer
* @rspaddr: pointer to response buffer
* @rspdma: PCI DMA address of response buffer
* @rsplen: Length, in bytes, of response buffer
* @timeout: Maximum amount of time, in seconds, to wait for the LS response.
* If timeout exceeded, LLDD to abort LS exchange and complete
* LS request with error status.
* @private: pointer to memory allocated alongside the ls request structure
* that is specifically for the LLDD to use while processing the
* request. The length of the buffer corresponds to the
* lsrqst_priv_sz value specified in the nvme_fc_port_template
* supplied by the LLDD.
* @done: The callback routine the LLDD is to invoke upon completion of
* the LS request. req argument is the pointer to the original LS
* request structure. Status argument must be 0 upon success, a
* negative errno on failure (example: -ENXIO).
*/
struct nvmefc_ls_req {
void *rqstaddr;
dma_addr_t rqstdma;
u32 rqstlen;
void *rspaddr;
dma_addr_t rspdma;
u32 rsplen;
u32 timeout;
void *private;
void (*done)(struct nvmefc_ls_req *req, int status);
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
enum nvmefc_fcp_datadir {
NVMEFC_FCP_NODATA, /* payload_length and sg_cnt will be zero */
NVMEFC_FCP_WRITE,
@ -337,6 +411,21 @@ struct nvme_fc_remote_port {
* indicating an FC transport Aborted status.
* Entrypoint is Mandatory.
*
* @xmt_ls_rsp: Called to transmit the response to a FC-NVME FC-4 LS service.
* The nvmefc_ls_rsp structure is the same LLDD-supplied exchange
* structure specified in the nvme_fc_rcv_ls_req() call made when
* the LS request was received. The structure will fully describe
* the buffers for the response payload and the dma address of the
* payload. The LLDD is to transmit the response (or return a
* non-zero errno status), and upon completion of the transmit, call
* the "done" routine specified in the nvmefc_ls_rsp structure
* (argument to done is the address of the nvmefc_ls_rsp structure
* itself). Upon the completion of the done routine, the LLDD shall
* consider the LS handling complete and the nvmefc_ls_rsp structure
* may be freed/released.
* Entrypoint is mandatory if the LLDD calls the nvme_fc_rcv_ls_req()
* entrypoint.
*
* @max_hw_queues: indicates the maximum number of hw queues the LLDD
* supports for cpu affinitization.
* Value is Mandatory. Must be at least 1.
@ -371,7 +460,7 @@ struct nvme_fc_remote_port {
* @lsrqst_priv_sz: The LLDD sets this field to the amount of additional
* memory that it would like fc nvme layer to allocate on the LLDD's
* behalf whenever a ls request structure is allocated. The additional
* memory area solely for the of the LLDD and its location is
* memory area is solely for use by the LLDD and its location is
* specified by the ls_request->private pointer.
* Value is Mandatory. Allowed to be zero.
*
@ -405,6 +494,9 @@ struct nvme_fc_port_template {
struct nvme_fc_remote_port *,
void *hw_queue_handle,
struct nvmefc_fcp_req *);
int (*xmt_ls_rsp)(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *rport,
struct nvmefc_ls_rsp *ls_rsp);
u32 max_hw_queues;
u16 max_sgl_segments;
@ -441,6 +533,34 @@ void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport);
int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport,
u32 dev_loss_tmo);
/*
* Routine called to pass a NVME-FC LS request, received by the lldd,
* to the nvme-fc transport.
*
* If the return value is zero: the LS was successfully accepted by the
* transport.
* If the return value is non-zero: the transport has not accepted the
* LS. The lldd should ABTS-LS the LS.
*
* Note: if the LLDD receives an ABTS for the LS prior to the transport
* calling the ops->xmt_ls_rsp() routine to transmit a response, the LLDD
* shall mark the LS as aborted, and when xmt_ls_rsp() is called: the
* response shall not be transmitted and the nvmefc_ls_rsp done()
* routine shall be called. The LLDD may transmit the ABTS response as
* soon as the LS is marked aborted, or may delay until the xmt_ls_rsp()
* call is made.
* Note: if an RCV LS was successfully posted to the transport and the
* remoteport is then unregistered before xmt_ls_rsp() was called for
* the lsrsp structure, the transport will still call xmt_ls_rsp()
* afterward to clean up the outstanding lsrsp structure. The LLDD should
* noop the transmission of the rsp and call the lsrsp->done() routine
* to allow the lsrsp structure to be released.
*/
int nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_rsp *lsrsp,
void *lsreqbuf, u32 lsreqbuf_len);
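/*
 * Illustrative sketch only (not part of the header): an LLDD posting a
 * received LS to the host-side transport. struct lldd_ls_rcv_ctx and
 * lldd_abts_ls() are hypothetical; nvme_fc_rcv_ls_req() above is the real
 * entrypoint, and a non-zero return means the LS was not accepted.
 */
struct lldd_ls_rcv_ctx {
	struct nvmefc_ls_rsp	ls_rsp;		/* later handed to xmt_ls_rsp() */
	void			*rqst_payload;	/* received LS payload */
	u32			rqst_len;
	/* ... FC exchange state ... */
};
extern void lldd_abts_ls(struct lldd_ls_rcv_ctx *ctx);

static void lldd_example_handle_rcv_ls(struct nvme_fc_remote_port *rport,
				       struct lldd_ls_rcv_ctx *ctx)
{
	if (nvme_fc_rcv_ls_req(rport, &ctx->ls_rsp,
			       ctx->rqst_payload, ctx->rqst_len))
		lldd_abts_ls(ctx);	/* transport refused the LS: ABTS it */
	/* otherwise keep ctx until xmt_ls_rsp(&ctx->ls_rsp) and done() */
}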
/*
* *************** LLDD FC-NVME Target/Subsystem API ***************
@ -470,55 +590,6 @@ struct nvmet_fc_port_info {
};
/**
* struct nvmefc_tgt_ls_req - Structure used between LLDD and NVMET-FC
* layer to represent the exchange context for
* a FC-NVME Link Service (LS).
*
* The structure is allocated by the LLDD whenever a LS Request is received
* from the FC link. The address of the structure is passed to the nvmet-fc
* layer via the nvmet_fc_rcv_ls_req() call. The address of the structure
* will be passed back to the LLDD when the response is to be transmit.
* The LLDD is to use the address to map back to the LLDD exchange structure
* which maintains information such as the targetport the LS was received
* on, the remote FC NVME initiator that sent the LS, and any FC exchange
* context. Upon completion of the LS response transmit, the address of the
* structure will be passed back to the LS rsp done() routine, allowing the
* nvmet-fc layer to release dma resources. Upon completion of the done()
* routine, no further access will be made by the nvmet-fc layer and the
* LLDD can de-allocate the structure.
*
* Field initialization:
* At the time of the nvmet_fc_rcv_ls_req() call, there is no content that
* is valid in the structure.
*
* When the structure is used for the LLDD->xmt_ls_rsp() call, the nvmet-fc
* layer will fully set the fields in order to specify the response
* payload buffer and its length as well as the done routine to be called
* upon completion of the transmit. The nvmet-fc layer will also set a
* private pointer for its own use in the done routine.
*
* Values set by the NVMET-FC layer prior to calling the LLDD xmt_ls_rsp
* entrypoint.
* @rspbuf: pointer to the LS response buffer
* @rspdma: PCI DMA address of the LS response buffer
* @rsplen: Length, in bytes, of the LS response buffer
* @done: The callback routine the LLDD is to invoke upon completion of
* transmitting the LS response. req argument is the pointer to
* the original ls request.
* @nvmet_fc_private: pointer to an internal NVMET-FC layer structure used
* as part of the NVMET-FC processing. The LLDD is not to access
* this pointer.
*/
struct nvmefc_tgt_ls_req {
void *rspbuf;
dma_addr_t rspdma;
u16 rsplen;
void (*done)(struct nvmefc_tgt_ls_req *req);
void *nvmet_fc_private; /* LLDD is not to access !! */
};
/* Operations that NVME-FC layer may request the LLDD to perform for FCP */
enum {
NVMET_FCOP_READDATA = 1, /* xmt data to initiator */
@ -693,17 +764,19 @@ struct nvmet_fc_target_port {
* Entrypoint is Mandatory.
*
* @xmt_ls_rsp: Called to transmit the response to a FC-NVME FC-4 LS service.
* The nvmefc_tgt_ls_req structure is the same LLDD-supplied exchange
* The nvmefc_ls_rsp structure is the same LLDD-supplied exchange
* structure specified in the nvmet_fc_rcv_ls_req() call made when
* the LS request was received. The structure will fully describe
* the buffers for the response payload and the dma address of the
* payload. The LLDD is to transmit the response (or return a non-zero
* errno status), and upon completion of the transmit, call the
* "done" routine specified in the nvmefc_tgt_ls_req structure
* (argument to done is the ls request structure itself).
* After calling the done routine, the LLDD shall consider the
* LS handling complete and the nvmefc_tgt_ls_req structure may
* be freed/released.
* payload. The LLDD is to transmit the response (or return a
* non-zero errno status), and upon completion of the transmit, call
* the "done" routine specified in the nvmefc_ls_rsp structure
* (argument to done is the address of the nvmefc_ls_rsp structure
* itself). Upon the completion of the done() routine, the LLDD shall
* consider the LS handling complete and the nvmefc_ls_rsp structure
* may be freed/released.
* The transport will always call the xmt_ls_rsp() routine for any
* LS received.
* Entrypoint is Mandatory.
*
* @fcp_op: Called to perform a data transfer or transmit a response.
@ -798,6 +871,39 @@ struct nvmet_fc_target_port {
* should cause the initiator to rescan the discovery controller
* on the targetport.
*
* @ls_req: Called to issue a FC-NVME FC-4 LS service request.
* The nvmefc_ls_req structure will fully describe the buffers for
* the request payload and where to place the response payload.
* The targetport that is to issue the LS request is identified by
* the targetport argument. The remote port that is to receive the
* LS request is identified by the hosthandle argument. The nvmet-fc
* transport is only allowed to issue FC-NVME LS's on behalf of an
* association that was previously created by a Create Association LS.
* The hosthandle will originate from the LLDD in the
* nvmefc_ls_rsp structure for the Create Association LS that
* was delivered to the transport. The transport will save the
* hosthandle as an attribute of the association. If the LLDD
* loses connectivity with the remote port, it must call the
* nvmet_fc_invalidate_host() routine to remove any references to
* the remote port in the transport.
* The LLDD is to allocate an exchange, issue the LS request, obtain
* the LS response, and call the "done" routine specified in the
* request structure (argument to done is the ls request structure
* itself).
* Entrypoint is Optional - but highly recommended.
*
* @ls_abort: called to request the LLDD to abort the indicated ls request.
* The call may return before the abort has completed. After aborting
* the request, the LLDD must still call the ls request done routine
* indicating an FC transport Aborted status.
* Entrypoint is Mandatory if the ls_req entry point is specified.
*
* @host_release: called to inform the LLDD that the request to invalidate
* the host port indicated by the hosthandle has been fully completed.
* No associations exist with the host port and there will be no
* further references to hosthandle.
* Entrypoint is Mandatory if the lldd calls nvmet_fc_invalidate_host().
*
* @max_hw_queues: indicates the maximum number of hw queues the LLDD
* supports for cpu affinitization.
* Value is Mandatory. Must be at least 1.
@ -826,11 +932,19 @@ struct nvmet_fc_target_port {
* area is solely for use by the LLDD and its location is specified by
* the targetport->private pointer.
* Value is Mandatory. Allowed to be zero.
*
* @lsrqst_priv_sz: The LLDD sets this field to the amount of additional
* memory that it would like nvmet-fc layer to allocate on the LLDD's
* behalf whenever a ls request structure is allocated. The additional
* memory area is solely for use by the LLDD and its location is
* specified by the ls_request->private pointer.
* Value is Mandatory. Allowed to be zero.
*
*/
struct nvmet_fc_target_template {
void (*targetport_delete)(struct nvmet_fc_target_port *tgtport);
int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_ls_req *tls_req);
struct nvmefc_ls_rsp *ls_rsp);
int (*fcp_op)(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *fcpreq);
void (*fcp_abort)(struct nvmet_fc_target_port *tgtport,
@ -840,6 +954,11 @@ struct nvmet_fc_target_template {
void (*defer_rcv)(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *fcpreq);
void (*discovery_event)(struct nvmet_fc_target_port *tgtport);
int (*ls_req)(struct nvmet_fc_target_port *targetport,
void *hosthandle, struct nvmefc_ls_req *lsreq);
void (*ls_abort)(struct nvmet_fc_target_port *targetport,
void *hosthandle, struct nvmefc_ls_req *lsreq);
void (*host_release)(void *hosthandle);
u32 max_hw_queues;
u16 max_sgl_segments;
@ -848,7 +967,9 @@ struct nvmet_fc_target_template {
u32 target_features;
/* sizes of additional private data for data structures */
u32 target_priv_sz;
u32 lsrqst_priv_sz;
};
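/*
 * Illustrative sketch only (not part of the header): wiring the new LS
 * entrypoints into a target template. The lldd_tgt_* functions and
 * struct lldd_tgt_ls_priv are hypothetical; member names and signatures
 * come from struct nvmet_fc_target_template above.
 */
struct lldd_tgt_ls_priv { void *exch; };	/* per-LS scratch, see lsrqst_priv_sz */

int lldd_tgt_ls_req(struct nvmet_fc_target_port *targetport,
		    void *hosthandle, struct nvmefc_ls_req *lsreq);
void lldd_tgt_ls_abort(struct nvmet_fc_target_port *targetport,
		       void *hosthandle, struct nvmefc_ls_req *lsreq);
void lldd_tgt_host_release(void *hosthandle);

static struct nvmet_fc_target_template lldd_example_tgt_template = {
	/* ... existing mandatory entrypoints (xmt_ls_rsp, fcp_op, ...) ... */
	.ls_req		= lldd_tgt_ls_req,	/* optional, but recommended */
	.ls_abort	= lldd_tgt_ls_abort,	/* required once ls_req is set */
	.host_release	= lldd_tgt_host_release, /* required with invalidate_host */
	.lsrqst_priv_sz	= sizeof(struct lldd_tgt_ls_priv),
};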
@ -859,10 +980,61 @@ int nvmet_fc_register_targetport(struct nvmet_fc_port_info *portinfo,
int nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *tgtport);
/*
* Routine called to pass a NVME-FC LS request, received by the lldd,
* to the nvmet-fc transport.
*
* If the return value is zero: the LS was successfully accepted by the
* transport.
* If the return value is non-zero: the transport has not accepted the
* LS. The lldd should ABTS-LS the LS.
*
* Note: if the LLDD receives an ABTS for the LS prior to the transport
* calling the ops->xmt_ls_rsp() routine to transmit a response, the LLDD
* shall mark the LS as aborted, and when xmt_ls_rsp() is called: the
* response shall not be transmitted and the nvmefc_ls_rsp done()
* routine shall be called. The LLDD may transmit the ABTS response as
* soon as the LS is marked aborted, or may delay until the xmt_ls_rsp()
* call is made.
* Note: if an RCV LS was successfully posted to the transport and the
* targetport is then unregistered before xmt_ls_rsp() was called for
* the lsrsp structure, the transport will still call xmt_ls_rsp()
* afterward to clean up the outstanding lsrsp structure. The LLDD should
* noop the transmission of the rsp and call the lsrsp->done() routine
* to allow the lsrsp structure to be released.
*/
int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_ls_req *lsreq,
void *hosthandle,
struct nvmefc_ls_rsp *rsp,
void *lsreqbuf, u32 lsreqbuf_len);
/*
* Routine called by the LLDD whenever it has a logout or loss of
* connectivity to a NVME-FC host port for which there had been
* active NVMe controllers. The host port is indicated by the
* hosthandle. The hosthandle is given to the nvmet-fc transport
* when a NVME LS was received, typically to create a new association.
* The nvmet-fc transport will cache the hostport value with the
* association for use in LS requests for the association.
* When the LLDD calls this routine, the nvmet-fc transport will
* immediately terminate all associations that were created with
* the hosthandle host port.
* The LLDD, after calling this routine and having control returned,
* must assume the transport may subsequently utilize hosthandle as
* part of sending LS's to terminate the association. The LLDD
* should reject the LS's if they are attempted.
* Once the last association has terminated for the hosthandle host
* port, the nvmet-fc transport will call the ops->host_release()
* callback. As of the callback, the nvmet-fc transport will no
* longer reference hosthandle.
*/
void nvmet_fc_invalidate_host(struct nvmet_fc_target_port *tgtport,
void *hosthandle);
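/*
 * Illustrative sketch only (not part of the header): the hosthandle flow
 * across the two entrypoints above. struct lldd_remote_port and
 * lldd_tgt_abts_ls() are hypothetical; the remote-port object itself is
 * used as the opaque hosthandle the transport hands back later.
 */
struct lldd_remote_port {
	struct nvmefc_ls_rsp	ls_rsp;		/* exchange ctx of a received LS */
	void			*ls_buf;
	u32			ls_len;
	/* ... FC login / remote-port state ... */
};
extern void lldd_tgt_abts_ls(struct lldd_remote_port *rport);

static void lldd_example_tgt_rcv_ls(struct nvmet_fc_target_port *tgtport,
				    struct lldd_remote_port *rport)
{
	if (nvmet_fc_rcv_ls_req(tgtport, rport, &rport->ls_rsp,
				rport->ls_buf, rport->ls_len))
		lldd_tgt_abts_ls(rport);	/* not accepted: ABTS the LS */
}

static void lldd_example_connectivity_lost(struct nvmet_fc_target_port *tgtport,
					   struct lldd_remote_port *rport)
{
	/* tears down every association created with this hosthandle; the
	 * transport will call ->host_release(rport) after the last one */
	nvmet_fc_invalidate_host(tgtport, rport);
}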
/*
* If nvmet_fc_rcv_fcp_req returns non-zero, the transport has not accepted
* the FCP cmd. The lldd should ABTS-LS the cmd.
*/
int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *fcpreq,
void *cmdiubuf, u32 cmdiubuf_len);


@ -4,8 +4,8 @@
*/
/*
* This file contains definitions relative to FC-NVME-2 r1.06
* (T11-2019-00210-v001).
* This file contains definitions relative to FC-NVME-2 r1.08
* (T11-2019-00210-v004).
*/
#ifndef _NVME_FC_H
@ -81,7 +81,8 @@ struct nvme_fc_ersp_iu {
};
#define FCNVME_NVME_SR_OPCODE 0x01
#define FCNVME_NVME_SR_RSP_OPCODE 0x02
struct nvme_fc_nvme_sr_iu {
__u8 fc_id;
@ -94,7 +95,7 @@ struct nvme_fc_nvme_sr_iu {
enum {
FCNVME_SRSTAT_ACC = 0x0,
FCNVME_SRSTAT_INV_FCID = 0x1,
/* reserved 0x1 */
/* reserved 0x2 */
FCNVME_SRSTAT_LOGICAL_ERR = 0x3,
FCNVME_SRSTAT_INV_QUALIF = 0x4,
@ -397,7 +398,7 @@ struct fcnvme_ls_disconnect_conn_rqst {
struct fcnvme_ls_rqst_w0 w0;
__be32 desc_list_len;
struct fcnvme_lsdesc_assoc_id associd;
struct fcnvme_lsdesc_disconn_cmd connectid;
struct fcnvme_lsdesc_conn_id connectid;
};
struct fcnvme_ls_disconnect_conn_acc {


@ -38,6 +38,8 @@ enum {
NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */
NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */
NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */
NVMF_ADDR_FAMILY_LOOP = 254, /* Reserved for host usage */
NVMF_ADDR_FAMILY_MAX,
};
/* Transport Type codes for Discovery Log Page entry TRTYPE field */
@ -299,6 +301,8 @@ struct nvme_id_ctrl {
};
enum {
NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1,
NVME_CTRL_CMIC_ANA = 1 << 3,
NVME_CTRL_ONCS_COMPARE = 1 << 0,
NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
NVME_CTRL_ONCS_DSM = 1 << 2,
@ -394,8 +398,12 @@ enum {
enum {
NVME_NS_FEAT_THIN = 1 << 0,
NVME_NS_FEAT_ATOMICS = 1 << 1,
NVME_NS_FEAT_IO_OPT = 1 << 4,
NVME_NS_ATTR_RO = 1 << 0,
NVME_NS_FLBAS_LBA_MASK = 0xf,
NVME_NS_FLBAS_META_EXT = 0x10,
NVME_NS_NMIC_SHARED = 1 << 0,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
@ -412,6 +420,12 @@ enum {
NVME_NS_DPS_PI_TYPE3 = 3,
};
/* Identify Namespace Metadata Capabilities (MC): */
enum {
NVME_MC_EXTENDED_LBA = (1 << 0),
NVME_MC_METADATA_PTR = (1 << 1),
};
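/*
 * Illustrative sketch only (not part of the header): combining the MC bits
 * above with the FLBAS flag to decide whether a namespace carries its
 * metadata interleaved with the data (extended LBA) rather than in a
 * separate buffer. How a real driver consumes this is up to that driver.
 */
static inline bool nvme_example_ns_uses_ext_lba(const struct nvme_id_ns *id)
{
	return (id->mc & NVME_MC_EXTENDED_LBA) &&
	       (id->flbas & NVME_NS_FLBAS_META_EXT);
}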
struct nvme_ns_id_desc {
__u8 nidt;
__u8 nidl;
@ -1177,7 +1191,7 @@ struct nvmf_disc_rsp_page_hdr {
__le64 numrec;
__le16 recfmt;
__u8 resv14[1006];
struct nvmf_disc_rsp_page_entry entries[0];
struct nvmf_disc_rsp_page_entry entries[];
};
enum {


@ -172,7 +172,10 @@ struct floppy_drive_params {
* used in succession to try to read the disk. If the FDC cannot lock onto
* the disk, the next format is tried. This uses the variable 'probing'.
*/
short autodetect[8]; /* autodetected formats */
#define FD_AUTODETECT_SIZE 8
short autodetect[FD_AUTODETECT_SIZE]; /* autodetected formats */
int checkfreq; /* how often should the drive be checked for disk
* changes */
@ -357,10 +360,25 @@ struct floppy_raw_cmd {
int buffer_length; /* length of allocated buffer */
unsigned char rate;
#define FD_RAW_CMD_SIZE 16
#define FD_RAW_REPLY_SIZE 16
#define FD_RAW_CMD_FULLSIZE (FD_RAW_CMD_SIZE + 1 + FD_RAW_REPLY_SIZE)
/* The command may take up the space initially intended for the reply
* and the reply count. Needed for long 82078 commands such as RESTORE,
* which takes 17 command bytes.
*/
unsigned char cmd_count;
unsigned char cmd[16];
unsigned char reply_count;
unsigned char reply[16];
union {
struct {
unsigned char cmd[FD_RAW_CMD_SIZE];
unsigned char reply_count;
unsigned char reply[FD_RAW_REPLY_SIZE];
};
unsigned char fullcmd[FD_RAW_CMD_FULLSIZE];
};
int track;
int resultcode;
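
/*
 * Illustrative userspace sketch only (not part of the header): issuing a
 * long raw command through the new union. The command bytes are
 * placeholders; FDRAWCMD, FD_RAW_INTR and struct floppy_raw_cmd are the
 * real UAPI, and commands longer than FD_RAW_CMD_SIZE spill past cmd[]
 * into what used to be the reply area via fullcmd[].
 */
#include <linux/fd.h>
#include <string.h>
#include <sys/ioctl.h>

static int send_long_fdc_cmd(int fd, const unsigned char *bytes, int nbytes)
{
	struct floppy_raw_cmd raw;

	if (nbytes > FD_RAW_CMD_FULLSIZE)
		return -1;
	memset(&raw, 0, sizeof(raw));
	raw.flags = FD_RAW_INTR;	/* wait for the controller interrupt */
	raw.cmd_count = nbytes;
	memcpy(raw.fullcmd, bytes, nbytes);
	return ioctl(fd, FDRAWCMD, &raw);
}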


@ -7,13 +7,23 @@
* Handbook", Sanches and Canton.
*/
/* Fd controller regs. S&C, about page 340 */
#define FD_STATUS 4
#define FD_DATA 5
/* 82077's auxiliary status registers A & B (R) */
#define FD_SRA 0
#define FD_SRB 1
/* Digital Output Register */
#define FD_DOR 2
/* 82077's tape drive register (R/W) */
#define FD_TDR 3
/* 82077's data rate select register (W) */
#define FD_DSR 4
/* Fd controller regs. S&C, about page 340 */
#define FD_STATUS 4
#define FD_DATA 5
/* Digital Input Register (read) */
#define FD_DIR 7


@ -25,6 +25,16 @@ enum {
LO_FLAGS_DIRECT_IO = 16,
};
/* LO_FLAGS that can be set using LOOP_SET_STATUS(64) */
#define LOOP_SET_STATUS_SETTABLE_FLAGS (LO_FLAGS_AUTOCLEAR | LO_FLAGS_PARTSCAN)
/* LO_FLAGS that can be cleared using LOOP_SET_STATUS(64) */
#define LOOP_SET_STATUS_CLEARABLE_FLAGS (LO_FLAGS_AUTOCLEAR)
/* LO_FLAGS that can be set using LOOP_CONFIGURE */
#define LOOP_CONFIGURE_SETTABLE_FLAGS (LO_FLAGS_READ_ONLY | LO_FLAGS_AUTOCLEAR \
| LO_FLAGS_PARTSCAN | LO_FLAGS_DIRECT_IO)
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
#include <linux/types.h> /* for __u64 */
@ -37,7 +47,7 @@ struct loop_info {
int lo_offset;
int lo_encrypt_type;
int lo_encrypt_key_size; /* ioctl w/o */
int lo_flags; /* ioctl r/o */
int lo_flags;
char lo_name[LO_NAME_SIZE];
unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
unsigned long lo_init[2];
@ -53,13 +63,29 @@ struct loop_info64 {
__u32 lo_number; /* ioctl r/o */
__u32 lo_encrypt_type;
__u32 lo_encrypt_key_size; /* ioctl w/o */
__u32 lo_flags; /* ioctl r/o */
__u32 lo_flags;
__u8 lo_file_name[LO_NAME_SIZE];
__u8 lo_crypt_name[LO_NAME_SIZE];
__u8 lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
__u64 lo_init[2];
};
/**
* struct loop_config - Complete configuration for a loop device.
* @fd: fd of the file to be used as a backing file for the loop device.
* @block_size: block size to use; ignored if 0.
* @info: struct loop_info64 to configure the loop device with.
*
* This structure is used with the LOOP_CONFIGURE ioctl, and can be used to
* atomically set up and configure all loop device parameters at once.
*/
struct loop_config {
__u32 fd;
__u32 block_size;
struct loop_info64 info;
__u64 __reserved[8];
};
/*
* Loop filter types
*/
@ -90,6 +116,7 @@ struct loop_info64 {
#define LOOP_SET_CAPACITY 0x4C07
#define LOOP_SET_DIRECT_IO 0x4C08
#define LOOP_SET_BLOCK_SIZE 0x4C09
#define LOOP_CONFIGURE 0x4C0A
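/*
 * Illustrative userspace sketch only (not part of the header): configuring
 * a loop device in a single LOOP_CONFIGURE call instead of the older
 * LOOP_SET_FD + LOOP_SET_STATUS64 sequence. Paths and flag choices are
 * examples only; error handling and close() are omitted for brevity.
 */
#include <fcntl.h>
#include <linux/loop.h>
#include <string.h>
#include <sys/ioctl.h>

static int loop_attach_example(const char *loopdev, const char *backing)
{
	int lfd = open(loopdev, O_RDWR);
	int bfd = open(backing, O_RDWR);
	struct loop_config cfg;

	if (lfd < 0 || bfd < 0)
		return -1;

	memset(&cfg, 0, sizeof(cfg));
	cfg.fd = bfd;
	cfg.block_size = 4096;		/* 0 keeps the driver default */
	cfg.info.lo_flags = LO_FLAGS_DIRECT_IO | LO_FLAGS_PARTSCAN;

	return ioctl(lfd, LOOP_CONFIGURE, &cfg);
}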
/* /dev/loop-control interface */
#define LOOP_CTL_ADD 0x4C80