Merge branch 'vfio-ccw-for-martin' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/vfio-ccw into features

Pull vfio-ccw branch to add the basic channel I/O passthrough infrastructure based on vfio. The focus is on supporting dasd-eckd (cu_type/dev_type = 0x3990/0x3390) as the target device.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

Commit 6fb81d69d0
Documentation/s390/00-INDEX

@ -22,5 +22,7 @@ qeth.txt
	- HiperSockets Bridge Port Support.
s390dbf.txt
	- information on using the s390 debug feature.
vfio-ccw.txt
	- information on the vfio-ccw I/O subchannel driver.
zfcpdump.txt
	- information on the s390 SCSI dump tool.

Documentation/s390/vfio-ccw.txt

@ -0,0 +1,303 @@
vfio-ccw: the basic infrastructure
==================================

Introduction
------------

Here we describe the vfio support for I/O subchannel devices for
Linux/s390. The motivation for vfio-ccw is to pass subchannels through
to a virtual machine, with vfio as the means.

Unlike other hardware architectures, s390 has defined a unified I/O
access method, called Channel I/O. It has its own access patterns:
- Channel programs run asynchronously on a separate (co)processor.
- The channel subsystem accesses any memory designated by the caller
  in the channel program directly, i.e. there is no iommu involved.
Thus, when we introduce vfio support for these devices, we realize it
with a mediated device (mdev) implementation. The vfio mdev is added
to an iommu group, so that it can be managed by the vfio framework.
We add read/write callbacks for special vfio I/O regions to pass the
channel programs from the mdev to its parent device (the real I/O
subchannel device), which does further address translation and
performs the I/O instructions.

This document does not intend to explain the s390 I/O architecture in
every detail. More information/references can be found here:
- A good starting point for Channel I/O in general:
  https://en.wikipedia.org/wiki/Channel_I/O
- s390 architecture:
  s390 Principles of Operation manual (IBM Form. No. SA22-7832)
- The existing Qemu code which implements a simple emulated channel
  subsystem could also be a good reference. It makes it easier to
  follow the flow.
  qemu/hw/s390x/css.c

For the vfio mediated device framework:
- Documentation/vfio-mediated-device.txt

Motivation of vfio-ccw
----------------------

Currently, a guest virtualized via qemu/kvm on s390 only sees
paravirtualized virtio devices via the "Virtio Over Channel I/O
(virtio-ccw)" transport. This makes virtio devices discoverable via
standard operating system algorithms for handling channel devices.

However, this is not enough. On s390, for the majority of devices,
which use the standard Channel I/O based mechanism, we also need to
provide the functionality of passing them through to a Qemu virtual
machine. This includes devices that don't have a virtio counterpart
(e.g. tape drives) or that have specific characteristics which guests
want to exploit.

For passing a device to a guest, we want to use the same interface as
everybody else, namely vfio. Thus, we would like to introduce vfio
support for channel devices. And we would like to name this new vfio
device "vfio-ccw".

Access patterns of CCW devices
------------------------------

The s390 architecture has implemented a so-called channel subsystem
that provides a unified view of the devices physically attached to the
system. Although the s390 hardware platform knows about a huge variety
of different peripheral attachments, like disk devices (aka DASDs),
tapes, communication controllers, etc., they can all be accessed by a
well-defined access method and they present I/O completion in a
unified way: I/O interruptions.

All I/O requires the use of channel command words (CCWs). A CCW is an
instruction to a specialized I/O channel processor. A channel program
is a sequence of CCWs which are executed by the I/O channel subsystem.
To issue a channel program to the channel subsystem, it is required to
build an operation request block (ORB), which points out the format of
the CCWs and other control information to the system. The operating
system signals the I/O channel subsystem to begin executing the
channel program with a SSCH (start sub-channel) instruction. The
central processor is then free to proceed with non-I/O instructions
until interrupted. The I/O completion result is received by the
interrupt handler in the form of an interrupt response block (IRB).
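
For illustration, a minimal two-CCW channel program could be built as
in the following sketch. The struct ccw1 layout and the CCW_FLAG_CC
definition come from arch/s390/include/asm/cio.h; the command code
0x06 (a device-dependent READ) and the buffer are assumptions made up
for this example:

  #include <asm/cio.h>

  static char buf[4096];

  /* READ 4KB into buf, then command-chain into a NOP. */
  static struct ccw1 chain[2] __attribute__((aligned(8))) = {
          {
                  .cmd_code = 0x06,        /* READ (device dependent) */
                  .flags    = CCW_FLAG_CC, /* chain to the next CCW */
                  .count    = 4096,
          },
          {
                  .cmd_code = 0x03,        /* NOP: ends the chain */
          },
  };

  /* At run time: set chain[0].cda to the address of buf, point the
   * ORB's channel program address at chain[0], and issue SSCH. The
   * completion later arrives as an I/O interrupt carrying the IRB. */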

Back to vfio-ccw, in short:
- ORBs and channel programs are built in the guest kernel (with guest
  physical addresses).
- ORBs and channel programs are passed to the host kernel.
- The host kernel translates the guest physical addresses to real
  addresses and starts the I/O by issuing a privileged Channel I/O
  instruction (e.g. SSCH).
- Channel programs run asynchronously on a separate processor.
- I/O completion will be signaled to the host with I/O interruptions,
  and will be copied as an IRB to user space to pass it back to the
  guest.

Physical vfio ccw device and its child mdev
-------------------------------------------

As mentioned above, we realize vfio-ccw with an mdev implementation.

Channel I/O does not have IOMMU hardware support, so the physical
vfio-ccw device does not have an IOMMU level translation or isolation.

Sub-channel I/O instructions are all privileged instructions. When
handling the I/O instruction interception, vfio-ccw performs software
policing and translation of the channel program before it gets sent
to the hardware.

Within this implementation, we have two drivers for two types of
devices:
- The vfio_ccw driver for the physical subchannel device.
  This is an I/O subchannel driver for the real subchannel device. It
  realizes a group of callbacks and registers to the mdev framework as
  a parent (physical) device. As a consequence, mdev provides vfio_ccw
  a generic interface (sysfs) to create mdev devices. A vfio mdev can
  then be created by vfio_ccw and added to the mediated bus. It is the
  vfio device that is added to an IOMMU group and a vfio group.
  vfio_ccw also provides an I/O region to accept channel program
  requests from user space and to store I/O interrupt results for user
  space to retrieve. To notify user space of an I/O completion, it
  offers an interface to set up an eventfd fd for asynchronous
  signaling.

- The vfio_mdev driver for the mediated vfio ccw device.
  This is provided by the mdev framework. It is a vfio device driver
  for the mdev created by vfio_ccw.
  It realizes a group of vfio device driver callbacks, adds itself to
  a vfio group, and registers itself to the mdev framework as an mdev
  driver.
  It uses a vfio iommu backend that uses the existing map and unmap
  ioctls, but rather than programming them into an IOMMU for a device,
  it simply stores the translations for use by later requests. This
  means that a device programmed in a VM with guest physical addresses
  can have the vfio kernel convert that address to a process virtual
  address, pin the page and program the hardware with the host
  physical address in one step.
  For an mdev, the vfio iommu backend will not pin the pages during
  the VFIO_IOMMU_MAP_DMA ioctl. The mdev framework will only maintain
  a database of the iova<->vaddr mappings in this operation. It
  exports vfio_pin_pages and vfio_unpin_pages interfaces from the vfio
  iommu backend for the physical devices to pin and unpin pages on
  demand, as the snippet below shows.
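
This pin-on-demand interface, as used by pfn_array_pin() in
vfio_ccw_cp.c later in this commit, boils down to a single call that
takes iova page frame numbers in and hands host page frame numbers
back:

  /* Pin pa_nr guest pages and receive the host pfns in pa->pa_pfn. */
  ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
                       IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);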

Below is a high-level block diagram.

 +-------------+
 |             |
 | +---------+ | mdev_register_driver() +--------------+
 | |  Mdev   | +<-----------------------+              |
 | |  bus    | |                        | vfio_mdev.ko |
 | | driver  | +----------------------->+              |<-> VFIO user
 | +---------+ |    probe()/remove()    +--------------+    APIs
 |             |
 |  MDEV CORE  |
 |   MODULE    |
 |   mdev.ko   |
 | +---------+ | mdev_register_device() +--------------+
 | |Physical | +<-----------------------+              |
 | | device  | |                        |  vfio_ccw.ko |<-> subchannel
 | |interface| +----------------------->+              |     device
 | +---------+ |       callback         +--------------+
 +-------------+

The process of how these work together:
1. vfio_ccw.ko drives the physical I/O subchannel, and registers the
   physical device (with callbacks) to the mdev framework.
   When vfio_ccw probes the subchannel device, it registers a device
   pointer and callbacks to the mdev framework. Mdev related file
   nodes under the device node in sysfs would be created for the
   subchannel device, namely 'mdev_create', 'mdev_destroy' and
   'mdev_supported_types'.
2. Create a mediated vfio ccw device.
   Using the 'mdev_create' sysfs file, we need to manually create one
   (and only one for our case) mediated device (see the sketch after
   this list).
3. vfio_mdev.ko drives the mediated ccw device.
   vfio_mdev is also the vfio device driver. It will probe the mdev
   and add it to an iommu_group and a vfio_group. Then we could pass
   through the mdev to a guest.
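
As an illustration of step 2, creating the mediated device amounts to
writing a UUID into the 'mdev_create' attribute. A minimal user-space
sketch; both the sysfs path (a subchannel at bus address 0.0.0313) and
the UUID are made-up examples:

  #include <fcntl.h>
  #include <string.h>
  #include <unistd.h>

  int create_mdev(void)
  {
          const char *path =
                  "/sys/bus/css/devices/0.0.0313/mdev_create";
          const char *uuid = "7e270a25-e163-4922-af60-757fc8ed48c6";
          int fd = open(path, O_WRONLY);

          if (fd < 0)
                  return -1;
          if (write(fd, uuid, strlen(uuid)) != (ssize_t)strlen(uuid)) {
                  close(fd);
                  return -1;
          }
          return close(fd);
  }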

vfio-ccw I/O region
-------------------

An I/O region is used to accept channel program requests from user
space and to store I/O interrupt results for user space to retrieve.
The definition of the region is:

struct ccw_io_region {
#define ORB_AREA_SIZE 12
	__u8	orb_area[ORB_AREA_SIZE];
#define SCSW_AREA_SIZE 12
	__u8	scsw_area[SCSW_AREA_SIZE];
#define IRB_AREA_SIZE 96
	__u8	irb_area[IRB_AREA_SIZE];
	__u32	ret_code;
} __packed;

When starting an I/O request, orb_area should be filled with the
guest ORB, and scsw_area should be filled with the SCSW of the virtual
subchannel.

irb_area stores the I/O result.

ret_code stores a return code for each access of the region.
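
Put together, one request/response round trip over this region could
look like the sketch below. The region offset would be obtained via
VFIO_DEVICE_GET_REGION_INFO; error handling, the ORB/SCSW contents
(guest_orb, vsch_scsw) and the variable names are assumptions for
illustration only:

  struct ccw_io_region io_region;

  memset(&io_region, 0, sizeof(io_region));
  memcpy(io_region.orb_area, &guest_orb, ORB_AREA_SIZE);
  memcpy(io_region.scsw_area, &vsch_scsw, SCSW_AREA_SIZE);

  /* Writing the region submits the request; check ret_code after. */
  if (pwrite(vfio_dev_fd, &io_region, sizeof(io_region),
             region_offset) != sizeof(io_region))
          /* handle error */;

  /* Once the eventfd fires, read the region back for irb_area. */
  pread(vfio_dev_fd, &io_region, sizeof(io_region), region_offset);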

vfio-ccw patches overview
-------------------------

For now, our patches are rebased on the latest mdev implementation.
vfio-ccw follows what vfio-pci did on the s390 platform and uses
vfio-iommu-type1 as the vfio iommu backend. This is a good starting
point for the vfio-ccw code review. Note that the implementation is
far from complete yet; but we'd like to get feedback for the general
architecture.

* CCW translation APIs
- Description:
  These introduce a group of APIs (starting with 'cp_') to do CCW
  translation. The CCWs passed in by a user space program are
  organized with their guest physical memory addresses. These APIs
  will copy the CCWs into kernel space, and assemble a runnable
  kernel channel program by updating the guest physical addresses
  with their corresponding host physical addresses (see the
  calling-sequence sketch below).
- Patches:
  vfio: ccw: introduce channel program interfaces
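
Based on the declarations in vfio_ccw_cp.h (added later in this
commit), the intended calling sequence of these interfaces is roughly
the following sketch; the surrounding driver plumbing is elided:

  struct channel_program cp;
  union orb *orb;
  int ret;

  ret = cp_init(&cp, mdev, guest_orb); /* copy chains from the guest */
  if (!ret)
          ret = cp_prefetch(&cp);      /* translate guest->host */
  if (!ret) {
          orb = cp_get_orb(&cp, intparm, lpm);
          /* ... issue ssch() with orb ... */
  }
  /* Later, from the interrupt path: */
  cp_update_scsw(&cp, &irb->scsw);
  cp_free(&cp);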

* vfio_ccw device driver
- Description:
  The following patches utilize the CCW translation APIs and introduce
  vfio_ccw, which is the driver for the I/O subchannel devices you
  want to pass through.
  vfio_ccw implements the following vfio ioctls:
    VFIO_DEVICE_GET_INFO
    VFIO_DEVICE_GET_IRQ_INFO
    VFIO_DEVICE_GET_REGION_INFO
    VFIO_DEVICE_RESET
    VFIO_DEVICE_SET_IRQS
  This provides an I/O region, so that the user space program can pass
  a channel program to the kernel, to do further CCW translation
  before issuing it to a real device.
  This also provides the SET_IRQ ioctl to set up an event notifier to
  notify the user space program of I/O completion in an asynchronous
  way (see the sketch after the patch list below).
- Patches:
  vfio: ccw: basic implementation for vfio_ccw driver
  vfio: ccw: introduce ccw_io_region
  vfio: ccw: realize VFIO_DEVICE_GET_REGION_INFO ioctl
  vfio: ccw: realize VFIO_DEVICE_RESET ioctl
  vfio: ccw: realize VFIO_DEVICE_G(S)ET_IRQ_INFO ioctls
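
Registering the completion eventfd uses the standard vfio
VFIO_DEVICE_SET_IRQS ioctl. A minimal sketch, assuming IRQ index 0 is
the vfio-ccw I/O interrupt index:

  #include <stdlib.h>
  #include <string.h>
  #include <sys/eventfd.h>
  #include <sys/ioctl.h>
  #include <linux/vfio.h>

  int setup_io_eventfd(int vfio_dev_fd)
  {
          size_t sz = sizeof(struct vfio_irq_set) + sizeof(int32_t);
          struct vfio_irq_set *irq_set = malloc(sz);
          int32_t efd = eventfd(0, 0);
          int ret;

          irq_set->argsz = sz;
          irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
                           VFIO_IRQ_SET_ACTION_TRIGGER;
          irq_set->index = 0;   /* assumed: the I/O IRQ index */
          irq_set->start = 0;
          irq_set->count = 1;
          memcpy(&irq_set->data, &efd, sizeof(efd));

          ret = ioctl(vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
          free(irq_set);
          return ret;
  }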

The user of vfio-ccw is not limited to Qemu, but Qemu is definitely a
good example for understanding how these patches work. Here is a
little more detail on how an I/O request triggered by the Qemu guest
will be handled (without error handling).

Explanation:
Q1-Q7: Qemu side process.
K1-K6: Kernel side process.

Q1. Get I/O region info during initialization.
Q2. Setup event notifier and handler to handle I/O completion.

... ...

Q3. Intercept a ssch instruction.
Q4. Write the guest channel program and ORB to the I/O region.
    K1. Copy from guest to kernel.
    K2. Translate the guest channel program to a host kernel space
        channel program, which becomes runnable for a real device.
    K3. With the necessary information contained in the orb passed in
        by Qemu, issue the ccwchain to the device.
    K4. Return the ssch CC code.
Q5. Return the CC code to the guest.

... ...

    K5. Interrupt handler gets the I/O result and writes the result
        to the I/O region.
    K6. Signal Qemu to retrieve the result.
Q6. Get the signal, and the event handler reads out the result from
    the I/O region.
Q7. Update the irb for the guest.

Limitations
-----------

The current vfio-ccw implementation focuses on supporting basic
commands needed to implement block device functionality (read/write)
of DASD/ECKD devices only. Some commands may need special handling in
the future, for example, anything related to path grouping.

DASD is a kind of storage device, while ECKD is a data recording
format. More information on DASD and ECKD can be found here:
https://en.wikipedia.org/wiki/Direct-access_storage_device
https://en.wikipedia.org/wiki/Count_key_data

Together with the corresponding work in Qemu, we can now bring the
passed-through DASD/ECKD device online in a guest and use it as a
block device.

Reference
---------
1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
3. https://en.wikipedia.org/wiki/Channel_I/O
4. Documentation/s390/cds.txt
5. Documentation/vfio.txt
6. Documentation/vfio-mediated-device.txt

MAINTAINERS
@ -10860,6 +10860,16 @@ W: http://www.ibm.com/developerworks/linux/linux390/
S:	Supported
F:	drivers/iommu/s390-iommu.c

S390 VFIO-CCW DRIVER
M:	Cornelia Huck <cornelia.huck@de.ibm.com>
M:	Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
L:	linux-s390@vger.kernel.org
L:	kvm@vger.kernel.org
S:	Supported
F:	drivers/s390/cio/vfio_ccw*
F:	Documentation/s390/vfio-ccw.txt
F:	include/uapi/linux/vfio_ccw.h

S3C24XX SD/MMC Driver
M:	Ben Dooks <ben-linux@fluff.org>
L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)

drivers/s390/cio/Kconfig

@ -682,6 +682,16 @@ config EADM_SCH
	  To compile this driver as a module, choose M here: the
	  module will be called eadm_sch.

config VFIO_CCW
	def_tristate n
	prompt "Support for VFIO-CCW subchannels"
	depends on S390_CCW_IOMMU && VFIO_MDEV
	help
	  This driver allows usage of I/O subchannels via VFIO-CCW.

	  To compile this driver as a module, choose M here: the
	  module will be called vfio_ccw.

endmenu

menu "Dump support"

arch/s390/include/asm/cio.h

@ -33,6 +33,24 @@ struct ccw1 {
	__u32 cda;
} __attribute__ ((packed,aligned(8)));

/**
 * struct ccw0 - channel command word
 * @cmd_code: command code
 * @cda: data address
 * @flags: flags, like IDA addressing, etc.
 * @reserved: will be ignored
 * @count: byte count
 *
 * The format-0 ccw structure.
 */
struct ccw0 {
	__u8 cmd_code;
	__u32 cda : 24;
	__u8  flags;
	__u8  reserved;
	__u16 count;
} __packed __aligned(8);

#define CCW_FLAG_DC		0x80
#define CCW_FLAG_CC		0x40
#define CCW_FLAG_SLI		0x20

arch/s390/include/asm/isc.h

@ -16,6 +16,7 @@
#define CONSOLE_ISC 1			/* console I/O subchannel */
#define EADM_SCH_ISC 4			/* EADM subchannels */
#define CHSC_SCH_ISC 7			/* CHSC subchannels */
#define VFIO_CCW_ISC IO_SCH_ISC		/* VFIO-CCW I/O subchannels */
/* Adapter interrupts. */
#define QDIO_AIRQ_ISC IO_SCH_ISC	/* I/O subchannel in qdio mode */
#define PCI_ISC 2			/* PCI I/O subchannels */

drivers/iommu/Kconfig

@ -327,6 +327,14 @@ config S390_IOMMU
	help
	  Support for the IOMMU API for s390 PCI devices.

config S390_CCW_IOMMU
	bool "S390 CCW IOMMU Support"
	depends on S390 && CCW
	select IOMMU_API
	help
	  Enables bits of IOMMU API required by VFIO. The iommu_ops
	  is not implemented as it is not necessary for VFIO.

config MTK_IOMMU
	bool "MTK IOMMU Support"
	depends on ARM || ARM64

drivers/s390/cio/Makefile

@ -17,3 +17,6 @@ obj-$(CONFIG_CCWGROUP) += ccwgroup.o

qdio-objs := qdio_main.o qdio_thinint.o qdio_debug.o qdio_setup.o
obj-$(CONFIG_QDIO) += qdio.o

vfio_ccw-objs += vfio_ccw_drv.o vfio_ccw_cp.o vfio_ccw_ops.o vfio_ccw_fsm.o
obj-$(CONFIG_VFIO_CCW) += vfio_ccw.o

drivers/s390/cio/cio.c

@ -170,12 +170,14 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */
		return ccode;
	}
}
EXPORT_SYMBOL_GPL(cio_start_key);

int
cio_start (struct subchannel *sch, struct ccw1 *cpa, __u8 lpm)
{
	return cio_start_key(sch, cpa, lpm, PAGE_DEFAULT_KEY);
}
EXPORT_SYMBOL_GPL(cio_start);

/*
 * resume suspended I/O operation

@ -208,6 +210,7 @@ cio_resume (struct subchannel *sch)
		return -ENODEV;
	}
}
EXPORT_SYMBOL_GPL(cio_resume);

/*
 * halt I/O operation

@ -241,6 +244,7 @@ cio_halt(struct subchannel *sch)
		return -ENODEV;
	}
}
EXPORT_SYMBOL_GPL(cio_halt);

/*
 * Clear I/O operation

@ -271,6 +275,7 @@ cio_clear(struct subchannel *sch)
		return -ENODEV;
	}
}
EXPORT_SYMBOL_GPL(cio_clear);

/*
 * Function: cio_cancel

@ -308,7 +313,68 @@ cio_cancel (struct subchannel *sch)
		return -ENODEV;
	}
}
EXPORT_SYMBOL_GPL(cio_cancel);

/**
 * cio_cancel_halt_clear - Cancel running I/O by performing cancel, halt
 * and clear, in that order, if the subchannel is valid.
 * @sch: subchannel on which to perform the cancel_halt_clear operation
 * @iretry: the number of retries remaining for the next operation
 *
 * This should be called repeatedly since halt/clear are asynchronous
 * operations. We do one try with cio_cancel, three tries with cio_halt,
 * 255 tries with cio_clear. The caller should initialize @iretry with
 * the value 255 for its first call to this, and keep using the same
 * @iretry in the subsequent calls until it gets a non -EBUSY return.
 *
 * Returns 0 if device now idle, -ENODEV for device not operational,
 * -EBUSY if an interrupt is expected (either from halt/clear or from a
 * status pending), and -EIO if out of retries.
 */
int cio_cancel_halt_clear(struct subchannel *sch, int *iretry)
{
	int ret;

	if (cio_update_schib(sch))
		return -ENODEV;
	if (!sch->schib.pmcw.ena)
		/* Not operational -> done. */
		return 0;
	/* Stage 1: cancel io. */
	if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) &&
	    !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
		if (!scsw_is_tm(&sch->schib.scsw)) {
			ret = cio_cancel(sch);
			if (ret != -EINVAL)
				return ret;
		}
		/*
		 * Cancel io unsuccessful or not applicable (transport mode).
		 * Continue with asynchronous instructions.
		 */
		*iretry = 3;	/* 3 halt retries. */
	}
	/* Stage 2: halt io. */
	if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
		if (*iretry) {
			*iretry -= 1;
			ret = cio_halt(sch);
			if (ret != -EBUSY)
				return (ret == 0) ? -EBUSY : ret;
		}
		/* Halt io unsuccessful. */
		*iretry = 255;	/* 255 clear retries. */
	}
	/* Stage 3: clear io. */
	if (*iretry) {
		*iretry -= 1;
		ret = cio_clear(sch);
		return (ret == 0) ? -EBUSY : ret;
	}
	/* Function was unsuccessful */
	return -EIO;
}
EXPORT_SYMBOL_GPL(cio_cancel_halt_clear);

static void cio_apply_config(struct subchannel *sch, struct schib *schib)
{

@ -382,6 +448,7 @@ int cio_commit_config(struct subchannel *sch)
	}
	return ret;
}
EXPORT_SYMBOL_GPL(cio_commit_config);

/**
 * cio_update_schib - Perform stsch and update schib if subchannel is valid.

@ -987,6 +1054,7 @@ int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key)
		return cio_start_handle_notoper(sch, lpm);
	}
}
EXPORT_SYMBOL_GPL(cio_tm_start_key);

/**
 * cio_tm_intrg - perform interrogate function

@ -1012,3 +1080,4 @@ int cio_tm_intrg(struct subchannel *sch)
		return -ENODEV;
	}
}
EXPORT_SYMBOL_GPL(cio_tm_intrg);

drivers/s390/cio/cio.h

@ -123,6 +123,7 @@ extern int cio_enable_subchannel(struct subchannel *, u32);
extern int cio_disable_subchannel (struct subchannel *);
extern int cio_cancel (struct subchannel *);
extern int cio_clear (struct subchannel *);
extern int cio_cancel_halt_clear(struct subchannel *, int *);
extern int cio_resume (struct subchannel *);
extern int cio_halt (struct subchannel *);
extern int cio_start (struct subchannel *, struct ccw1 *, __u8);

drivers/s390/cio/device_fsm.c

@ -124,14 +124,6 @@ ccw_device_set_timeout(struct ccw_device *cdev, int expires)
 	add_timer(&cdev->private->timer);
 }
 
-/*
- * Cancel running i/o. This is called repeatedly since halt/clear are
- * asynchronous operations. We do one try with cio_cancel, two tries
- * with cio_halt, 255 tries with cio_clear. If everything fails, panic.
- * Returns 0 if device now idle, -ENODEV for device not operational and
- * -EBUSY if an interrupt is expected (either from halt/clear or from a
- * status pending).
- */
 int
 ccw_device_cancel_halt_clear(struct ccw_device *cdev)
 {

@ -139,44 +131,14 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev)
 	int ret;
 
 	sch = to_subchannel(cdev->dev.parent);
-	if (cio_update_schib(sch))
-		return -ENODEV;
-	if (!sch->schib.pmcw.ena)
-		/* Not operational -> done. */
-		return 0;
-	/* Stage 1: cancel io. */
-	if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_HALT_PEND) &&
-	    !(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
-		if (!scsw_is_tm(&sch->schib.scsw)) {
-			ret = cio_cancel(sch);
-			if (ret != -EINVAL)
-				return ret;
-		}
-		/* cancel io unsuccessful or not applicable (transport mode).
-		 * Continue with asynchronous instructions. */
-		cdev->private->iretry = 3;	/* 3 halt retries. */
-	}
-	if (!(scsw_actl(&sch->schib.scsw) & SCSW_ACTL_CLEAR_PEND)) {
-		/* Stage 2: halt io. */
-		if (cdev->private->iretry) {
-			cdev->private->iretry--;
-			ret = cio_halt(sch);
-			if (ret != -EBUSY)
-				return (ret == 0) ? -EBUSY : ret;
-		}
-		/* halt io unsuccessful. */
-		cdev->private->iretry = 255;	/* 255 clear retries. */
-	}
-	/* Stage 3: clear io. */
-	if (cdev->private->iretry) {
-		cdev->private->iretry--;
-		ret = cio_clear (sch);
-		return (ret == 0) ? -EBUSY : ret;
-	}
-	/* Function was unsuccessful */
-	CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n",
-		      cdev->private->dev_id.ssid, cdev->private->dev_id.devno);
-	return -EIO;
+	ret = cio_cancel_halt_clear(sch, &cdev->private->iretry);
+
+	if (ret == -EIO)
+		CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n",
+			      cdev->private->dev_id.ssid,
+			      cdev->private->dev_id.devno);
+
+	return ret;
 }
 
 void ccw_device_update_sense_data(struct ccw_device *cdev)

drivers/s390/cio/vfio_ccw_cp.c

@ -0,0 +1,842 @@
/*
 * channel program interfaces
 *
 * Copyright IBM Corp. 2017
 *
 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
 *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/vfio.h>
#include <asm/idals.h>

#include "vfio_ccw_cp.h"

/*
 * Max length for ccw chain.
 * XXX: Limit to 256, need to check more?
 */
#define CCWCHAIN_LEN_MAX	256

struct pfn_array {
	unsigned long	pa_iova;
	unsigned long	*pa_iova_pfn;
	unsigned long	*pa_pfn;
	int		pa_nr;
};

struct pfn_array_table {
	struct pfn_array *pat_pa;
	int		pat_nr;
};

struct ccwchain {
	struct list_head	next;
	struct ccw1		*ch_ccw;
	/* Guest physical address of the current chain. */
	u64			ch_iova;
	/* Count of the valid ccws in chain. */
	int			ch_len;
	/* Pinned PAGEs for the original data. */
	struct pfn_array_table	*ch_pat;
};

/*
 * pfn_array_pin() - pin user pages in memory
 * @pa: pfn_array on which to perform the operation
 * @mdev: the mediated device to perform pin/unpin operations
 *
 * Attempt to pin user pages in memory.
 *
 * Usage of pfn_array:
 * @pa->pa_iova     starting guest physical I/O address. Assigned by caller.
 * @pa->pa_iova_pfn array that stores PFNs of the pages need to pin. Allocated
 *                  by caller.
 * @pa->pa_pfn      array that receives PFNs of the pages pinned. Allocated by
 *                  caller.
 * @pa->pa_nr       number of pages from @pa->pa_iova to pin. Assigned by
 *                  caller.
 *                  number of pages pinned. Assigned by callee.
 *
 * Returns:
 *   Number of pages pinned on success.
 *   If @pa->pa_nr is 0 or negative, returns 0.
 *   If no pages were pinned, returns -errno.
 */
static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
{
	int i, ret;

	if (pa->pa_nr <= 0) {
		pa->pa_nr = 0;
		return 0;
	}

	pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
	for (i = 1; i < pa->pa_nr; i++)
		pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;

	ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
			     IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);

	if (ret > 0 && ret != pa->pa_nr) {
		vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret);
		pa->pa_nr = 0;
		return 0;
	}

	return ret;
}

/* Unpin the pages before releasing the memory. */
static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
{
	vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
	pa->pa_nr = 0;
	kfree(pa->pa_iova_pfn);
}

/* Alloc memory for PFNs, then pin pages with them. */
static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
			       u64 iova, unsigned int len)
{
	int ret = 0;

	if (!len || pa->pa_nr)
		return -EINVAL;

	pa->pa_iova = iova;

	pa->pa_nr = ((iova & ~PAGE_MASK) + len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
	if (!pa->pa_nr)
		return -EINVAL;

	pa->pa_iova_pfn = kcalloc(pa->pa_nr,
				  sizeof(*pa->pa_iova_pfn) +
				  sizeof(*pa->pa_pfn),
				  GFP_KERNEL);
	if (unlikely(!pa->pa_iova_pfn))
		return -ENOMEM;
	pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;

	ret = pfn_array_pin(pa, mdev);

	if (ret > 0)
		return ret;
	else if (!ret)
		ret = -EINVAL;

	kfree(pa->pa_iova_pfn);

	return ret;
}

static int pfn_array_table_init(struct pfn_array_table *pat, int nr)
{
	pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL);
	if (unlikely(ZERO_OR_NULL_PTR(pat->pat_pa))) {
		pat->pat_nr = 0;
		return -ENOMEM;
	}

	pat->pat_nr = nr;

	return 0;
}

static void pfn_array_table_unpin_free(struct pfn_array_table *pat,
				       struct device *mdev)
{
	int i;

	for (i = 0; i < pat->pat_nr; i++)
		pfn_array_unpin_free(pat->pat_pa + i, mdev);

	if (pat->pat_nr) {
		kfree(pat->pat_pa);
		pat->pat_pa = NULL;
		pat->pat_nr = 0;
	}
}

static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat,
					unsigned long iova)
{
	struct pfn_array *pa = pat->pat_pa;
	unsigned long iova_pfn = iova >> PAGE_SHIFT;
	int i, j;

	for (i = 0; i < pat->pat_nr; i++, pa++)
		for (j = 0; j < pa->pa_nr; j++)
			if (pa->pa_iova_pfn[j] == iova_pfn)
				return true;

	return false;
}

/* Create the list of idal words for a pfn_array_table. */
static inline void pfn_array_table_idal_create_words(
	struct pfn_array_table *pat,
	unsigned long *idaws)
{
	struct pfn_array *pa;
	int i, j, k;

	/*
	 * Idal words (except the first one) rely on the memory being 4k
	 * aligned. If a user virtual address is 4K aligned, then its
	 * corresponding kernel physical address will also be 4K aligned.
	 * Thus there will be no problem here to simply use the phys to
	 * create an idaw.
	 */
	k = 0;
	for (i = 0; i < pat->pat_nr; i++) {
		pa = pat->pat_pa + i;
		for (j = 0; j < pa->pa_nr; j++) {
			idaws[k] = pa->pa_pfn[j] << PAGE_SHIFT;
			if (k == 0)
				idaws[k] += pa->pa_iova & (PAGE_SIZE - 1);
			k++;
		}
	}
}

/*
 * Within the domain (@mdev), copy @n bytes from a guest physical
 * address (@iova) to a host physical address (@to).
 */
static long copy_from_iova(struct device *mdev,
			   void *to, u64 iova,
			   unsigned long n)
{
	struct pfn_array pa = {0};
	u64 from;
	int i, ret;
	unsigned long l, m;

	ret = pfn_array_alloc_pin(&pa, mdev, iova, n);
	if (ret <= 0)
		return ret;

	l = n;
	for (i = 0; i < pa.pa_nr; i++) {
		from = pa.pa_pfn[i] << PAGE_SHIFT;
		m = PAGE_SIZE;
		if (i == 0) {
			from += iova & (PAGE_SIZE - 1);
			m -= iova & (PAGE_SIZE - 1);
		}

		m = min(l, m);
		memcpy(to + (n - l), (void *)from, m);

		l -= m;
		if (l == 0)
			break;
	}

	pfn_array_unpin_free(&pa, mdev);

	return l;
}

static long copy_ccw_from_iova(struct channel_program *cp,
			       struct ccw1 *to, u64 iova,
			       unsigned long len)
{
	struct ccw0 ccw0;
	struct ccw1 *pccw1;
	int ret;
	int i;

	ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1));
	if (ret)
		return ret;

	if (!cp->orb.cmd.fmt) {
		pccw1 = to;
		for (i = 0; i < len; i++) {
			ccw0 = *(struct ccw0 *)pccw1;
			if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
				pccw1->cmd_code = CCW_CMD_TIC;
				pccw1->flags = 0;
				pccw1->count = 0;
			} else {
				pccw1->cmd_code = ccw0.cmd_code;
				pccw1->flags = ccw0.flags;
				pccw1->count = ccw0.count;
			}
			pccw1->cda = ccw0.cda;
			pccw1++;
		}
	}

	return ret;
}

/*
 * Helpers to operate ccwchain.
 */
#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)

#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)

#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)

#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)

#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))

static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
{
	struct ccwchain *chain;
	void *data;
	size_t size;

	/* Make ccw address aligned to 8. */
	size = ((sizeof(*chain) + 7L) & -8L) +
	       sizeof(*chain->ch_ccw) * len +
	       sizeof(*chain->ch_pat) * len;
	chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
	if (!chain)
		return NULL;

	data = (u8 *)chain + ((sizeof(*chain) + 7L) & -8L);
	chain->ch_ccw = (struct ccw1 *)data;

	data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
	chain->ch_pat = (struct pfn_array_table *)data;

	chain->ch_len = len;

	list_add_tail(&chain->next, &cp->ccwchain_list);

	return chain;
}

static void ccwchain_free(struct ccwchain *chain)
{
	list_del(&chain->next);
	kfree(chain);
}

/* Free resource for a ccw that allocated memory for its cda. */
static void ccwchain_cda_free(struct ccwchain *chain, int idx)
{
	struct ccw1 *ccw = chain->ch_ccw + idx;

	if (!ccw->count)
		return;

	kfree((void *)(u64)ccw->cda);
}

/* Unpin the pages then free the memory resources. */
static void cp_unpin_free(struct channel_program *cp)
{
	struct ccwchain *chain, *temp;
	int i;

	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
		for (i = 0; i < chain->ch_len; i++) {
			pfn_array_table_unpin_free(chain->ch_pat + i,
						   cp->mdev);
			ccwchain_cda_free(chain, i);
		}
		ccwchain_free(chain);
	}
}

/**
 * ccwchain_calc_length - calculate the length of the ccw chain.
 * @iova: guest physical address of the target ccw chain
 * @cp: channel_program on which to perform the operation
 *
 * This is the chain length not considering any TICs.
 * You need to do a new round for each TIC target.
 *
 * Returns: the length of the ccw chain or -errno.
 */
static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
{
	struct ccw1 *ccw, *p;
	int cnt;

	/*
	 * Copy current chain from guest to host kernel.
	 * Currently the chain length is limited to CCWCHAIN_LEN_MAX (256).
	 * So copying 2K is enough (safe).
	 */
	p = ccw = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccw), GFP_KERNEL);
	if (!ccw)
		return -ENOMEM;

	cnt = copy_ccw_from_iova(cp, ccw, iova, CCWCHAIN_LEN_MAX);
	if (cnt) {
		kfree(ccw);
		return cnt;
	}

	cnt = 0;
	do {
		cnt++;

		if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw)))
			break;

		ccw++;
	} while (cnt < CCWCHAIN_LEN_MAX + 1);

	if (cnt == CCWCHAIN_LEN_MAX + 1)
		cnt = -EINVAL;

	kfree(p);
	return cnt;
}

static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
{
	struct ccwchain *chain;
	u32 ccw_head, ccw_tail;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		ccw_head = chain->ch_iova;
		ccw_tail = ccw_head + (chain->ch_len - 1) * sizeof(struct ccw1);

		if ((ccw_head <= tic->cda) && (tic->cda <= ccw_tail))
			return 1;
	}

	return 0;
}

static int ccwchain_loop_tic(struct ccwchain *chain,
			     struct channel_program *cp);

static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
{
	struct ccwchain *chain;
	int len, ret;

	/* May transfer to an existing chain. */
	if (tic_target_chain_exists(tic, cp))
		return 0;

	/* Get chain length. */
	len = ccwchain_calc_length(tic->cda, cp);
	if (len < 0)
		return len;

	/* Need to allocate a new chain for this one. */
	chain = ccwchain_alloc(cp, len);
	if (!chain)
		return -ENOMEM;
	chain->ch_iova = tic->cda;

	/* Copy the new chain from user. */
	ret = copy_ccw_from_iova(cp, chain->ch_ccw, tic->cda, len);
	if (ret) {
		ccwchain_free(chain);
		return ret;
	}

	/* Loop for tics on this new chain. */
	return ccwchain_loop_tic(chain, cp);
}

/* Loop for TICs. */
static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
{
	struct ccw1 *tic;
	int i, ret;

	for (i = 0; i < chain->ch_len; i++) {
		tic = chain->ch_ccw + i;

		if (!ccw_is_tic(tic))
			continue;

		ret = ccwchain_handle_tic(tic, cp);
		if (ret)
			return ret;
	}

	return 0;
}

static int ccwchain_fetch_tic(struct ccwchain *chain,
			      int idx,
			      struct channel_program *cp)
{
	struct ccw1 *ccw = chain->ch_ccw + idx;
	struct ccwchain *iter;
	u32 ccw_head, ccw_tail;

	list_for_each_entry(iter, &cp->ccwchain_list, next) {
		ccw_head = iter->ch_iova;
		ccw_tail = ccw_head + (iter->ch_len - 1) * sizeof(struct ccw1);

		if ((ccw_head <= ccw->cda) && (ccw->cda <= ccw_tail)) {
			ccw->cda = (__u32) (addr_t) (iter->ch_ccw +
						     (ccw->cda - ccw_head));
			return 0;
		}
	}

	return -EFAULT;
}

static int ccwchain_fetch_direct(struct ccwchain *chain,
				 int idx,
				 struct channel_program *cp)
{
	struct ccw1 *ccw;
	struct pfn_array_table *pat;
	unsigned long *idaws;
	int idaw_nr;

	ccw = chain->ch_ccw + idx;

	/*
	 * Pin data page(s) in memory.
	 * The number of pages actually is the count of the idaws which will
	 * be needed when translating a direct ccw to an idal ccw.
	 */
	pat = chain->ch_pat + idx;
	if (pfn_array_table_init(pat, 1))
		return -ENOMEM;
	idaw_nr = pfn_array_alloc_pin(pat->pat_pa, cp->mdev,
				      ccw->cda, ccw->count);
	if (idaw_nr < 0)
		return idaw_nr;

	/* Translate this direct ccw to an idal ccw. */
	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
	if (!idaws) {
		pfn_array_table_unpin_free(pat, cp->mdev);
		return -ENOMEM;
	}
	ccw->cda = (__u32) virt_to_phys(idaws);
	ccw->flags |= CCW_FLAG_IDA;

	pfn_array_table_idal_create_words(pat, idaws);

	return 0;
}

static int ccwchain_fetch_idal(struct ccwchain *chain,
			       int idx,
			       struct channel_program *cp)
{
	struct ccw1 *ccw;
	struct pfn_array_table *pat;
	unsigned long *idaws;
	u64 idaw_iova;
	unsigned int idaw_nr, idaw_len;
	int i, ret;

	ccw = chain->ch_ccw + idx;

	/* Calculate size of idaws. */
	ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova));
	if (ret)
		return ret;
	idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count);
	idaw_len = idaw_nr * sizeof(*idaws);

	/* Pin data page(s) in memory. */
	pat = chain->ch_pat + idx;
	ret = pfn_array_table_init(pat, idaw_nr);
	if (ret)
		return ret;

	/* Translate idal ccw to use new allocated idaws. */
	idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL);
	if (!idaws) {
		ret = -ENOMEM;
		goto out_unpin;
	}

	ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len);
	if (ret)
		goto out_free_idaws;

	ccw->cda = virt_to_phys(idaws);

	for (i = 0; i < idaw_nr; i++) {
		idaw_iova = *(idaws + i);
		if (IS_ERR_VALUE(idaw_iova)) {
			ret = -EFAULT;
			goto out_free_idaws;
		}

		ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev,
					  idaw_iova, 1);
		if (ret < 0)
			goto out_free_idaws;
	}

	pfn_array_table_idal_create_words(pat, idaws);

	return 0;

out_free_idaws:
	kfree(idaws);
out_unpin:
	pfn_array_table_unpin_free(pat, cp->mdev);
	return ret;
}

/*
 * Fetch one ccw.
 * To reduce memory copy, we'll pin the cda page in memory,
 * and to get rid of the cda 2G limitation of ccw1, we'll translate
 * direct ccws to idal ccws.
 */
static int ccwchain_fetch_one(struct ccwchain *chain,
			      int idx,
			      struct channel_program *cp)
{
	struct ccw1 *ccw = chain->ch_ccw + idx;

	if (ccw_is_test(ccw) || ccw_is_noop(ccw))
		return 0;

	if (ccw_is_tic(ccw))
		return ccwchain_fetch_tic(chain, idx, cp);

	if (ccw_is_idal(ccw))
		return ccwchain_fetch_idal(chain, idx, cp);

	return ccwchain_fetch_direct(chain, idx, cp);
}

/**
 * cp_init() - allocate ccwchains for a channel program.
 * @cp: channel_program on which to perform the operation
 * @mdev: the mediated device to perform pin/unpin operations
 * @orb: control block for the channel program from the guest
 *
 * This creates one or more ccwchain(s), and copies the raw data of
 * the target channel program from @orb->cmd.iova to the new ccwchain(s).
 *
 * Limitations:
 * 1. Supports only prefetch enabled mode.
 * 2. Supports idal(c64) ccw chaining.
 * 3. Supports 4k idaw.
 *
 * Returns:
 *   %0 on success and a negative error value on failure.
 */
int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
{
	u64 iova = orb->cmd.cpa;
	struct ccwchain *chain;
	int len, ret;

	/*
	 * XXX:
	 * Only support prefetch enable mode now.
	 * Only support 64bit addressing idal.
	 * Only support 4k IDAW.
	 */
	if (!orb->cmd.pfch || !orb->cmd.c64 || orb->cmd.i2k)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&cp->ccwchain_list);
	memcpy(&cp->orb, orb, sizeof(*orb));
	cp->mdev = mdev;

	/* Get chain length. */
	len = ccwchain_calc_length(iova, cp);
	if (len < 0)
		return len;

	/* Alloc mem for the head chain. */
	chain = ccwchain_alloc(cp, len);
	if (!chain)
		return -ENOMEM;
	chain->ch_iova = iova;

	/* Copy the head chain from guest. */
	ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len);
	if (ret) {
		ccwchain_free(chain);
		return ret;
	}

	/* Now loop for its TICs. */
	ret = ccwchain_loop_tic(chain, cp);
	if (ret)
		cp_unpin_free(cp);

	return ret;
}

/**
 * cp_free() - free resources for channel program.
 * @cp: channel_program on which to perform the operation
 *
 * This unpins the memory pages and frees the memory space occupied by
 * @cp, which must have been returned by a previous call to cp_init().
 * Otherwise, undefined behavior occurs.
 */
void cp_free(struct channel_program *cp)
{
	cp_unpin_free(cp);
}

/**
 * cp_prefetch() - translate a guest physical address channel program to
 *                 a real-device runnable channel program.
 * @cp: channel_program on which to perform the operation
 *
 * This function translates the guest-physical-address channel program
 * and stores the result to the ccwchain list. @cp must have been
 * initialized by a previous call to cp_init(). Otherwise, undefined
 * behavior occurs.
 *
 * The S/390 CCW Translation APIs (prefixed by 'cp_') are introduced
 * as helpers to do ccw chain translation inside the kernel. Basically
 * they accept a channel program issued by a virtual machine, and
 * translate the channel program to a real-device runnable channel
 * program.
 *
 * These APIs will copy the ccws into kernel-space buffers, and update
 * the guest physical addresses with their corresponding host physical
 * addresses. Then channel I/O device drivers could issue the
 * translated channel program to real devices to perform an I/O
 * operation.
 *
 * These interfaces are designed to support translation only for
 * channel programs, which are generated and formatted by a
 * guest. Thus this will make it possible for things like VFIO to
 * leverage the interfaces to passthrough a channel I/O mediated
 * device in QEMU.
 *
 * We support direct ccw chaining by translating them to idal ccws.
 *
 * Returns:
 *   %0 on success and a negative error value on failure.
 */
int cp_prefetch(struct channel_program *cp)
{
	struct ccwchain *chain;
	int len, idx, ret;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		len = chain->ch_len;
		for (idx = 0; idx < len; idx++) {
			ret = ccwchain_fetch_one(chain, idx, cp);
			if (ret)
				return ret;
		}
	}

	return 0;
}

/**
 * cp_get_orb() - get the orb of the channel program
 * @cp: channel_program on which to perform the operation
 * @intparm: new intparm for the returned orb
 * @lpm: candidate value of the logical-path mask for the returned orb
 *
 * This function returns the address of the updated orb of the channel
 * program. Channel I/O device drivers could use this orb to issue a
 * ssch.
 */
union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm)
{
	union orb *orb;
	struct ccwchain *chain;
	struct ccw1 *cpa;

	orb = &cp->orb;

	orb->cmd.intparm = intparm;
	orb->cmd.fmt = 1;
	orb->cmd.key = PAGE_DEFAULT_KEY >> 4;

	if (orb->cmd.lpm == 0)
		orb->cmd.lpm = lpm;

	chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
	cpa = chain->ch_ccw;
	orb->cmd.cpa = (__u32) __pa(cpa);

	return orb;
}

/**
 * cp_update_scsw() - update scsw for a channel program.
 * @cp: channel_program on which to perform the operation
 * @scsw: I/O results of the channel program and also the target to be
 *        updated
 *
 * @scsw contains the I/O results of the channel program pointed to
 * by @cp. However, what @scsw->cpa stores is a host physical
 * address, which is meaningless for the guest, which is waiting for
 * the I/O results.
 *
 * This function updates @scsw->cpa to its corresponding guest physical
 * address.
 */
void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
{
	struct ccwchain *chain;
	u32 cpa = scsw->cmd.cpa;
	u32 ccw_head, ccw_tail;

	/*
	 * LATER:
	 * For now, only update the cmd.cpa part. We may need to deal with
	 * other portions of the schib as well, even if we don't return them
	 * in the ioctl directly. Path status changes etc.
	 */
	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		ccw_head = (u32)(u64)chain->ch_ccw;
		ccw_tail = (u32)(u64)(chain->ch_ccw + chain->ch_len - 1);

		if ((ccw_head <= cpa) && (cpa <= ccw_tail)) {
			/*
			 * (cpa - ccw_head) is the offset value of the host
			 * physical ccw to its chain head.
			 * Adding this value to the guest physical ccw chain
			 * head gets us the guest cpa.
			 */
			cpa = chain->ch_iova + (cpa - ccw_head);
			break;
		}
	}

	scsw->cmd.cpa = cpa;
}

/**
 * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
 * @cp: channel_program on which to perform the operation
 * @iova: the iova to check
 *
 * If the @iova is currently pinned for the ccw chain, return true;
 * else return false.
 */
bool cp_iova_pinned(struct channel_program *cp, u64 iova)
{
	struct ccwchain *chain;
	int i;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		for (i = 0; i < chain->ch_len; i++)
			if (pfn_array_table_iova_pinned(chain->ch_pat + i,
							iova))
				return true;
	}

	return false;
}

drivers/s390/cio/vfio_ccw_cp.h

@ -0,0 +1,42 @@
/*
 * channel program interfaces
 *
 * Copyright IBM Corp. 2017
 *
 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
 *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
 */

#ifndef _VFIO_CCW_CP_H_
#define _VFIO_CCW_CP_H_

#include <asm/cio.h>
#include <asm/scsw.h>

#include "orb.h"

/**
 * struct channel_program - manage information for channel program
 * @ccwchain_list: list head of ccwchains
 * @orb: orb for the currently processed ssch request
 * @mdev: the mediated device to perform page pinning/unpinning
 *
 * @ccwchain_list is the head of a ccwchain list, that contains the
 * translated result of the guest channel program pointed out by
 * the iova parameter when calling cp_init.
 */
struct channel_program {
	struct list_head ccwchain_list;
	union orb orb;
	struct device *mdev;
};

extern int cp_init(struct channel_program *cp, struct device *mdev,
		   union orb *orb);
extern void cp_free(struct channel_program *cp);
extern int cp_prefetch(struct channel_program *cp);
extern union orb *cp_get_orb(struct channel_program *cp, u32 intparm, u8 lpm);
extern void cp_update_scsw(struct channel_program *cp, union scsw *scsw);
extern bool cp_iova_pinned(struct channel_program *cp, u64 iova);

#endif

@ -0,0 +1,308 @@
|
|||
/*
|
||||
* VFIO based Physical Subchannel device driver
|
||||
*
|
||||
* Copyright IBM Corp. 2017
|
||||
*
|
||||
* Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
|
||||
* Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uuid.h>
|
||||
#include <linux/mdev.h>
|
||||
|
||||
#include <asm/isc.h>
|
||||
|
||||
#include "ioasm.h"
|
||||
#include "css.h"
|
||||
#include "vfio_ccw_private.h"
|
||||
|
||||
struct workqueue_struct *vfio_ccw_work_q;
|
||||
|
||||
/*
|
||||
* Helpers
|
||||
*/
|
||||
int vfio_ccw_sch_quiesce(struct subchannel *sch)
|
||||
{
|
||||
struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
|
||||
DECLARE_COMPLETION_ONSTACK(completion);
|
||||
int iretry, ret = 0;
|
||||
|
||||
spin_lock_irq(sch->lock);
|
||||
if (!sch->schib.pmcw.ena)
|
||||
goto out_unlock;
|
||||
ret = cio_disable_subchannel(sch);
|
||||
if (ret != -EBUSY)
|
||||
goto out_unlock;
|
||||
|
||||
do {
|
||||
iretry = 255;
|
||||
|
||||
ret = cio_cancel_halt_clear(sch, &iretry);
|
||||
while (ret == -EBUSY) {
|
||||
/*
|
||||
* Flush all I/O and wait for
|
||||
* cancel/halt/clear completion.
|
||||
*/
|
||||
private->completion = &completion;
|
||||
spin_unlock_irq(sch->lock);
|
||||
|
||||
wait_for_completion_timeout(&completion, 3*HZ);
|
||||
|
||||
spin_lock_irq(sch->lock);
|
||||
private->completion = NULL;
|
||||
flush_workqueue(vfio_ccw_work_q);
|
||||
ret = cio_cancel_halt_clear(sch, &iretry);
|
||||
};
|
||||
|
||||
ret = cio_disable_subchannel(sch);
|
||||
} while (ret == -EBUSY);
|
||||
out_unlock:
|
||||
private->state = VFIO_CCW_STATE_NOT_OPER;
|
||||
spin_unlock_irq(sch->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void vfio_ccw_sch_io_todo(struct work_struct *work)
|
||||
{
|
||||
struct vfio_ccw_private *private;
|
||||
struct subchannel *sch;
|
||||
struct irb *irb;
|
||||
|
||||
private = container_of(work, struct vfio_ccw_private, io_work);
|
||||
irb = &private->irb;
|
||||
sch = private->sch;
|
||||
|
||||
if (scsw_is_solicited(&irb->scsw)) {
|
||||
cp_update_scsw(&private->cp, &irb->scsw);
|
||||
cp_free(&private->cp);
|
||||
}
|
||||
memcpy(private->io_region.irb_area, irb, sizeof(*irb));
|
||||
|
||||
if (private->io_trigger)
|
||||
eventfd_signal(private->io_trigger, 1);
|
||||
|
||||
if (private->mdev)
|
||||
private->state = VFIO_CCW_STATE_IDLE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sysfs interfaces
|
||||
*/
|
||||
static ssize_t chpids_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct subchannel *sch = to_subchannel(dev);
|
||||
struct chsc_ssd_info *ssd = &sch->ssd_info;
|
||||
ssize_t ret = 0;
|
||||
int chp;
|
||||
int mask;
|
||||
|
||||
for (chp = 0; chp < 8; chp++) {
|
||||
mask = 0x80 >> chp;
|
||||
if (ssd->path_mask & mask)
|
||||
ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id);
|
||||
else
|
||||
ret += sprintf(buf + ret, "00 ");
|
||||
}
|
||||
ret += sprintf(buf+ret, "\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t pimpampom_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct subchannel *sch = to_subchannel(dev);
|
||||
struct pmcw *pmcw = &sch->schib.pmcw;
|
||||
|
||||
return sprintf(buf, "%02x %02x %02x\n",
|
||||
pmcw->pim, pmcw->pam, pmcw->pom);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(chpids, 0444, chpids_show, NULL);
|
||||
static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL);
|
||||
|
||||
static struct attribute *vfio_subchannel_attrs[] = {
|
||||
&dev_attr_chpids.attr,
|
||||
&dev_attr_pimpampom.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group vfio_subchannel_attr_group = {
|
||||
.attrs = vfio_subchannel_attrs,
|
||||
};

/*
 * Css driver callbacks
 */
static void vfio_ccw_sch_irq(struct subchannel *sch)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);

	inc_irq_stat(IRQIO_CIO);
	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
}

static int vfio_ccw_sch_probe(struct subchannel *sch)
{
	struct pmcw *pmcw = &sch->schib.pmcw;
	struct vfio_ccw_private *private;
	int ret;

	if (pmcw->qf) {
		dev_warn(&sch->dev, "vfio: ccw: does not support QDIO: %s\n",
			 dev_name(&sch->dev));
		return -ENODEV;
	}

	private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
	if (!private)
		return -ENOMEM;
	private->sch = sch;
	dev_set_drvdata(&sch->dev, private);

	spin_lock_irq(sch->lock);
	private->state = VFIO_CCW_STATE_NOT_OPER;
	sch->isc = VFIO_CCW_ISC;
	ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
	spin_unlock_irq(sch->lock);
	if (ret)
		goto out_free;

	ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
	if (ret)
		goto out_disable;

	ret = vfio_ccw_mdev_reg(sch);
	if (ret)
		goto out_rm_group;

	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
	atomic_set(&private->avail, 1);
	private->state = VFIO_CCW_STATE_STANDBY;

	return 0;

out_rm_group:
	sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);
out_disable:
	cio_disable_subchannel(sch);
out_free:
	dev_set_drvdata(&sch->dev, NULL);
	kfree(private);
	return ret;
}

static int vfio_ccw_sch_remove(struct subchannel *sch)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);

	vfio_ccw_sch_quiesce(sch);

	vfio_ccw_mdev_unreg(sch);

	sysfs_remove_group(&sch->dev.kobj, &vfio_subchannel_attr_group);

	dev_set_drvdata(&sch->dev, NULL);

	kfree(private);

	return 0;
}

static void vfio_ccw_sch_shutdown(struct subchannel *sch)
{
	vfio_ccw_sch_quiesce(sch);
}

/**
 * vfio_ccw_sch_event - process subchannel event
 * @sch: subchannel
 * @process: non-zero if function is called in process context
 *
 * An unspecified event occurred for this subchannel. Adjust data according
 * to the current operational state of the subchannel. Return zero when the
 * event has been handled sufficiently or -EAGAIN when this function should
 * be called again in process context.
 */
static int vfio_ccw_sch_event(struct subchannel *sch, int process)
{
	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
	unsigned long flags;

	spin_lock_irqsave(sch->lock, flags);
	if (!device_is_registered(&sch->dev))
		goto out_unlock;

	if (work_pending(&sch->todo_work))
		goto out_unlock;

	if (cio_update_schib(sch)) {
		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
		goto out_unlock;
	}

	private = dev_get_drvdata(&sch->dev);
	if (private->state == VFIO_CCW_STATE_NOT_OPER) {
		private->state = private->mdev ? VFIO_CCW_STATE_IDLE :
				 VFIO_CCW_STATE_STANDBY;
	}

out_unlock:
	spin_unlock_irqrestore(sch->lock, flags);

	return 0;
}

static struct css_device_id vfio_ccw_sch_ids[] = {
	{ .match_flags = 0x1, .type = SUBCHANNEL_TYPE_IO, },
	{ /* end of list */ },
};
MODULE_DEVICE_TABLE(css, vfio_ccw_sch_ids);

static struct css_driver vfio_ccw_sch_driver = {
	.drv = {
		.name = "vfio_ccw",
		.owner = THIS_MODULE,
	},
	.subchannel_type = vfio_ccw_sch_ids,
	.irq = vfio_ccw_sch_irq,
	.probe = vfio_ccw_sch_probe,
	.remove = vfio_ccw_sch_remove,
	.shutdown = vfio_ccw_sch_shutdown,
	.sch_event = vfio_ccw_sch_event,
};

static int __init vfio_ccw_sch_init(void)
{
	int ret;

	vfio_ccw_work_q = create_singlethread_workqueue("vfio-ccw");
	if (!vfio_ccw_work_q)
		return -ENOMEM;

	isc_register(VFIO_CCW_ISC);
	ret = css_driver_register(&vfio_ccw_sch_driver);
	if (ret) {
		isc_unregister(VFIO_CCW_ISC);
		destroy_workqueue(vfio_ccw_work_q);
	}

	return ret;
}

static void __exit vfio_ccw_sch_exit(void)
{
	css_driver_unregister(&vfio_ccw_sch_driver);
	isc_unregister(VFIO_CCW_ISC);
	destroy_workqueue(vfio_ccw_work_q);
}
module_init(vfio_ccw_sch_init);
module_exit(vfio_ccw_sch_exit);

MODULE_LICENSE("GPL v2");

@ -0,0 +1,207 @@
/*
 * Finite state machine for vfio-ccw device handling
 *
 * Copyright IBM Corp. 2017
 *
 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
 */

#include <linux/vfio.h>
#include <linux/mdev.h>

#include "ioasm.h"
#include "vfio_ccw_private.h"

static int fsm_io_helper(struct vfio_ccw_private *private)
{
	struct subchannel *sch;
	union orb *orb;
	int ccode;
	__u8 lpm;
	unsigned long flags;

	sch = private->sch;

	spin_lock_irqsave(sch->lock, flags);
	private->state = VFIO_CCW_STATE_BUSY;
	spin_unlock_irqrestore(sch->lock, flags);

	orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);

	/* Issue "Start Subchannel" */
	ccode = ssch(sch->schid, orb);

	switch (ccode) {
	case 0:
		/*
		 * Initialize device status information
		 */
		sch->schib.scsw.cmd.actl |= SCSW_ACTL_START_PEND;
		return 0;
	case 1:		/* Status pending */
	case 2:		/* Busy */
		return -EBUSY;
	case 3:		/* Device/path not operational */
	{
		lpm = orb->cmd.lpm;
		if (lpm != 0)
			sch->lpm &= ~lpm;
		else
			sch->lpm = 0;

		if (cio_update_schib(sch))
			return -ENODEV;

		return sch->lpm ? -EACCES : -ENODEV;
	}
	default:
		return ccode;
	}
}

static void fsm_notoper(struct vfio_ccw_private *private,
			enum vfio_ccw_event event)
{
	struct subchannel *sch = private->sch;

	/*
	 * TODO:
	 * Probably we should send the machine check to the guest.
	 */
	css_sched_sch_todo(sch, SCH_TODO_UNREG);
	private->state = VFIO_CCW_STATE_NOT_OPER;
}

/*
 * No operation action.
 */
static void fsm_nop(struct vfio_ccw_private *private,
		    enum vfio_ccw_event event)
{
}

static void fsm_io_error(struct vfio_ccw_private *private,
			 enum vfio_ccw_event event)
{
	pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state);
	private->io_region.ret_code = -EIO;
}

static void fsm_io_busy(struct vfio_ccw_private *private,
			enum vfio_ccw_event event)
{
	private->io_region.ret_code = -EBUSY;
}

static void fsm_disabled_irq(struct vfio_ccw_private *private,
			     enum vfio_ccw_event event)
{
	struct subchannel *sch = private->sch;

	/*
	 * An interrupt in a disabled state means a previous disable was not
	 * successful - should not happen, but we try to disable again.
	 */
	cio_disable_subchannel(sch);
}

/*
 * Deal with the ccw command request from userspace.
 */
static void fsm_io_request(struct vfio_ccw_private *private,
			   enum vfio_ccw_event event)
{
	union orb *orb;
	union scsw *scsw = &private->scsw;
	struct ccw_io_region *io_region = &private->io_region;
	struct mdev_device *mdev = private->mdev;

	private->state = VFIO_CCW_STATE_BOXED;

	memcpy(scsw, io_region->scsw_area, sizeof(*scsw));

	if (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) {
		orb = (union orb *)io_region->orb_area;

		io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev),
					      orb);
		if (io_region->ret_code)
			goto err_out;

		io_region->ret_code = cp_prefetch(&private->cp);
		if (io_region->ret_code) {
			cp_free(&private->cp);
			goto err_out;
		}

		/* Start channel program and wait for I/O interrupt. */
		io_region->ret_code = fsm_io_helper(private);
		if (io_region->ret_code) {
			cp_free(&private->cp);
			goto err_out;
		}
		return;
	} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
		/* XXX: Handle halt. */
		io_region->ret_code = -EOPNOTSUPP;
		goto err_out;
	} else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
		/* XXX: Handle clear. */
		io_region->ret_code = -EOPNOTSUPP;
		goto err_out;
	}

err_out:
	private->state = VFIO_CCW_STATE_IDLE;
}
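
/*
 * Summary of the start path above (an editorial gloss based on the
 * cp_* names; the implementation lives in vfio_ccw_cp.c, outside this
 * excerpt): cp_init() copies the guest's channel program described by
 * the ORB, cp_prefetch() pins the guest pages and translates the CCW
 * data addresses, and fsm_io_helper() issues ssch() with the
 * translated ORB. On any failure the pinned resources are released
 * via cp_free() and the state falls back to IDLE.
 */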

/*
 * Got an interrupt for a normal I/O (state busy).
 */
static void fsm_irq(struct vfio_ccw_private *private,
		    enum vfio_ccw_event event)
{
	struct irb *irb;

	if (!private)
		return;

	irb = this_cpu_ptr(&cio_irb);
	memcpy(&private->irb, irb, sizeof(*irb));

	queue_work(vfio_ccw_work_q, &private->io_work);

	if (private->completion)
		complete(private->completion);
}

/*
 * Device statemachine
 */
fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
	[VFIO_CCW_STATE_NOT_OPER] = {
		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_nop,
		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_disabled_irq,
	},
	[VFIO_CCW_STATE_STANDBY] = {
		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_error,
		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
	},
	[VFIO_CCW_STATE_IDLE] = {
		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_request,
		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
	},
	[VFIO_CCW_STATE_BOXED] = {
		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
	},
	[VFIO_CCW_STATE_BUSY] = {
		[VFIO_CCW_EVENT_NOT_OPER]	= fsm_notoper,
		[VFIO_CCW_EVENT_IO_REQ]		= fsm_io_busy,
		[VFIO_CCW_EVENT_INTERRUPT]	= fsm_irq,
	},
};
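
/*
 * Dispatch works by plain table lookup, see vfio_ccw_fsm_event() in
 * vfio_ccw_private.h. For example, an interrupt arriving while an
 * I/O is in flight:
 *
 *	private->state == VFIO_CCW_STATE_BUSY;
 *	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
 *	   -> vfio_ccw_jumptable[VFIO_CCW_STATE_BUSY]
 *	                        [VFIO_CCW_EVENT_INTERRUPT] == fsm_irq
 */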

@ -0,0 +1,447 @@
/*
 * Physical device callbacks for vfio_ccw
 *
 * Copyright IBM Corp. 2017
 *
 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
 *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
 */

#include <linux/vfio.h>
#include <linux/mdev.h>

#include "vfio_ccw_private.h"

static int vfio_ccw_mdev_reset(struct mdev_device *mdev)
{
	struct vfio_ccw_private *private;
	struct subchannel *sch;
	int ret;

	private = dev_get_drvdata(mdev_parent_dev(mdev));
	if (!private)
		return -ENODEV;

	sch = private->sch;
	/*
	 * TODO:
	 * In the current stage, some things like "no I/O running" and "no
	 * interrupt pending" are clear, but we are not sure what other state
	 * we need to care about.
	 * There are still many more instructions that need to be handled. We
	 * should come back here later.
	 */
	ret = vfio_ccw_sch_quiesce(sch);
	if (ret)
		return ret;

	ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
	if (!ret)
		private->state = VFIO_CCW_STATE_IDLE;

	return ret;
}

static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
				  unsigned long action,
				  void *data)
{
	struct vfio_ccw_private *private =
		container_of(nb, struct vfio_ccw_private, nb);

	if (!private)
		return NOTIFY_STOP;

	/*
	 * Vendor drivers MUST unpin pages in response to an
	 * invalidation.
	 */
	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;

		if (!cp_iova_pinned(&private->cp, unmap->iova))
			return NOTIFY_OK;

		if (vfio_ccw_mdev_reset(private->mdev))
			return NOTIFY_BAD;

		cp_free(&private->cp);
		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}

static ssize_t name_show(struct kobject *kobj, struct device *dev, char *buf)
{
	return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
}
MDEV_TYPE_ATTR_RO(name);

static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
			       char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
}
MDEV_TYPE_ATTR_RO(device_api);

static ssize_t available_instances_show(struct kobject *kobj,
					struct device *dev, char *buf)
{
	struct vfio_ccw_private *private = dev_get_drvdata(dev);

	return sprintf(buf, "%d\n", atomic_read(&private->avail));
}
MDEV_TYPE_ATTR_RO(available_instances);

static struct attribute *mdev_types_attrs[] = {
	&mdev_type_attr_name.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_available_instances.attr,
	NULL,
};

static struct attribute_group mdev_type_group = {
	.name  = "io",
	.attrs = mdev_types_attrs,
};

struct attribute_group *mdev_type_groups[] = {
	&mdev_type_group,
	NULL,
};
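
/*
 * Hypothetical usage sketch (not part of this file): with the type
 * group above registered as "io", the mdev core is expected to expose
 * it as "vfio_ccw-io" under the parent subchannel; writing a UUID to
 * its "create" attribute instantiates a mediated device. The path
 * layout is an assumption based on mdev conventions.
 */
#include <stdio.h>

static int create_vfio_ccw_mdev(const char *parent_path, const char *uuid)
{
	char path[512];
	FILE *f;

	snprintf(path, sizeof(path),
		 "%s/mdev_supported_types/vfio_ccw-io/create", parent_path);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", uuid);
	return fclose(f);
}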

static int vfio_ccw_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
{
	struct vfio_ccw_private *private =
		dev_get_drvdata(mdev_parent_dev(mdev));

	if (private->state == VFIO_CCW_STATE_NOT_OPER)
		return -ENODEV;

	if (atomic_dec_if_positive(&private->avail) < 0)
		return -EPERM;

	private->mdev = mdev;
	private->state = VFIO_CCW_STATE_IDLE;

	return 0;
}

static int vfio_ccw_mdev_remove(struct mdev_device *mdev)
{
	struct vfio_ccw_private *private =
		dev_get_drvdata(mdev_parent_dev(mdev));
	int ret;

	if (!private)
		return 0;	/* Nothing to clean up. */

	if ((private->state == VFIO_CCW_STATE_NOT_OPER) ||
	    (private->state == VFIO_CCW_STATE_STANDBY))
		goto out;

	ret = vfio_ccw_mdev_reset(mdev);
	if (ret)
		return ret;

	private->state = VFIO_CCW_STATE_STANDBY;

out:
	private->mdev = NULL;
	atomic_inc(&private->avail);

	return 0;
}

static int vfio_ccw_mdev_open(struct mdev_device *mdev)
{
	struct vfio_ccw_private *private =
		dev_get_drvdata(mdev_parent_dev(mdev));
	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;

	private->nb.notifier_call = vfio_ccw_mdev_notifier;

	return vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
				      &events, &private->nb);
}

void vfio_ccw_mdev_release(struct mdev_device *mdev)
{
	struct vfio_ccw_private *private =
		dev_get_drvdata(mdev_parent_dev(mdev));

	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
				 &private->nb);
}

static ssize_t vfio_ccw_mdev_read(struct mdev_device *mdev,
				  char __user *buf,
				  size_t count,
				  loff_t *ppos)
{
	struct vfio_ccw_private *private;
	struct ccw_io_region *region;

	if (*ppos + count > sizeof(*region))
		return -EINVAL;

	private = dev_get_drvdata(mdev_parent_dev(mdev));
	if (!private)
		return -ENODEV;

	region = &private->io_region;
	if (copy_to_user(buf, (void *)region + *ppos, count))
		return -EFAULT;

	return count;
}

static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
				   const char __user *buf,
				   size_t count,
				   loff_t *ppos)
{
	struct vfio_ccw_private *private;
	struct ccw_io_region *region;

	if (*ppos + count > sizeof(*region))
		return -EINVAL;

	private = dev_get_drvdata(mdev_parent_dev(mdev));
	if (!private)
		return -ENODEV;
	if (private->state != VFIO_CCW_STATE_IDLE)
		return -EACCES;

	region = &private->io_region;
	if (copy_from_user((void *)region + *ppos, buf, count))
		return -EFAULT;

	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ);
	if (region->ret_code != 0) {
		private->state = VFIO_CCW_STATE_IDLE;
		return region->ret_code;
	}

	return count;
}
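
/*
 * Hypothetical userspace sketch (not part of this file): submitting a
 * channel program means writing the whole I/O region, with a guest ORB
 * in orb_area and a SCSW whose start-function bit is set in scsw_area.
 * Error handling is minimal; "device_fd" is assumed to be an open
 * vfio-ccw device file descriptor.
 */
#include <string.h>
#include <unistd.h>
#include <linux/vfio_ccw.h>

static int submit_channel_program(int device_fd,
				  const void *orb, const void *scsw)
{
	struct ccw_io_region region;

	memset(&region, 0, sizeof(region));
	memcpy(region.orb_area, orb, ORB_AREA_SIZE);
	memcpy(region.scsw_area, scsw, SCSW_AREA_SIZE);

	/* On failure the kernel returns the FSM's ret_code as -errno. */
	if (pwrite(device_fd, &region, sizeof(region), 0) != sizeof(region))
		return -1;

	return 0;
}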

static int vfio_ccw_mdev_get_device_info(struct vfio_device_info *info)
{
	info->flags = VFIO_DEVICE_FLAGS_CCW | VFIO_DEVICE_FLAGS_RESET;
	info->num_regions = VFIO_CCW_NUM_REGIONS;
	info->num_irqs = VFIO_CCW_NUM_IRQS;

	return 0;
}

static int vfio_ccw_mdev_get_region_info(struct vfio_region_info *info,
					 u16 *cap_type_id,
					 void **cap_type)
{
	switch (info->index) {
	case VFIO_CCW_CONFIG_REGION_INDEX:
		info->offset = 0;
		info->size = sizeof(struct ccw_io_region);
		info->flags = VFIO_REGION_INFO_FLAG_READ
			      | VFIO_REGION_INFO_FLAG_WRITE;
		return 0;
	default:
		return -EINVAL;
	}
}

int vfio_ccw_mdev_get_irq_info(struct vfio_irq_info *info)
{
	if (info->index != VFIO_CCW_IO_IRQ_INDEX)
		return -EINVAL;

	info->count = 1;
	info->flags = VFIO_IRQ_INFO_EVENTFD;

	return 0;
}

static int vfio_ccw_mdev_set_irqs(struct mdev_device *mdev,
				  uint32_t flags,
				  void __user *data)
{
	struct vfio_ccw_private *private;
	struct eventfd_ctx **ctx;

	if (!(flags & VFIO_IRQ_SET_ACTION_TRIGGER))
		return -EINVAL;

	private = dev_get_drvdata(mdev_parent_dev(mdev));
	if (!private)
		return -ENODEV;

	ctx = &private->io_trigger;

	switch (flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
	case VFIO_IRQ_SET_DATA_NONE:
	{
		if (*ctx)
			eventfd_signal(*ctx, 1);
		return 0;
	}
	case VFIO_IRQ_SET_DATA_BOOL:
	{
		uint8_t trigger;

		if (get_user(trigger, (uint8_t __user *)data))
			return -EFAULT;

		if (trigger && *ctx)
			eventfd_signal(*ctx, 1);
		return 0;
	}
	case VFIO_IRQ_SET_DATA_EVENTFD:
	{
		int32_t fd;

		if (get_user(fd, (int32_t __user *)data))
			return -EFAULT;

		if (fd == -1) {
			if (*ctx)
				eventfd_ctx_put(*ctx);
			*ctx = NULL;
		} else if (fd >= 0) {
			struct eventfd_ctx *efdctx;

			efdctx = eventfd_ctx_fdget(fd);
			if (IS_ERR(efdctx))
				return PTR_ERR(efdctx);

			if (*ctx)
				eventfd_ctx_put(*ctx);

			*ctx = efdctx;
		} else
			return -EINVAL;

		return 0;
	}
	default:
		return -EINVAL;
	}
}

static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
				   unsigned int cmd,
				   unsigned long arg)
{
	int ret = 0;
	unsigned long minsz;

	switch (cmd) {
	case VFIO_DEVICE_GET_INFO:
	{
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		ret = vfio_ccw_mdev_get_device_info(&info);
		if (ret)
			return ret;

		return copy_to_user((void __user *)arg, &info, minsz);
	}
	case VFIO_DEVICE_GET_REGION_INFO:
	{
		struct vfio_region_info info;
		u16 cap_type_id = 0;
		void *cap_type = NULL;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		ret = vfio_ccw_mdev_get_region_info(&info, &cap_type_id,
						    &cap_type);
		if (ret)
			return ret;

		return copy_to_user((void __user *)arg, &info, minsz);
	}
	case VFIO_DEVICE_GET_IRQ_INFO:
	{
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_CCW_NUM_IRQS)
			return -EINVAL;

		ret = vfio_ccw_mdev_get_irq_info(&info);
		if (ret)
			return ret;

		if (info.count == -1)
			return -EINVAL;

		return copy_to_user((void __user *)arg, &info, minsz);
	}
	case VFIO_DEVICE_SET_IRQS:
	{
		struct vfio_irq_set hdr;
		size_t data_size;
		void __user *data;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		ret = vfio_set_irqs_validate_and_prepare(&hdr, 1,
							 VFIO_CCW_NUM_IRQS,
							 &data_size);
		if (ret)
			return ret;

		data = (void __user *)(arg + minsz);
		return vfio_ccw_mdev_set_irqs(mdev, hdr.flags, data);
	}
	case VFIO_DEVICE_RESET:
		return vfio_ccw_mdev_reset(mdev);
	default:
		return -ENOTTY;
	}
}
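
/*
 * Hypothetical userspace sketch (not part of this file): registering
 * an eventfd for the single vfio-ccw I/O IRQ, following the generic
 * VFIO_DEVICE_SET_IRQS calling convention.
 */
#include <stdint.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int register_io_eventfd(int device_fd)
{
	char buf[sizeof(struct vfio_irq_set) + sizeof(int32_t)];
	struct vfio_irq_set *irq_set = (struct vfio_irq_set *)buf;
	int32_t event_fd = eventfd(0, 0);

	if (event_fd < 0)
		return -1;

	irq_set->argsz = sizeof(buf);
	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
			 VFIO_IRQ_SET_ACTION_TRIGGER;
	irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
	irq_set->start = 0;
	irq_set->count = 1;
	memcpy(irq_set->data, &event_fd, sizeof(int32_t));

	if (ioctl(device_fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0)
		return -1;

	return event_fd;
}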

static const struct mdev_parent_ops vfio_ccw_mdev_ops = {
	.owner			= THIS_MODULE,
	.supported_type_groups	= mdev_type_groups,
	.create			= vfio_ccw_mdev_create,
	.remove			= vfio_ccw_mdev_remove,
	.open			= vfio_ccw_mdev_open,
	.release		= vfio_ccw_mdev_release,
	.read			= vfio_ccw_mdev_read,
	.write			= vfio_ccw_mdev_write,
	.ioctl			= vfio_ccw_mdev_ioctl,
};

int vfio_ccw_mdev_reg(struct subchannel *sch)
{
	return mdev_register_device(&sch->dev, &vfio_ccw_mdev_ops);
}

void vfio_ccw_mdev_unreg(struct subchannel *sch)
{
	mdev_unregister_device(&sch->dev);
}

@ -0,0 +1,96 @@
/*
 * Private stuff for vfio_ccw driver
 *
 * Copyright IBM Corp. 2017
 *
 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
 *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
 */

#ifndef _VFIO_CCW_PRIVATE_H_
#define _VFIO_CCW_PRIVATE_H_

#include <linux/completion.h>
#include <linux/eventfd.h>
#include <linux/workqueue.h>
#include <linux/vfio_ccw.h>

#include "css.h"
#include "vfio_ccw_cp.h"

/**
 * struct vfio_ccw_private
 * @sch: pointer to the subchannel
 * @state: internal state of the device
 * @completion: synchronization helper of the I/O completion
 * @avail: available for creating a mediated device
 * @mdev: pointer to the mediated device
 * @nb: notifier for vfio events
 * @io_region: MMIO region to input/output I/O arguments/results
 * @cp: channel program for the current I/O operation
 * @irb: irb info received from interrupt
 * @scsw: scsw info
 * @io_trigger: eventfd ctx for signaling userspace I/O results
 * @io_work: work for deferral process of I/O handling
 */
struct vfio_ccw_private {
	struct subchannel	*sch;
	int			state;
	struct completion	*completion;
	atomic_t		avail;
	struct mdev_device	*mdev;
	struct notifier_block	nb;
	struct ccw_io_region	io_region;

	struct channel_program	cp;
	struct irb		irb;
	union scsw		scsw;

	struct eventfd_ctx	*io_trigger;
	struct work_struct	io_work;
} __aligned(8);

extern int vfio_ccw_mdev_reg(struct subchannel *sch);
extern void vfio_ccw_mdev_unreg(struct subchannel *sch);

extern int vfio_ccw_sch_quiesce(struct subchannel *sch);

/*
 * States of the device statemachine.
 */
enum vfio_ccw_state {
	VFIO_CCW_STATE_NOT_OPER,
	VFIO_CCW_STATE_STANDBY,
	VFIO_CCW_STATE_IDLE,
	VFIO_CCW_STATE_BOXED,
	VFIO_CCW_STATE_BUSY,
	/* last element! */
	NR_VFIO_CCW_STATES
};

/*
 * Asynchronous events of the device statemachine.
 */
enum vfio_ccw_event {
	VFIO_CCW_EVENT_NOT_OPER,
	VFIO_CCW_EVENT_IO_REQ,
	VFIO_CCW_EVENT_INTERRUPT,
	/* last element! */
	NR_VFIO_CCW_EVENTS
};

/*
 * Action called through jumptable.
 */
typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];

static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
				      int event)
{
	vfio_ccw_jumptable[private->state][event](private, event);
}

extern struct workqueue_struct *vfio_ccw_work_q;

#endif

@ -198,6 +198,7 @@ struct vfio_device_info {
#define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
#define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
#define VFIO_DEVICE_FLAGS_CCW	(1 << 4)	/* vfio-ccw device */
	__u32	num_regions;	/* Max region index + 1 */
	__u32	num_irqs;	/* Max IRQ index + 1 */
};

@ -212,6 +213,7 @@ struct vfio_device_info {
#define VFIO_DEVICE_API_PCI_STRING		"vfio-pci"
#define VFIO_DEVICE_API_PLATFORM_STRING		"vfio-platform"
#define VFIO_DEVICE_API_AMBA_STRING		"vfio-amba"
#define VFIO_DEVICE_API_CCW_STRING		"vfio-ccw"

/**
 * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,

@ -446,6 +448,22 @@ enum {
	VFIO_PCI_NUM_IRQS
};

/*
 * The vfio-ccw bus driver makes use of the following fixed region and
 * IRQ index mapping. Unimplemented regions return a size of zero.
 * Unimplemented IRQ types return a count of zero.
 */

enum {
	VFIO_CCW_CONFIG_REGION_INDEX,
	VFIO_CCW_NUM_REGIONS
};

enum {
	VFIO_CCW_IO_IRQ_INDEX,
	VFIO_CCW_NUM_IRQS
};

/**
 * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12,
 * struct vfio_pci_hot_reset_info)

@ -0,0 +1,24 @@
/*
 * Interfaces for vfio-ccw
 *
 * Copyright IBM Corp. 2017
 *
 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
 */

#ifndef _VFIO_CCW_H_
#define _VFIO_CCW_H_

#include <linux/types.h>

struct ccw_io_region {
#define ORB_AREA_SIZE 12
	__u8	orb_area[ORB_AREA_SIZE];
#define SCSW_AREA_SIZE 12
	__u8	scsw_area[SCSW_AREA_SIZE];
#define IRB_AREA_SIZE 96
	__u8	irb_area[IRB_AREA_SIZE];
	__u32	ret_code;
} __packed;

#endif
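
/*
 * A compile-time sketch of the resulting layout (offsets assumed from
 * the __packed definition above): orb_area at 0, scsw_area at 12,
 * irb_area at 24 and ret_code at 120, 124 bytes in total.
 */
#include <stddef.h>
#include <linux/vfio_ccw.h>

_Static_assert(offsetof(struct ccw_io_region, orb_area)  ==   0, "orb");
_Static_assert(offsetof(struct ccw_io_region, scsw_area) ==  12, "scsw");
_Static_assert(offsetof(struct ccw_io_region, irb_area)  ==  24, "irb");
_Static_assert(offsetof(struct ccw_io_region, ret_code)  == 120, "ret");
_Static_assert(sizeof(struct ccw_io_region) == 124, "size");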