From 13b1eb333beed018140be3d0b77bf34000125a34 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Tue, 2 Dec 2008 16:24:40 -0600 Subject: [PATCH 01/17] virtio-pci queue allocation not page-aligned kzalloc() does not guarantee page alignment, and in fact this broke when I enabled CONFIG_SLUB_DEBUG_ON. (Thanks to Anthony Liguori for spotting the missing kfree sub) Signed-off-by: Hollis Blanchard Signed-off-by: Rusty Russell (fixed kfree) Tested-by: Anthony Liguori --- drivers/virtio/virtio_pci.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index c7dc37c7cce9..7c385af071a6 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -216,7 +216,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; struct virtqueue *vq; - unsigned long flags; + unsigned long flags, size; u16 num; int err; @@ -237,7 +237,8 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, info->queue_index = index; info->num = num; - info->queue = kzalloc(PAGE_ALIGN(vring_size(num,PAGE_SIZE)), GFP_KERNEL); + size = PAGE_ALIGN(vring_size(num, PAGE_SIZE)); + info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); if (info->queue == NULL) { err = -ENOMEM; goto out_info; @@ -266,7 +267,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, out_activate_queue: iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); - kfree(info->queue); + free_pages_exact(info->queue, size); out_info: kfree(info); return ERR_PTR(err); @@ -277,7 +278,7 @@ static void vp_del_vq(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_vq_info *info = vq->priv; - unsigned long flags; + unsigned long flags, size; spin_lock_irqsave(&vp_dev->lock, flags); list_del(&info->node); @@ -289,7 +290,8 @@ static void vp_del_vq(struct virtqueue *vq) iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); - kfree(info->queue); + size = PAGE_ALIGN(vring_size(info->num, PAGE_SIZE)); + free_pages_exact(info->queue, size); kfree(info); } From 99e0b6c8e3f30602383bcfe3f574537a02ee2bea Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 30 Dec 2008 09:25:56 -0600 Subject: [PATCH 02/17] virtio: struct device - replace bus_id with dev_name(), dev_set_name() This patch is part of a larger patch series which will remove the "char bus_id[20]" name string from struct device. The device name is managed in the kobject anyway, and without any size limitation, and just needlessly copied into "struct device". To set and read the device name dev_name(dev) and dev_set_name(dev) must be used. If your code uses static kobjects, which it shouldn't do, "const char *init_name" can be used to statically provide the name the registered device should have. At registration time, the init_name field is cleared, to enforce the use of dev_name(dev) to access the device name at a later time. We need to get rid of all occurrences of bus_id in the entire tree to be able to enable the new interface. Please apply this patch, and possibly convert any remaining remaining occurrences of bus_id. We want to submit a patch to -next, which will remove bus_id from "struct device", to find the remaining pieces to convert, and finally switch over to the new api, which will remove the 20 bytes array and does no longer have a size limitation. Acked-by: Greg Kroah-Hartman Signed-off-by: Kay Sievers Signed-off-by: Rusty Russell --- drivers/virtio/virtio.c | 2 +- drivers/virtio/virtio_pci.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 5b78fd0aff0a..018c070a357f 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -176,7 +176,7 @@ int register_virtio_device(struct virtio_device *dev) /* Assign a unique device index and hence name. */ dev->index = dev_index++; - sprintf(dev->dev.bus_id, "virtio%u", dev->index); + dev_set_name(&dev->dev, "virtio%u", dev->index); /* We always start by resetting the device, in case a previous * driver messed it up. This also tests that code path a little. */ diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 7c385af071a6..e37d686edfe0 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -75,7 +75,7 @@ MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); * would make more sense for virtio to not insist on having it's own device. */ static struct device virtio_pci_root = { .parent = NULL, - .bus_id = "virtio-pci", + .init_name = "virtio-pci", }; /* Convert a generic virtio device to our structure */ @@ -359,7 +359,7 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, /* register a handler for the queue with the PCI device's interrupt */ err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, - vp_dev->vdev.dev.bus_id, vp_dev); + dev_name(&vp_dev->vdev.dev), vp_dev); if (err) goto out_set_drvdata; From 480daab42c4dd74b3c07031ddf9031251c530c77 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:25:56 -0600 Subject: [PATCH 03/17] virtio: Don't use PAGE_SIZE in virtio_pci.c The virtio PCI devices don't depend on the guest page size. This matters now PowerPC virtio is gaining ground (they like 64k pages). Signed-off-by: Rusty Russell --- drivers/virtio/virtio_pci.c | 2 +- include/linux/virtio_pci.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index e37d686edfe0..c2cd69ba0a8c 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -245,7 +245,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, } /* activate the queue */ - iowrite32(virt_to_phys(info->queue) >> PAGE_SHIFT, + iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); /* create the vring */ diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index cdef35742932..e13d7ebcf576 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -53,4 +53,8 @@ /* Virtio ABI version, this must match exactly */ #define VIRTIO_PCI_ABI_VERSION 0 + +/* How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 #endif From 5f0d1d7f2286c8a02dab69f5f0bd51681fab161e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:25:57 -0600 Subject: [PATCH 04/17] virtio: rename 'pagesize' arg to vring_init/vring_size It's really the alignment desired for consumer/producer separation; historically this x86 pagesize, but with PowerPC it'll still be x86 pagesize. And in theory lguest could choose a different value. Signed-off-by: Rusty Russell --- include/linux/virtio_ring.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index c4a598fb3826..01bf3124e312 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -83,7 +83,7 @@ struct vring { * __u16 avail_idx; * __u16 available[num]; * - * // Padding to the next page boundary. + * // Padding to the next align boundary. * char pad[]; * * // A ring of used descriptor heads with free-running index. @@ -93,19 +93,19 @@ struct vring { * }; */ static inline void vring_init(struct vring *vr, unsigned int num, void *p, - unsigned long pagesize) + unsigned long align) { vr->num = num; vr->desc = p; vr->avail = p + num*sizeof(struct vring_desc); - vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + pagesize-1) - & ~(pagesize - 1)); + vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + align-1) + & ~(align - 1)); } -static inline unsigned vring_size(unsigned int num, unsigned long pagesize) +static inline unsigned vring_size(unsigned int num, unsigned long align) { return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) - + pagesize - 1) & ~(pagesize - 1)) + + align - 1) & ~(align - 1)) + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; } From 498af14783935af487d17dbee4ac451783cbc2b7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:25:57 -0600 Subject: [PATCH 05/17] virtio: Don't use PAGE_SIZE for vring alignment in virtio_pci. That doesn't work for non-4k guests which are now appearing. Signed-off-by: Rusty Russell --- drivers/virtio/virtio_pci.c | 4 ++-- include/linux/virtio_pci.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index c2cd69ba0a8c..f28643f3a4e8 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -237,7 +237,7 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, info->queue_index = index; info->num = num; - size = PAGE_ALIGN(vring_size(num, PAGE_SIZE)); + size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN)); info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); if (info->queue == NULL) { err = -ENOMEM; @@ -290,7 +290,7 @@ static void vp_del_vq(struct virtqueue *vq) iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); - size = PAGE_ALIGN(vring_size(info->num, PAGE_SIZE)); + size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN)); free_pages_exact(info->queue, size); kfree(info); } diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index e13d7ebcf576..cd0fd5d181a6 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -57,4 +57,8 @@ /* How many bits to shift physical queue address written to QUEUE_PFN. * 12 is historical, and due to x86 page size. */ #define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize again. */ +#define VIRTIO_PCI_VRING_ALIGN 4096 #endif From 2966af73e70dee461c256b5eb877b2ff757f8c82 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:25:58 -0600 Subject: [PATCH 06/17] virtio: use LGUEST_VRING_ALIGN instead of relying on pagesize This doesn't really matter, since lguest is i386 only at the moment, but we could actually choose a different value. (lguest doesn't have a guarenteed ABI). Signed-off-by: Rusty Russell --- Documentation/lguest/lguest.c | 6 +++--- drivers/lguest/lguest_device.c | 2 +- include/linux/lguest_launcher.h | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 804520633fcf..aa2574ca94c7 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -1030,7 +1030,7 @@ static void update_device_status(struct device *dev) /* Zero out the virtqueues. */ for (vq = dev->vq; vq; vq = vq->next) { memset(vq->vring.desc, 0, - vring_size(vq->config.num, getpagesize())); + vring_size(vq->config.num, LGUEST_VRING_ALIGN)); lg_last_avail(vq) = 0; } } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { @@ -1211,7 +1211,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, void *p; /* First we need some memory for this virtqueue. */ - pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) + pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1) / getpagesize(); p = get_pages(pages); @@ -1228,7 +1228,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, vq->config.pfn = to_guest_phys(p) / getpagesize(); /* Initialize the vring. */ - vring_init(&vq->vring, num_descs, p, getpagesize()); + vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); /* Append virtqueue to this device's descriptor. We use * device_config() to get the end of the device's current virtqueues; diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index a661bbdae3d6..f062dc55c573 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -250,7 +250,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* Figure out how many pages the ring will take, and map that memory */ lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT, DIV_ROUND_UP(vring_size(lvq->config.num, - PAGE_SIZE), + LGUEST_VRING_ALIGN), PAGE_SIZE)); if (!lvq->pages) { err = -ENOMEM; diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index e7217dc58f39..bd0eba760522 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -59,4 +59,8 @@ enum lguest_req LHREQ_IRQ, /* + irq */ LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ }; + +/* The alignment to use between consumer and producer parts of vring. + * x86 pagesize for historical reasons. */ +#define LGUEST_VRING_ALIGN 4096 #endif /* _LINUX_LGUEST_LAUNCHER */ From db40598863e8cbbd11053ad3c8bae89000f603f9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:26:02 -0600 Subject: [PATCH 07/17] virtio: use KVM_S390_VIRTIO_RING_ALIGN instead of relying on pagesize This doesn't really matter, since s390 pagesize is 4k anyway. Signed-off-by: Rusty Russell Acked-by: Christian Borntraeger --- arch/s390/include/asm/kvm_virtio.h | 4 ++++ drivers/s390/kvm/kvm_virtio.c | 9 ++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h index c13568b9351c..0503936f101f 100644 --- a/arch/s390/include/asm/kvm_virtio.h +++ b/arch/s390/include/asm/kvm_virtio.h @@ -50,6 +50,10 @@ struct kvm_vqconfig { #define KVM_S390_VIRTIO_RESET 1 #define KVM_S390_VIRTIO_SET_STATUS 2 +/* The alignment to use between consumer and producer parts of vring. + * This is pagesize for historical reasons. */ +#define KVM_S390_VIRTIO_RING_ALIGN 4096 + #ifdef __KERNEL__ /* early virtio console setup */ #ifdef CONFIG_S390_GUEST diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 3d442444c618..f5a2dbe75575 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -188,7 +188,8 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, config = kvm_vq_config(kdev->desc)+index; err = vmem_add_mapping(config->address, - vring_size(config->num, PAGE_SIZE)); + vring_size(config->num, + KVM_S390_VIRTIO_RING_ALIGN)); if (err) goto out; @@ -209,7 +210,8 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, return vq; unmap: vmem_remove_mapping(config->address, - vring_size(config->num, PAGE_SIZE)); + vring_size(config->num, + KVM_S390_VIRTIO_RING_ALIGN)); out: return ERR_PTR(err); } @@ -220,7 +222,8 @@ static void kvm_del_vq(struct virtqueue *vq) vring_del_virtqueue(vq); vmem_remove_mapping(config->address, - vring_size(config->num, PAGE_SIZE)); + vring_size(config->num, + KVM_S390_VIRTIO_RING_ALIGN)); } /* From 87c7d57c17ade5024d95b6ca0da249da49b0672a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:26:03 -0600 Subject: [PATCH 08/17] virtio: hand virtio ring alignment as argument to vring_new_virtqueue This allows each virtio user to hand in the alignment appropriate to their virtio_ring structures. Signed-off-by: Rusty Russell Acked-by: Christian Borntraeger --- drivers/lguest/lguest_device.c | 4 ++-- drivers/s390/kvm/kvm_virtio.c | 3 ++- drivers/virtio/virtio_pci.c | 4 ++-- drivers/virtio/virtio_ring.c | 3 ++- include/linux/virtio_ring.h | 1 + 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index f062dc55c573..b02f6bcb64c4 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -259,8 +259,8 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* OK, tell virtio_ring.c to set up a virtqueue now we know its size * and we've got a pointer to its pages. */ - vq = vring_new_virtqueue(lvq->config.num, vdev, lvq->pages, - lg_notify, callback); + vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, + vdev, lvq->pages, lg_notify, callback); if (!vq) { err = -ENOMEM; goto unmap; diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index f5a2dbe75575..4e8354aa8576 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -193,7 +193,8 @@ static struct virtqueue *kvm_find_vq(struct virtio_device *vdev, if (err) goto out; - vq = vring_new_virtqueue(config->num, vdev, (void *) config->address, + vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN, + vdev, (void *) config->address, kvm_notify, callback); if (!vq) { err = -ENOMEM; diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index f28643f3a4e8..7462a51e820b 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -249,8 +249,8 @@ static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); /* create the vring */ - vq = vring_new_virtqueue(info->num, vdev, info->queue, - vp_notify, callback); + vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, + vdev, info->queue, vp_notify, callback); if (!vq) { err = -ENOMEM; goto out_activate_queue; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6eb5303fed11..5777196bf6c9 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -274,6 +274,7 @@ static struct virtqueue_ops vring_vq_ops = { }; struct virtqueue *vring_new_virtqueue(unsigned int num, + unsigned int vring_align, struct virtio_device *vdev, void *pages, void (*notify)(struct virtqueue *), @@ -292,7 +293,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, if (!vq) return NULL; - vring_init(&vq->vring, num, pages, PAGE_SIZE); + vring_init(&vq->vring, num, pages, vring_align); vq->vq.callback = callback; vq->vq.vdev = vdev; vq->vq.vq_ops = &vring_vq_ops; diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 01bf3124e312..71e03722fb59 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -115,6 +115,7 @@ struct virtio_device; struct virtqueue; struct virtqueue *vring_new_virtqueue(unsigned int num, + unsigned int vring_align, struct virtio_device *vdev, void *pages, void (*notify)(struct virtqueue *vq), From 1b4aa2faeca1b9922033daf2475b6fc13b0ffea6 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Thu, 13 Nov 2008 15:48:33 -0600 Subject: [PATCH 09/17] virtio: avoid implicit use of Linux page size in balloon interface Make the balloon interface always use 4K pages, and convert Linux pfns if necessary. This patch assumes that Linux's PAGE_SHIFT will never be less than 12. Signed-off-by: Hollis Blanchard Signed-off-by: Rusty Russell (modified) --- drivers/virtio/virtio_balloon.c | 13 +++++++++++-- include/linux/virtio_balloon.h | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 62eab43152d2..59268266b79a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -56,6 +56,15 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +static u32 page_to_balloon_pfn(struct page *page) +{ + unsigned long pfn = page_to_pfn(page); + + BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT); + /* Convert pfn from Linux page size to balloon page size. */ + return pfn >> (PAGE_SHIFT - VIRTIO_BALLOON_PFN_SHIFT); +} + static void balloon_ack(struct virtqueue *vq) { struct virtio_balloon *vb; @@ -99,7 +108,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) msleep(200); break; } - vb->pfns[vb->num_pfns] = page_to_pfn(page); + vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page); totalram_pages--; vb->num_pages++; list_add(&page->lru, &vb->pages); @@ -132,7 +141,7 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { page = list_first_entry(&vb->pages, struct page, lru); list_del(&page->lru); - vb->pfns[vb->num_pfns] = page_to_pfn(page); + vb->pfns[vb->num_pfns] = page_to_balloon_pfn(page); vb->num_pages--; } diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index c30c7bfbf39b..8726ff77763e 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -10,6 +10,9 @@ /* The feature bitmap for virtio balloon */ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ +/* Size of a PFN in the balloon interface. */ +#define VIRTIO_BALLOON_PFN_SHIFT 12 + struct virtio_balloon_config { /* Number of pages host wants Guest to give up. */ From 4b7f7e2049956f6e946ad56c1ee093e7bab74da9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:26:04 -0600 Subject: [PATCH 10/17] virtio: set max_segment_size and max_sectors to infinite. Setting max_segment_size allows more than 64k per sg element, unless the host specified a limit. Setting max_sectors indicates that our max_hw_segments is the only limit. Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 85d79a02d487..0e2ade648aff 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -277,6 +277,9 @@ static int virtblk_probe(struct virtio_device *vdev) } set_capacity(vblk->disk, cap); + /* No real sector limit. */ + blk_queue_max_sectors(vblk->disk->queue, -1U); + /* Host can optionally specify maximum segment size and number of * segments. */ err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX, @@ -284,6 +287,8 @@ static int virtblk_probe(struct virtio_device *vdev) &v); if (!err) blk_queue_max_segment_size(vblk->disk->queue, v); + else + blk_queue_max_segment_size(vblk->disk->queue, -1UL); err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, offsetof(struct virtio_blk_config, seg_max), From 0864b79a153342c1dfbebb12b2d099fec76c5e18 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 30 Dec 2008 09:26:05 -0600 Subject: [PATCH 11/17] virtio: block: dynamic maximum segments Enhance the driver to handle whatever maximum segment number the host tells us to handle. Do to this, we need to allocate the scatterlist dynamically. We set max_phys_segments and max_hw_segments to the same value (1 if the host doesn't tell us, since that's safest and all known hosts do tell us). Note that kmalloc'ing the structure for large sg_elems might be problematic: the fix for this is sg_table, but that requires more work. Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 0e2ade648aff..7b9b38a12d25 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -6,7 +6,6 @@ #include #include -#define VIRTIO_MAX_SG (3+MAX_PHYS_SEGMENTS) #define PART_BITS 4 static int major, index; @@ -26,8 +25,11 @@ struct virtio_blk mempool_t *pool; + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + /* Scatterlist: can be too big for stack. */ - struct scatterlist sg[VIRTIO_MAX_SG]; + struct scatterlist sg[/*sg_elems*/]; }; struct virtblk_req @@ -97,8 +99,6 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, if (blk_barrier_rq(vbr->req)) vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER; - /* This init could be done at vblk creation time */ - sg_init_table(vblk->sg, VIRTIO_MAX_SG); sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr)); num = blk_rq_map_sg(q, vbr->req, vblk->sg+1); sg_set_buf(&vblk->sg[num+1], &vbr->status, sizeof(vbr->status)); @@ -130,7 +130,7 @@ static void do_virtblk_request(struct request_queue *q) while ((req = elv_next_request(q)) != NULL) { vblk = req->rq_disk->private_data; - BUG_ON(req->nr_phys_segments > ARRAY_SIZE(vblk->sg)); + BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); /* If this request fails, stop queue and wait for something to finish to restart it. */ @@ -196,12 +196,22 @@ static int virtblk_probe(struct virtio_device *vdev) int err; u64 cap; u32 v; - u32 blk_size; + u32 blk_size, sg_elems; if (index_to_minor(index) >= 1 << MINORBITS) return -ENOSPC; - vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); + /* We need to know how many segments before we allocate. */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, + offsetof(struct virtio_blk_config, seg_max), + &sg_elems); + if (err) + sg_elems = 1; + + /* We need an extra sg elements at head and tail. */ + sg_elems += 2; + vdev->priv = vblk = kmalloc(sizeof(*vblk) + + sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL); if (!vblk) { err = -ENOMEM; goto out; @@ -210,6 +220,8 @@ static int virtblk_probe(struct virtio_device *vdev) INIT_LIST_HEAD(&vblk->reqs); spin_lock_init(&vblk->lock); vblk->vdev = vdev; + vblk->sg_elems = sg_elems; + sg_init_table(vblk->sg, vblk->sg_elems); /* We expect one virtqueue, for output. */ vblk->vq = vdev->config->find_vq(vdev, 0, blk_done); @@ -277,6 +289,10 @@ static int virtblk_probe(struct virtio_device *vdev) } set_capacity(vblk->disk, cap); + /* We can handle whatever the host told us to handle. */ + blk_queue_max_phys_segments(vblk->disk->queue, vblk->sg_elems-2); + blk_queue_max_hw_segments(vblk->disk->queue, vblk->sg_elems-2); + /* No real sector limit. */ blk_queue_max_sectors(vblk->disk->queue, -1U); @@ -290,12 +306,6 @@ static int virtblk_probe(struct virtio_device *vdev) else blk_queue_max_segment_size(vblk->disk->queue, -1UL); - err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, - offsetof(struct virtio_blk_config, seg_max), - &v); - if (!err) - blk_queue_max_hw_segments(vblk->disk->queue, v); - /* Host can optionally specify the block size of the device */ err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, offsetof(struct virtio_blk_config, blk_size), From b194aee95622f649311f8e53418a17e210ff6827 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 26 Nov 2008 13:15:50 -0800 Subject: [PATCH 12/17] virtio_blk: fix type warning Fix parameter type warning: linux-next-20081126/drivers/block/virtio_blk.c:307: warning: large integer implicitly truncated to unsigned type Signed-off-by: Randy Dunlap cc: Rusty Russell Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 7b9b38a12d25..300078b02e5d 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -304,7 +304,7 @@ static int virtblk_probe(struct virtio_device *vdev) if (!err) blk_queue_max_segment_size(vblk->disk->queue, v); else - blk_queue_max_segment_size(vblk->disk->queue, -1UL); + blk_queue_max_segment_size(vblk->disk->queue, -1U); /* Host can optionally specify the block size of the device */ err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, From 29f9f12ec737af62835124e4a8bdb9de631f04dd Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Wed, 10 Dec 2008 17:45:34 +0000 Subject: [PATCH 13/17] virtio: add PCI device release() function Add a release() function for virtio_pci devices so as to avoid: Device 'virtio0' does not have a release() function, it is broken and must be fixed Move the code to free the resources associated with the device from virtio_pci_remove() into this new function. virtio_pci_remove() now merely unregisters the device which should cause the final ref to be dropped and virtio_pci_release_dev() to be called. Signed-off-by: Mark McLoughlin Reported-by: Michael Tokarev Cc: Anthony Liguori Signed-off-by: Rusty Russell --- drivers/virtio/virtio_pci.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 7462a51e820b..265fdf2d1276 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -307,6 +307,20 @@ static struct virtio_config_ops virtio_pci_config_ops = { .finalize_features = vp_finalize_features, }; +static void virtio_pci_release_dev(struct device *_d) +{ + struct virtio_device *dev = container_of(_d, struct virtio_device, dev); + struct virtio_pci_device *vp_dev = to_vp_device(dev); + struct pci_dev *pci_dev = vp_dev->pci_dev; + + free_irq(pci_dev->irq, vp_dev); + pci_set_drvdata(pci_dev, NULL); + pci_iounmap(pci_dev, vp_dev->ioaddr); + pci_release_regions(pci_dev); + pci_disable_device(pci_dev); + kfree(vp_dev); +} + /* the PCI probing function */ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) @@ -330,6 +344,7 @@ static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, return -ENOMEM; vp_dev->vdev.dev.parent = &virtio_pci_root; + vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->vdev.config = &virtio_pci_config_ops; vp_dev->pci_dev = pci_dev; INIT_LIST_HEAD(&vp_dev->virtqueues); @@ -389,12 +404,6 @@ static void __devexit virtio_pci_remove(struct pci_dev *pci_dev) struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); unregister_virtio_device(&vp_dev->vdev); - free_irq(pci_dev->irq, vp_dev); - pci_set_drvdata(pci_dev, NULL); - pci_iounmap(pci_dev, vp_dev->ioaddr); - pci_release_regions(pci_dev); - pci_disable_device(pci_dev); - kfree(vp_dev); } #ifdef CONFIG_PM From c29834584ea4eafccf2f62a0b8a32e64f792044c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 25 Nov 2008 13:36:26 +0100 Subject: [PATCH 14/17] virtio_console: support console resizing this patch uses the new hvc callback hvc_resize to set the window size which allows to change the tty size of hvc_console via a hvc_resize function. I have added a new feature bit VIRTIO_CONSOLE_F_SIZE. The driver will change the window size on tty open and via the config_changed callback of the transport. Currently lguest and kvm_s390 have not implemented this callback, but the callback can be implemented at a later point in time. Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- drivers/char/hvc_console.c | 1 + drivers/char/virtio_console.c | 30 +++++++++++++++++++++++++++++- include/linux/virtio_console.h | 11 +++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index fb57f67bb427..0587b66d6fc7 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -695,6 +695,7 @@ void hvc_resize(struct hvc_struct *hp, struct winsize ws) hp->ws = ws; schedule_work(&hp->tty_resize); } +EXPORT_SYMBOL_GPL(hvc_resize); /* * This kthread is either polling or interrupt driven. This is determined by diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 3fb0d2c88ba5..ff6f5a4b58fb 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -137,13 +137,34 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) return hvc_instantiate(0, 0, &virtio_cons); } +/* + * virtio console configuration. This supports: + * - console resize + */ +static void virtcons_apply_config(struct virtio_device *dev) +{ + struct winsize ws; + + if (virtio_has_feature(dev, VIRTIO_CONSOLE_F_SIZE)) { + dev->config->get(dev, + offsetof(struct virtio_console_config, cols), + &ws.ws_col, sizeof(u16)); + dev->config->get(dev, + offsetof(struct virtio_console_config, rows), + &ws.ws_row, sizeof(u16)); + hvc_resize(hvc, ws); + } +} + /* * we support only one console, the hvc struct is a global var - * There is no need to do anything + * We set the configuration at this point, since we now have a tty */ static int notifier_add_vio(struct hvc_struct *hp, int data) { hp->irq_requested = 1; + virtcons_apply_config(vdev); + return 0; } @@ -234,11 +255,18 @@ static struct virtio_device_id id_table[] = { { 0 }, }; +static unsigned int features[] = { + VIRTIO_CONSOLE_F_SIZE, +}; + static struct virtio_driver virtio_console = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, .probe = virtcons_probe, + .config_changed = virtcons_apply_config, }; static int __init init(void) diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index 19a0da0dba41..7615ffcdd555 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -7,6 +7,17 @@ /* The ID for virtio console */ #define VIRTIO_ID_CONSOLE 3 +/* Feature bits */ +#define VIRTIO_CONSOLE_F_SIZE 0 /* Does host provide console size? */ + +struct virtio_console_config { + /* colums of the screens */ + __u16 cols; + /* rows of the screens */ + __u16 rows; +} __attribute__((packed)); + + #ifdef __KERNEL__ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)); #endif /* __KERNEL__ */ From be3c5832d51174ef7f21cefd6ad612dabdcb62fd Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 18 Nov 2008 22:44:13 +0100 Subject: [PATCH 15/17] kvm-s390: implement config_changed for virtio on s390 This patch implements config_changed for the s390 virtio transport. We use the least significant bit of the interrupt parameter field to decide, if this interrupt should call the virtio virtqueue callback or the config_changed callback. This method is compatible with old host and guest code. Old 64 bit guests will not check the bit and trigger a harmless additional vring_interrupt call. Old host code will never set this bit, this is also safe. This patch also takes care of a potential future 31 bit virtio transport for s390. On 31 bit _LC_PFAULT_INTPARM and __LC_EXT_PARAMS are identical. We exploit the alignment of the token and fold the change bit into the lsb of the token itself. Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- drivers/s390/kvm/kvm_virtio.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 4e8354aa8576..28c90b89f2b4 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -299,13 +299,29 @@ static void scan_devices(void) */ static void kvm_extint_handler(u16 code) { - void *data = (void *) *(long *) __LC_PFAULT_INTPARM; - u16 subcode = S390_lowcore.cpu_addr; + struct virtqueue *vq; + u16 subcode; + int config_changed; + subcode = S390_lowcore.cpu_addr; if ((subcode & 0xff00) != VIRTIO_SUBCODE_64) return; - vring_interrupt(0, data); + /* The LSB might be overloaded, we have to mask it */ + vq = (struct virtqueue *) ((*(long *) __LC_PFAULT_INTPARM) & ~1UL); + + /* We use the LSB of extparam, to decide, if this interrupt is a config + * change or a "standard" interrupt */ + config_changed = (*(int *) __LC_EXT_PARAMS & 1); + + if (config_changed) { + struct virtio_driver *drv; + drv = container_of(vq->vdev->dev.driver, + struct virtio_driver, driver); + if (drv->config_changed) + drv->config_changed(vq->vdev); + } else + vring_interrupt(0, vq); } /* From 58a24566449892dda409b9ad92c2e56c76c5670c Mon Sep 17 00:00:00 2001 From: Matias Zabaljauregui Date: Mon, 29 Sep 2008 01:40:07 -0300 Subject: [PATCH 16/17] lguest: move the initial guest page table creation code to the host This patch moves the initial guest page table creation code to the host, so the launcher keeps working with PAE enabled configs. Signed-off-by: Matias Zabaljauregui Signed-off-by: Rusty Russell --- Documentation/lguest/lguest.c | 60 +++------------------------ arch/x86/lguest/i386_head.S | 15 ------- drivers/lguest/lg.h | 2 +- drivers/lguest/lguest_user.c | 13 +++--- drivers/lguest/page_tables.c | 72 ++++++++++++++++++++++++++++++++- include/linux/lguest_launcher.h | 2 +- 6 files changed, 83 insertions(+), 81 deletions(-) diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index aa2574ca94c7..f2dbbf3bdeab 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -481,51 +481,6 @@ static unsigned long load_initrd(const char *name, unsigned long mem) /* We return the initrd size. */ return len; } - -/* Once we know how much memory we have we can construct simple linear page - * tables which set virtual == physical which will get the Guest far enough - * into the boot to create its own. - * - * We lay them out of the way, just below the initrd (which is why we need to - * know its size here). */ -static unsigned long setup_pagetables(unsigned long mem, - unsigned long initrd_size) -{ - unsigned long *pgdir, *linear; - unsigned int mapped_pages, i, linear_pages; - unsigned int ptes_per_page = getpagesize()/sizeof(void *); - - mapped_pages = mem/getpagesize(); - - /* Each PTE page can map ptes_per_page pages: how many do we need? */ - linear_pages = (mapped_pages + ptes_per_page-1)/ptes_per_page; - - /* We put the toplevel page directory page at the top of memory. */ - pgdir = from_guest_phys(mem) - initrd_size - getpagesize(); - - /* Now we use the next linear_pages pages as pte pages */ - linear = (void *)pgdir - linear_pages*getpagesize(); - - /* Linear mapping is easy: put every page's address into the mapping in - * order. PAGE_PRESENT contains the flags Present, Writable and - * Executable. */ - for (i = 0; i < mapped_pages; i++) - linear[i] = ((i * getpagesize()) | PAGE_PRESENT); - - /* The top level points to the linear page table pages above. */ - for (i = 0; i < mapped_pages; i += ptes_per_page) { - pgdir[i/ptes_per_page] - = ((to_guest_phys(linear) + i*sizeof(void *)) - | PAGE_PRESENT); - } - - verbose("Linear mapping of %u pages in %u pte pages at %#lx\n", - mapped_pages, linear_pages, to_guest_phys(linear)); - - /* We return the top level (guest-physical) address: the kernel needs - * to know where it is. */ - return to_guest_phys(pgdir); -} /*:*/ /* Simple routine to roll all the commandline arguments together with spaces @@ -548,13 +503,13 @@ static void concat(char *dst, char *args[]) /*L:185 This is where we actually tell the kernel to initialize the Guest. We * saw the arguments it expects when we looked at initialize() in lguest_user.c: - * the base of Guest "physical" memory, the top physical page to allow, the - * top level pagetable and the entry point for the Guest. */ -static int tell_kernel(unsigned long pgdir, unsigned long start) + * the base of Guest "physical" memory, the top physical page to allow and the + * entry point for the Guest. */ +static int tell_kernel(unsigned long start) { unsigned long args[] = { LHREQ_INITIALIZE, (unsigned long)guest_base, - guest_limit / getpagesize(), pgdir, start }; + guest_limit / getpagesize(), start }; int fd; verbose("Guest: %p - %p (%#lx)\n", @@ -1941,7 +1896,7 @@ int main(int argc, char *argv[]) { /* Memory, top-level pagetable, code startpoint and size of the * (optional) initrd. */ - unsigned long mem = 0, pgdir, start, initrd_size = 0; + unsigned long mem = 0, start, initrd_size = 0; /* Two temporaries and the /dev/lguest file descriptor. */ int i, c, lguest_fd; /* The boot information for the Guest. */ @@ -2040,9 +1995,6 @@ int main(int argc, char *argv[]) boot->hdr.type_of_loader = 0xFF; } - /* Set up the initial linear pagetables, starting below the initrd. */ - pgdir = setup_pagetables(mem, initrd_size); - /* The Linux boot header contains an "E820" memory map: ours is a * simple, single region. */ boot->e820_entries = 1; @@ -2064,7 +2016,7 @@ int main(int argc, char *argv[]) /* We tell the kernel to initialize the Guest: this returns the open * /dev/lguest file descriptor. */ - lguest_fd = tell_kernel(pgdir, start); + lguest_fd = tell_kernel(start); /* We clone off a thread, which wakes the Launcher whenever one of the * input file descriptors needs attention. We call this the Waker, and diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 5c7cef34c9e7..10b9bd35a8ff 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -30,21 +30,6 @@ ENTRY(lguest_entry) movl $lguest_data - __PAGE_OFFSET, %edx int $LGUEST_TRAP_ENTRY - /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl - * instruction uses %esi implicitly as the source for the copy we're - * about to do. */ - movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi - - /* Copy first 32 entries of page directory to __PAGE_OFFSET entries. - * This means the first 128M of kernel memory will be mapped at - * PAGE_OFFSET where the kernel expects to run. This will get it far - * enough through boot to switch to its own pagetables. */ - movl $32, %ecx - movl %esi, %edi - addl $((__PAGE_OFFSET >> 22) * 4), %edi - rep - movsl - /* Set up the initial stack so we can run C code. */ movl $(init_thread_union+THREAD_SIZE),%esp diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 5faefeaf6790..f2c641e0bdde 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -164,7 +164,7 @@ void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt); void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt); /* page_tables.c: */ -int init_guest_pagetable(struct lguest *lg, unsigned long pgtable); +int init_guest_pagetable(struct lguest *lg); void free_guest_pagetable(struct lguest *lg); void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable); void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i); diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index e73a000473cc..34bc017b8b3c 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -146,7 +146,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) return 0; } -/*L:020 The initialization write supplies 4 pointer sized (32 or 64 bit) +/*L:020 The initialization write supplies 3 pointer sized (32 or 64 bit) * values (in addition to the LHREQ_INITIALIZE value). These are: * * base: The start of the Guest-physical memory inside the Launcher memory. @@ -155,9 +155,6 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) * allowed to access. The Guest memory lives inside the Launcher, so it sets * this to ensure the Guest can only reach its own memory. * - * pgdir: The (Guest-physical) address of the top of the initial Guest - * pagetables (which are set up by the Launcher). - * * start: The first instruction to execute ("eip" in x86-speak). */ static int initialize(struct file *file, const unsigned long __user *input) @@ -166,7 +163,7 @@ static int initialize(struct file *file, const unsigned long __user *input) * Guest. */ struct lguest *lg; int err; - unsigned long args[4]; + unsigned long args[3]; /* We grab the Big Lguest lock, which protects against multiple * simultaneous initializations. */ @@ -192,14 +189,14 @@ static int initialize(struct file *file, const unsigned long __user *input) lg->mem_base = (void __user *)args[0]; lg->pfn_limit = args[1]; - /* This is the first cpu (cpu 0) and it will start booting at args[3] */ - err = lg_cpu_start(&lg->cpus[0], 0, args[3]); + /* This is the first cpu (cpu 0) and it will start booting at args[2] */ + err = lg_cpu_start(&lg->cpus[0], 0, args[2]); if (err) goto release_guest; /* Initialize the Guest's shadow page tables, using the toplevel * address the Launcher gave us. This allocates memory, so can fail. */ - err = init_guest_pagetable(lg, args[2]); + err = init_guest_pagetable(lg); if (err) goto free_regs; diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index 81d0c6053447..576a8318221c 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "lg.h" /*M:008 We hold reference to pages, which prevents them from being swapped. @@ -581,15 +582,82 @@ void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 idx) release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx); } +/* Once we know how much memory we have we can construct simple identity + * (which set virtual == physical) and linear mappings + * which will get the Guest far enough into the boot to create its own. + * + * We lay them out of the way, just below the initrd (which is why we need to + * know its size here). */ +static unsigned long setup_pagetables(struct lguest *lg, + unsigned long mem, + unsigned long initrd_size) +{ + pgd_t __user *pgdir; + pte_t __user *linear; + unsigned int mapped_pages, i, linear_pages, phys_linear; + unsigned long mem_base = (unsigned long)lg->mem_base; + + /* We have mapped_pages frames to map, so we need + * linear_pages page tables to map them. */ + mapped_pages = mem / PAGE_SIZE; + linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE; + + /* We put the toplevel page directory page at the top of memory. */ + pgdir = (pgd_t *)(mem + mem_base - initrd_size - PAGE_SIZE); + + /* Now we use the next linear_pages pages as pte pages */ + linear = (void *)pgdir - linear_pages * PAGE_SIZE; + + /* Linear mapping is easy: put every page's address into the + * mapping in order. */ + for (i = 0; i < mapped_pages; i++) { + pte_t pte; + pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER)); + if (copy_to_user(&linear[i], &pte, sizeof(pte)) != 0) + return -EFAULT; + } + + /* The top level points to the linear page table pages above. + * We setup the identity and linear mappings here. */ + phys_linear = (unsigned long)linear - mem_base; + for (i = 0; i < mapped_pages; i += PTRS_PER_PTE) { + pgd_t pgd; + pgd = __pgd((phys_linear + i * sizeof(pte_t)) | + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER)); + + if (copy_to_user(&pgdir[i / PTRS_PER_PTE], &pgd, sizeof(pgd)) + || copy_to_user(&pgdir[pgd_index(PAGE_OFFSET) + + i / PTRS_PER_PTE], + &pgd, sizeof(pgd))) + return -EFAULT; + } + + /* We return the top level (guest-physical) address: remember where + * this is. */ + return (unsigned long)pgdir - mem_base; +} + /*H:500 (vii) Setting up the page tables initially. * * When a Guest is first created, the Launcher tells us where the toplevel of * its first page table is. We set some things up here: */ -int init_guest_pagetable(struct lguest *lg, unsigned long pgtable) +int init_guest_pagetable(struct lguest *lg) { + u64 mem; + u32 initrd_size; + struct boot_params __user *boot = (struct boot_params *)lg->mem_base; + + /* Get the Guest memory size and the ramdisk size from the boot header + * located at lg->mem_base (Guest address 0). */ + if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem)) + || get_user(initrd_size, &boot->hdr.ramdisk_size)) + return -EFAULT; + /* We start on the first shadow page table, and give it a blank PGD * page. */ - lg->pgdirs[0].gpgdir = pgtable; + lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size); + if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir)) + return lg->pgdirs[0].gpgdir; lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); if (!lg->pgdirs[0].pgdir) return -ENOMEM; diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index bd0eba760522..a53407a4165c 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -54,7 +54,7 @@ struct lguest_vqconfig { /* Write command first word is a request. */ enum lguest_req { - LHREQ_INITIALIZE, /* + base, pfnlimit, pgdir, start */ + LHREQ_INITIALIZE, /* + base, pfnlimit, start */ LHREQ_GETDMA, /* No longer used */ LHREQ_IRQ, /* + irq */ LHREQ_BREAK, /* + on/off flag (on blocks until someone does off) */ From bda53cd510b6777ced652ba279020bb7b414b744 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Wed, 10 Dec 2008 17:45:39 +0000 Subject: [PATCH 17/17] lguest: struct device - replace bus_id with dev_name() bus_id is gradually being removed, so use dev_name() instead. Signed-off-by: Mark McLoughlin Cc: Kay Sievers Cc: Greg Kroah-Hartman Signed-off-by: Rusty Russell --- drivers/lguest/lguest_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index b02f6bcb64c4..915da6b8c924 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -272,7 +272,7 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, * the interrupt as a source of randomness: it'd be nice to have that * back.. */ err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, - vdev->dev.bus_id, vq); + dev_name(&vdev->dev), vq); if (err) goto destroy_vring;