From 20faa7c380c19c932d57be59bb2522bd9327a6c5 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 5 Nov 2005 10:20:56 +0000 Subject: [PATCH 01/22] [ARM] Documentation/arm/README: small update - egcs is not supported by kernel 2.6 - gcc 3.3 seems to be a good choice on ARM Signed-off-by: Adrian Bunk Signed-off-by: Russell King --- Documentation/arm/README | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Documentation/arm/README b/Documentation/arm/README index a6f718e90a86..5ed6f3530b86 100644 --- a/Documentation/arm/README +++ b/Documentation/arm/README @@ -8,10 +8,9 @@ Compilation of kernel --------------------- In order to compile ARM Linux, you will need a compiler capable of - generating ARM ELF code with GNU extensions. GCC 2.95.1, EGCS - 1.1.2, and GCC 3.3 are known to be good compilers. Fortunately, you - needn't guess. The kernel will report an error if your compiler is - a recognized offender. + generating ARM ELF code with GNU extensions. GCC 3.3 is known to be + a good compiler. Fortunately, you needn't guess. The kernel will report + an error if your compiler is a recognized offender. To build ARM Linux natively, you shouldn't have to alter the ARCH = line in the top level Makefile. However, if you don't have the ARM Linux ELF From 37c12e7497b6fe2b6a890814f0ff4edce696d862 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 5 Nov 2005 21:19:33 +0000 Subject: [PATCH 02/22] [DRIVER MODEL] Improved dynamically allocated platform_device interface Re-jig the simple platform device support to allow private data to be attached to a platform device, as well as allowing the parent device to be set. 
Example usage: pdev = platform_device_alloc("mydev", id); if (pdev) { err = platform_device_add_resources(pdev, &resources, ARRAY_SIZE(resources)); if (err == 0) err = platform_device_add_data(pdev, &platform_data, sizeof(platform_data)); if (err == 0) err = platform_device_add(pdev); } else { err = -ENOMEM; } if (err) platform_device_put(pdev); Signed-off-by: Russell King Acked-by: Greg Kroah-Hartman --- drivers/base/platform.c | 153 ++++++++++++++++++++++++++------ include/linux/platform_device.h | 6 ++ 2 files changed, 132 insertions(+), 27 deletions(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index d597c922af11..6d4736e89f1a 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -116,12 +116,115 @@ int platform_add_devices(struct platform_device **devs, int num) return ret; } +struct platform_object { + struct platform_device pdev; + char name[1]; +}; + /** - * platform_device_register - add a platform-level device + * platform_device_put + * @pdev: platform device to free + * + * Free all memory associated with a platform device. This function + * must _only_ be externally called in error cases. All other usage + * is a bug. + */ +void platform_device_put(struct platform_device *pdev) +{ + if (pdev) + put_device(&pdev->dev); +} +EXPORT_SYMBOL_GPL(platform_device_put); + +static void platform_device_release(struct device *dev) +{ + struct platform_object *pa = container_of(dev, struct platform_object, pdev.dev); + + kfree(pa->pdev.dev.platform_data); + kfree(pa->pdev.resource); + kfree(pa); +} + +/** + * platform_device_alloc + * @name: base name of the device we're adding + * @id: instance id + * + * Create a platform device object which can have other objects attached + * to it, and which will have attached objects freed when it is released. 
+ */ +struct platform_device *platform_device_alloc(const char *name, unsigned int id) +{ + struct platform_object *pa; + + pa = kzalloc(sizeof(struct platform_object) + strlen(name), GFP_KERNEL); + if (pa) { + strcpy(pa->name, name); + pa->pdev.name = pa->name; + pa->pdev.id = id; + device_initialize(&pa->pdev.dev); + pa->pdev.dev.release = platform_device_release; + } + + return pa ? &pa->pdev : NULL; +} +EXPORT_SYMBOL_GPL(platform_device_alloc); + +/** + * platform_device_add_resources + * @pdev: platform device allocated by platform_device_alloc to add resources to + * @res: set of resources that needs to be allocated for the device + * @num: number of resources + * + * Add a copy of the resources to the platform device. The memory + * associated with the resources will be freed when the platform + * device is released. + */ +int platform_device_add_resources(struct platform_device *pdev, struct resource *res, unsigned int num) +{ + struct resource *r; + + r = kmalloc(sizeof(struct resource) * num, GFP_KERNEL); + if (r) { + memcpy(r, res, sizeof(struct resource) * num); + pdev->resource = r; + pdev->num_resources = num; + } + return r ? 0 : -ENOMEM; +} +EXPORT_SYMBOL_GPL(platform_device_add_resources); + +/** + * platform_device_add_data + * @pdev: platform device allocated by platform_device_alloc to add data to + * @data: platform specific data for this platform device + * @size: size of platform specific data + * + * Add a copy of platform specific data to the platform device's platform_data + * pointer. The memory associated with the platform data will be freed + * when the platform device is released. + */ +int platform_device_add_data(struct platform_device *pdev, void *data, size_t size) +{ + void *d; + + d = kmalloc(size, GFP_KERNEL); + if (d) { + memcpy(d, data, size); + pdev->dev.platform_data = d; + } + return d ? 
0 : -ENOMEM; +} +EXPORT_SYMBOL_GPL(platform_device_add_data); + +/** + * platform_device_add - add a platform device to device hierarchy * @pdev: platform device we're adding * + * This is part 2 of platform_device_register(), though may be called + * separately _iff_ pdev was allocated by platform_device_alloc(). */ -int platform_device_register(struct platform_device * pdev) +int platform_device_add(struct platform_device *pdev) { int i, ret = 0; @@ -174,6 +277,18 @@ int platform_device_register(struct platform_device * pdev) release_resource(&pdev->resource[i]); return ret; } +EXPORT_SYMBOL_GPL(platform_device_add); + +/** + * platform_device_register - add a platform-level device + * @pdev: platform device we're adding + * + */ +int platform_device_register(struct platform_device * pdev) +{ + device_initialize(&pdev->dev); + return platform_device_add(pdev); +} /** * platform_device_unregister - remove a platform-level device @@ -197,18 +312,6 @@ void platform_device_unregister(struct platform_device * pdev) } } -struct platform_object { - struct platform_device pdev; - struct resource resources[0]; -}; - -static void platform_device_release_simple(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - - kfree(container_of(pdev, struct platform_object, pdev)); -} - /** * platform_device_register_simple * @name: base name of the device we're adding @@ -225,33 +328,29 @@ static void platform_device_release_simple(struct device *dev) struct platform_device *platform_device_register_simple(char *name, unsigned int id, struct resource *res, unsigned int num) { - struct platform_object *pobj; + struct platform_device *pdev; int retval; - pobj = kzalloc(sizeof(*pobj) + sizeof(struct resource) * num, GFP_KERNEL); - if (!pobj) { + pdev = platform_device_alloc(name, id); + if (!pdev) { retval = -ENOMEM; goto error; } - pobj->pdev.name = name; - pobj->pdev.id = id; - pobj->pdev.dev.release = platform_device_release_simple; - if (num) { - 
memcpy(pobj->resources, res, sizeof(struct resource) * num); - pobj->pdev.resource = pobj->resources; - pobj->pdev.num_resources = num; + retval = platform_device_add_resources(pdev, res, num); + if (retval) + goto error; } - retval = platform_device_register(&pobj->pdev); + retval = platform_device_add(pdev); if (retval) goto error; - return &pobj->pdev; + return pdev; error: - kfree(pobj); + platform_device_put(pdev); return ERR_PTR(retval); } diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index a726225e0afe..1a165b7ae01b 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -37,4 +37,10 @@ extern int platform_add_devices(struct platform_device **, int); extern struct platform_device *platform_device_register_simple(char *, unsigned int, struct resource *, unsigned int); +extern struct platform_device *platform_device_alloc(const char *name, unsigned int id); +extern int platform_device_add_resources(struct platform_device *pdev, struct resource *res, unsigned int num); +extern int platform_device_add_data(struct platform_device *pdev, void *data, size_t size); +extern int platform_device_add(struct platform_device *pdev); +extern void platform_device_put(struct platform_device *pdev); + #endif /* _PLATFORM_DEVICE_H_ */ From 5d994b7f5d1c77acaa0b9b4c1b9f0f278605c309 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 5 Nov 2005 21:20:21 +0000 Subject: [PATCH 03/22] [DRIVER MODEL] Fix depca Release code in driver modules is a potential cause of oopsen. The device may be in use by a userspace process, which will keep a reference to the device. If the module is unloaded, the module text will be freed. Subsequently, when the last reference is dropped, the release code will be called, which no longer exists. Use generic platform device allocation/release code in modules. 
Signed-off-by: Russell King Acked-by: Greg Kroah-Hartman --- drivers/net/depca.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/net/depca.c b/drivers/net/depca.c index 4d26e5e7d18b..0d33a93df96b 100644 --- a/drivers/net/depca.c +++ b/drivers/net/depca.c @@ -1470,15 +1470,6 @@ static int __init depca_mca_probe(struct device *device) ** ISA bus I/O device probe */ -static void depca_platform_release (struct device *device) -{ - struct platform_device *pldev; - - /* free device */ - pldev = to_platform_device (device); - kfree (pldev); -} - static void __init depca_platform_probe (void) { int i; @@ -1491,19 +1482,16 @@ static void __init depca_platform_probe (void) * line, use it (if valid) */ if (io && io != depca_io_ports[i].iobase) continue; - - if (!(pldev = kmalloc (sizeof (*pldev), GFP_KERNEL))) + + pldev = platform_device_alloc(depca_string, i); + if (!pldev) continue; - memset (pldev, 0, sizeof (*pldev)); - pldev->name = depca_string; - pldev->id = i; pldev->dev.platform_data = (void *) depca_io_ports[i].iobase; - pldev->dev.release = depca_platform_release; depca_io_ports[i].device = pldev; - if (platform_device_register (pldev)) { - kfree (pldev); + if (platform_device_add(pldev)) { + platform_device_put(pldev); depca_io_ports[i].device = NULL; continue; } @@ -1515,6 +1503,7 @@ static void __init depca_platform_probe (void) * allocated structure */ depca_io_ports[i].device = NULL; + pldev->dev.platform_data = NULL; platform_device_unregister (pldev); } } @@ -2112,6 +2101,7 @@ static void __exit depca_module_exit (void) for (i = 0; depca_io_ports[i].iobase; i++) { if (depca_io_ports[i].device) { + depca_io_ports[i].device->dev.platform_data = NULL; platform_device_unregister (depca_io_ports[i].device); depca_io_ports[i].device = NULL; } From 95cb5d954ee656a0b048ea2298188569e0759336 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 5 Nov 2005 21:20:47 +0000 Subject: [PATCH 04/22] [DRIVER MODEL] Fix 
jazzsonic Release code in driver modules is a potential cause of oopsen. The device may be in use by a userspace process, which will keep a reference to the device. If the module is unloaded, the module text will be freed. Subsequently, when the last reference is dropped, the release code will be called, which no longer exists. Use generic platform device allocation/release code in modules. Signed-off-by: Russell King Acked-by: Greg Kroah-Hartman --- drivers/net/jazzsonic.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/net/jazzsonic.c b/drivers/net/jazzsonic.c index a74a5cfaf5bc..2fb3101cb33e 100644 --- a/drivers/net/jazzsonic.c +++ b/drivers/net/jazzsonic.c @@ -285,18 +285,8 @@ static struct device_driver jazz_sonic_driver = { .remove = __devexit_p(jazz_sonic_device_remove), }; -static void jazz_sonic_platform_release (struct device *device) -{ - struct platform_device *pldev; - - /* free device */ - pldev = to_platform_device (device); - kfree (pldev); -} - static int __init jazz_sonic_init_module(void) { - struct platform_device *pldev; int err; if ((err = driver_register(&jazz_sonic_driver))) { @@ -304,27 +294,19 @@ static int __init jazz_sonic_init_module(void) return err; } - jazz_sonic_device = NULL; - - if (!(pldev = kmalloc (sizeof (*pldev), GFP_KERNEL))) { + jazz_sonic_device = platform_device_alloc(jazz_sonic_string, 0); + if (!jazz_sonic_device) goto out_unregister; - } - memset(pldev, 0, sizeof (*pldev)); - pldev->name = jazz_sonic_string; - pldev->id = 0; - pldev->dev.release = jazz_sonic_platform_release; - jazz_sonic_device = pldev; - - if (platform_device_register (pldev)) { - kfree(pldev); + if (platform_device_add(jazz_sonic_device)) { + platform_device_put(jazz_sonic_device); jazz_sonic_device = NULL; } return 0; out_unregister: - platform_device_unregister(pldev); + driver_unregister(&jazz_sonic_driver); return -ENOMEM; } From 09c6518ca0de24549a923891b2d335e8496d79a9 Mon Sep 17 00:00:00 
2001 From: Russell King Date: Sat, 5 Nov 2005 21:21:10 +0000 Subject: [PATCH 05/22] [DRIVER MODEL] Fix macsonic Release code in driver modules is a potential cause of oopsen. The device may be in use by a userspace process, which will keep a reference to the device. If the module is unloaded, the module text will be freed. Subsequently, when the last reference is dropped, the release code will be called, which no longer exists. Use generic platform device allocation/release code in modules. Signed-off-by: Russell King Acked-by: Greg Kroah-Hartman --- drivers/net/macsonic.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/net/macsonic.c b/drivers/net/macsonic.c index e9c999d7eb39..9ef4592aca03 100644 --- a/drivers/net/macsonic.c +++ b/drivers/net/macsonic.c @@ -599,18 +599,8 @@ static struct device_driver mac_sonic_driver = { .remove = __devexit_p(mac_sonic_device_remove), }; -static void mac_sonic_platform_release(struct device *device) -{ - struct platform_device *pldev; - - /* free device */ - pldev = to_platform_device (device); - kfree (pldev); -} - static int __init mac_sonic_init_module(void) { - struct platform_device *pldev; int err; if ((err = driver_register(&mac_sonic_driver))) { @@ -618,27 +608,20 @@ static int __init mac_sonic_init_module(void) return err; } - mac_sonic_device = NULL; - - if (!(pldev = kmalloc (sizeof (*pldev), GFP_KERNEL))) { + mac_sonic_device = platform_device_alloc(mac_sonic_string, 0); + if (!mac_sonic_device) { goto out_unregister; } - memset(pldev, 0, sizeof (*pldev)); - pldev->name = mac_sonic_string; - pldev->id = 0; - pldev->dev.release = mac_sonic_platform_release; - mac_sonic_device = pldev; - - if (platform_device_register (pldev)) { - kfree(pldev); + if (platform_device_add(mac_sonic_device)) { + platform_device_put(mac_sonic_device); mac_sonic_device = NULL; } return 0; out_unregister: - platform_device_unregister(pldev); + driver_unregister(&mac_sonic_driver); return 
-ENOMEM; } From 8d972a962177a261fc894f767fa3014f63d661e9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 5 Nov 2005 21:21:38 +0000 Subject: [PATCH 06/22] [DRIVER MODEL] Fix arcfb Release code in driver modules is a potential cause of oopsen. The device may be in use by a userspace process, which will keep a reference to the device. If the module is unloaded, the module text will be freed. Subsequently, when the last reference is dropped, the release code will be called, which no longer exists. Use generic platform device allocation/release code in modules. Signed-off-by: Russell King Acked-by: Greg Kroah-Hartman --- drivers/video/arcfb.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/video/arcfb.c b/drivers/video/arcfb.c index 126daff1c848..6aa9f824c185 100644 --- a/drivers/video/arcfb.c +++ b/drivers/video/arcfb.c @@ -502,10 +502,6 @@ static ssize_t arcfb_write(struct file *file, const char *buf, size_t count, return err; } -static void arcfb_platform_release(struct device *device) -{ -} - static struct fb_ops arcfb_ops = { .owner = THIS_MODULE, .fb_open = arcfb_open, @@ -624,13 +620,7 @@ static struct device_driver arcfb_driver = { .remove = arcfb_remove, }; -static struct platform_device arcfb_device = { - .name = "arcfb", - .id = 0, - .dev = { - .release = arcfb_platform_release, - } -}; +static struct platform_device *arcfb_device; static int __init arcfb_init(void) { @@ -641,9 +631,16 @@ static int __init arcfb_init(void) ret = driver_register(&arcfb_driver); if (!ret) { - ret = platform_device_register(&arcfb_device); - if (ret) + arcfb_device = platform_device_alloc("arcfb", 0); + if (arcfb_device) { + ret = platform_device_add(arcfb_device); + } else { + ret = -ENOMEM; + } + if (ret) { + platform_device_put(arcfb_device); driver_unregister(&arcfb_driver); + } } return ret; @@ -651,7 +648,7 @@ static int __init arcfb_init(void) static void __exit arcfb_exit(void) { - 
platform_device_unregister(&arcfb_device); + platform_device_unregister(arcfb_device); driver_unregister(&arcfb_driver); } From abbf268ae8f51e19779cdf3f5fbb8144f1a5fbc3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 5 Nov 2005 21:22:13 +0000 Subject: [PATCH 07/22] [DRIVER MODEL] Fix gbefb Statically allocated devices in module data is a potential cause of oopsen. The device may be in use by a userspace process, which will keep a reference to the device. If the module is unloaded, the module data will be freed. Subsequent use of the platform device will cause a kernel oops. Use generic platform device allocation/release code in modules. Signed-off-by: Russell King Acked-by: Greg Kroah-Hartman --- drivers/video/gbefb.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index 316bfe994811..ed853bef19e9 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -1260,24 +1260,30 @@ static struct device_driver gbefb_driver = { .remove = __devexit_p(gbefb_remove), }; -static struct platform_device gbefb_device = { - .name = "gbefb", -}; +static struct platform_device *gbefb_device; int __init gbefb_init(void) { int ret = driver_register(&gbefb_driver); if (!ret) { - ret = platform_device_register(&gbefb_device); - if (ret) + gbefb_device = platform_device_alloc("gbefb", 0); + if (gbefb_device) { + ret = platform_device_add(gbefb_device); + } else { + ret = -ENOMEM; + } + if (ret) { + platform_device_put(gbefb_device); driver_unregister(&gbefb_driver); + } } return ret; } void __exit gbefb_exit(void) { - driver_unregister(&gbefb_driver); + platform_device_unregister(gbefb_device); + driver_unregister(&gbefb_driver); } module_init(gbefb_init); From 2c119aa8091a15a87920f09aa0f17e05960fe11b Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 5 Nov 2005 21:22:39 +0000 Subject: [PATCH 08/22] [DRIVER MODEL] Fix sgivwfb Statically allocated devices in module data is a potential 
cause of oopsen. The device may be in use by a userspace process, which will keep a reference to the device. If the module is unloaded, the module data will be freed. Subsequent use of the platform device will cause a kernel oops. Use generic platform device allocation/release code in modules. Signed-off-by: Russell King Acked-by: Greg Kroah-Hartman --- drivers/video/sgivwfb.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/video/sgivwfb.c b/drivers/video/sgivwfb.c index cf5106eab2d5..5ce81f44c769 100644 --- a/drivers/video/sgivwfb.c +++ b/drivers/video/sgivwfb.c @@ -751,10 +751,6 @@ int __init sgivwfb_setup(char *options) /* * Initialisation */ -static void sgivwfb_release(struct device *device) -{ -} - static int __init sgivwfb_probe(struct device *device) { struct platform_device *dev = to_platform_device(device); @@ -859,13 +855,7 @@ static struct device_driver sgivwfb_driver = { .remove = sgivwfb_remove, }; -static struct platform_device sgivwfb_device = { - .name = "sgivwfb", - .id = 0, - .dev = { - .release = sgivwfb_release, - } -}; +static struct platform_device *sgivwfb_device; int __init sgivwfb_init(void) { @@ -880,9 +870,15 @@ int __init sgivwfb_init(void) #endif ret = driver_register(&sgivwfb_driver); if (!ret) { - ret = platform_device_register(&sgivwfb_device); - if (ret) + sgivwfb_device = platform_device_alloc("sgivwfb", 0); + if (sgivwfb_device) { + ret = platform_device_add(sgivwfb_device); + } else + ret = -ENOMEM; + if (ret) { driver_unregister(&sgivwfb_driver); + platform_device_put(sgivwfb_device); + } } return ret; } @@ -894,7 +890,7 @@ MODULE_LICENSE("GPL"); static void __exit sgivwfb_exit(void) { - platform_device_unregister(&sgivwfb_device); + platform_device_unregister(sgivwfb_device); driver_unregister(&sgivwfb_driver); } From 21c614a7899046ab108b3d327d76c33443a8ebf2 Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Sun, 6 Nov 2005 09:07:03 +0000 Subject: [PATCH 09/22] [SERIAL] 
Support Au1x00 8250 UARTs using the generic 8250 driver. The offsets of the registers are in a different place, and some parts cannot handle a full set of modem control signals. Signed-off-by: Pantelis Antoniou Signed-off-by: Russell King --- drivers/serial/8250.c | 73 +++++++++++++++++++++++-- drivers/serial/8250.h | 1 + drivers/serial/8250_au1x00.c | 102 +++++++++++++++++++++++++++++++++++ drivers/serial/Kconfig | 8 +++ drivers/serial/Makefile | 1 + drivers/serial/serial_core.c | 1 + include/linux/serial_8250.h | 1 + include/linux/serial_core.h | 1 + 8 files changed, 185 insertions(+), 3 deletions(-) create mode 100644 drivers/serial/8250_au1x00.c diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index f47d2c454e33..186e96c47b3d 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -251,9 +251,53 @@ static const struct serial8250_config uart_config[] = { }, }; +#ifdef CONFIG_SERIAL_8250_AU1X00 + +/* Au1x00 UART hardware has a weird register layout */ +static const u8 au_io_in_map[] = { + [UART_RX] = 0, + [UART_IER] = 2, + [UART_IIR] = 3, + [UART_LCR] = 5, + [UART_MCR] = 6, + [UART_LSR] = 7, + [UART_MSR] = 8, +}; + +static const u8 au_io_out_map[] = { + [UART_TX] = 1, + [UART_IER] = 2, + [UART_FCR] = 4, + [UART_LCR] = 5, + [UART_MCR] = 6, +}; + +/* sane hardware needs no mapping */ +static inline int map_8250_in_reg(struct uart_8250_port *up, int offset) +{ + if (up->port.iotype != UPIO_AU) + return offset; + return au_io_in_map[offset]; +} + +static inline int map_8250_out_reg(struct uart_8250_port *up, int offset) +{ + if (up->port.iotype != UPIO_AU) + return offset; + return au_io_out_map[offset]; +} + +#else + +/* sane hardware needs no mapping */ +#define map_8250_in_reg(up, offset) (offset) +#define map_8250_out_reg(up, offset) (offset) + +#endif + static _INLINE_ unsigned int serial_in(struct uart_8250_port *up, int offset) { - offset <<= up->port.regshift; + offset = map_8250_in_reg(up, offset) << up->port.regshift; switch 
(up->port.iotype) { case UPIO_HUB6: @@ -266,6 +310,11 @@ static _INLINE_ unsigned int serial_in(struct uart_8250_port *up, int offset) case UPIO_MEM32: return readl(up->port.membase + offset); +#ifdef CONFIG_SERIAL_8250_AU1X00 + case UPIO_AU: + return __raw_readl(up->port.membase + offset); +#endif + default: return inb(up->port.iobase + offset); } @@ -274,7 +323,7 @@ static _INLINE_ unsigned int serial_in(struct uart_8250_port *up, int offset) static _INLINE_ void serial_out(struct uart_8250_port *up, int offset, int value) { - offset <<= up->port.regshift; + offset = map_8250_out_reg(up, offset) << up->port.regshift; switch (up->port.iotype) { case UPIO_HUB6: @@ -290,6 +339,12 @@ serial_out(struct uart_8250_port *up, int offset, int value) writel(value, up->port.membase + offset); break; +#ifdef CONFIG_SERIAL_8250_AU1X00 + case UPIO_AU: + __raw_writel(value, up->port.membase + offset); + break; +#endif + default: outb(value, up->port.iobase + offset); } @@ -910,6 +965,13 @@ static void autoconfig(struct uart_8250_port *up, unsigned int probeflags) } } #endif + +#ifdef CONFIG_SERIAL_8250_AU1X00 + /* if access method is AU, it is a 16550 with a quirk */ + if (up->port.type == PORT_16550A && up->port.iotype == UPIO_AU) + up->bugs |= UART_BUG_NOMSR; +#endif + serial_outp(up, UART_LCR, save_lcr); if (up->capabilities != uart_config[up->port.type].flags) { @@ -1057,6 +1119,10 @@ static void serial8250_enable_ms(struct uart_port *port) { struct uart_8250_port *up = (struct uart_8250_port *)port; + /* no MSR capabilities */ + if (up->bugs & UART_BUG_NOMSR) + return; + up->ier |= UART_IER_MSI; serial_out(up, UART_IER, up->ier); } @@ -1774,7 +1840,8 @@ serial8250_set_termios(struct uart_port *port, struct termios *termios, * CTS flow control flag and modem status interrupts */ up->ier &= ~UART_IER_MSI; - if (UART_ENABLE_MS(&up->port, termios->c_cflag)) + if (!(up->bugs & UART_BUG_NOMSR) && + UART_ENABLE_MS(&up->port, termios->c_cflag)) up->ier |= UART_IER_MSI; if 
(up->capabilities & UART_CAP_UUE) up->ier |= UART_IER_UUE | UART_IER_RTOIE; diff --git a/drivers/serial/8250.h b/drivers/serial/8250.h index b1b459efda52..a607b98016db 100644 --- a/drivers/serial/8250.h +++ b/drivers/serial/8250.h @@ -49,6 +49,7 @@ struct serial8250_config { #define UART_BUG_QUOT (1 << 0) /* UART has buggy quot LSB */ #define UART_BUG_TXEN (1 << 1) /* UART has buggy TX IIR status */ +#define UART_BUG_NOMSR (1 << 2) /* UART has buggy MSR status bits (Au1x00) */ #if defined(__i386__) && (defined(CONFIG_M386) || defined(CONFIG_M486)) #define _INLINE_ inline diff --git a/drivers/serial/8250_au1x00.c b/drivers/serial/8250_au1x00.c new file mode 100644 index 000000000000..06ae8fbcc947 --- /dev/null +++ b/drivers/serial/8250_au1x00.c @@ -0,0 +1,102 @@ +/* + * Serial Device Initialisation for Au1x00 + * + * (C) Copyright Embedded Alley Solutions, Inc 2005 + * Author: Pantelis Antoniou + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "8250.h" + +#define PORT(_base, _irq) \ + { \ + .iobase = _base, \ + .membase = (void __iomem *)_base,\ + .mapbase = _base, \ + .irq = _irq, \ + .uartclk = 0, /* filled */ \ + .regshift = 2, \ + .iotype = UPIO_AU, \ + .flags = UPF_SKIP_TEST | \ + UPF_IOREMAP, \ + } + +static struct plat_serial8250_port au1x00_data[] = { +#if defined(CONFIG_SOC_AU1000) + PORT(UART0_ADDR, AU1000_UART0_INT), + PORT(UART1_ADDR, AU1000_UART1_INT), + PORT(UART2_ADDR, AU1000_UART2_INT), + PORT(UART3_ADDR, AU1000_UART3_INT), +#elif defined(CONFIG_SOC_AU1500) + PORT(UART0_ADDR, AU1500_UART0_INT), + PORT(UART3_ADDR, AU1500_UART3_INT), +#elif defined(CONFIG_SOC_AU1100) + PORT(UART0_ADDR, AU1100_UART0_INT), + PORT(UART1_ADDR, AU1100_UART1_INT), + PORT(UART2_ADDR, AU1100_UART2_INT), + PORT(UART3_ADDR, AU1100_UART3_INT), +#elif defined(CONFIG_SOC_AU1550) + PORT(UART0_ADDR, AU1550_UART0_INT), + PORT(UART1_ADDR, AU1550_UART1_INT), + PORT(UART2_ADDR, AU1550_UART2_INT), + PORT(UART3_ADDR, AU1550_UART3_INT), +#elif defined(CONFIG_SOC_AU1200) + PORT(UART0_ADDR, AU1200_UART0_INT), + PORT(UART1_ADDR, AU1200_UART1_INT), +#endif + { }, +}; + +static struct platform_device au1x00_device = { + .name = "serial8250", + .id = PLAT8250_DEV_AU1X00, + .dev = { + .platform_data = au1x00_data, + }, +}; + +static int __init au1x00_init(void) +{ + int i; + unsigned int uartclk; + + /* get uart clock */ + uartclk = get_au1x00_uart_baud_base() * 16; + + /* fill up uartclk */ + for (i = 0; au1x00_data[i].flags ; i++) + au1x00_data[i].uartclk = uartclk; + + return platform_device_register(&au1x00_device); +} + +/* XXX: Yes, I know this doesn't yet work. 
*/ +static void __exit au1x00_exit(void) +{ + platform_device_unregister(&au1x00_device); +} + +module_init(au1x00_init); +module_exit(au1x00_exit); + +MODULE_AUTHOR("Pantelis Antoniou "); +MODULE_DESCRIPTION("8250 serial probe module for Au1x000 cards"); +MODULE_LICENSE("GPL"); diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index b745a1b9e835..ff36f0c9fdad 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -207,6 +207,14 @@ config SERIAL_8250_ACORN system, say Y to this option. The driver can handle 1, 2, or 3 port cards. If unsure, say N. +config SERIAL_8250_AU1X00 + bool "AU1X00 serial port support" + depends on SERIAL_8250 != n && SOC_AU1X00 + help + If you have an Au1x00 board and want to use the serial port, say Y + to this option. The driver can handle 1 or 2 serial ports. + If unsure, say N. + comment "Non-8250 serial port support" config SERIAL_AMBA_PL010 diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index 11c7dc483f93..d7c7c7180e33 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_SERIAL_8250_ACCENT) += 8250_accent.o obj-$(CONFIG_SERIAL_8250_BOCA) += 8250_boca.o obj-$(CONFIG_SERIAL_8250_HUB6) += 8250_hub6.o obj-$(CONFIG_SERIAL_8250_MCA) += 8250_mca.o +obj-$(CONFIG_SERIAL_8250_AU1X00) += 8250_au1x00.o obj-$(CONFIG_SERIAL_AMBA_PL010) += amba-pl010.o obj-$(CONFIG_SERIAL_AMBA_PL011) += amba-pl011.o obj-$(CONFIG_SERIAL_CLPS711X) += clps711x.o diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index 0745ce782974..427a23858076 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -1959,6 +1959,7 @@ uart_report_port(struct uart_driver *drv, struct uart_port *port) break; case UPIO_MEM: case UPIO_MEM32: + case UPIO_AU: snprintf(address, sizeof(address), "MMIO 0x%lx", port->mapbase); break; diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 2b799d40d669..cee302aefdb7 100644 --- 
a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -42,6 +42,7 @@ enum { PLAT8250_DEV_BOCA, PLAT8250_DEV_HUB6, PLAT8250_DEV_MCA, + PLAT8250_DEV_AU1X00, }; /* diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 9d2579230689..a3ac92b19aca 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -211,6 +211,7 @@ struct uart_port { #define UPIO_HUB6 (1) #define UPIO_MEM (2) #define UPIO_MEM32 (3) +#define UPIO_AU (4) /* Au1x00 type IO */ unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ From 2dd34b488a99135ad2a529e33087ddd6a09e992a Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 30 Oct 2005 22:42:11 +0100 Subject: [PATCH 10/22] [PATCH] kbuild: permanently fix kernel configuration include mess Include autoconf.h into every kernel compilation via the gcc command line using -imacros. This ensures that we have the kernel configuration included from the start, rather than relying on each file having #include <linux/config.h> as appropriate. History has shown that this is something which is difficult to get right. Since we now include the kernel configuration automatically, make configcheck becomes meaningless, so remove it. Signed-off-by: Russell King Signed-off-by: Andrew Morton Signed-off-by: Sam Ravnborg --- Makefile | 8 ++------ include/linux/config.h | 4 +++- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 79601320ac3e..2dac8010c142 100644 --- a/Makefile +++ b/Makefile @@ -346,7 +346,8 @@ AFLAGS_KERNEL = # Use LINUXINCLUDE when you must reference the include/ directory. 
# Needed to be compatible with the O= option LINUXINCLUDE := -Iinclude \ - $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) + $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \ + -imacros include/linux/autoconf.h CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) @@ -1249,11 +1250,6 @@ tags: FORCE # Scripts to check various things for consistency # --------------------------------------------------------------------------- -configcheck: - find * $(RCS_FIND_IGNORE) \ - -name '*.[hcS]' -type f -print | sort \ - | xargs $(PERL) -w scripts/checkconfig.pl - includecheck: find * $(RCS_FIND_IGNORE) \ -name '*.[hcS]' -type f -print | sort \ diff --git a/include/linux/config.h b/include/linux/config.h index 9d1c14f7ad6d..a91f5e55b525 100644 --- a/include/linux/config.h +++ b/include/linux/config.h @@ -1,6 +1,8 @@ #ifndef _LINUX_CONFIG_H #define _LINUX_CONFIG_H - +/* This file is no longer in use and kept only for backward compatibility. + * autoconf.h is now included via -imacros on the commandline + */ #include <linux/autoconf.h> #endif From ab919c06144cfb11c05b5b5cd291daa96ac2e423 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 6 Nov 2005 11:05:21 +0100 Subject: [PATCH 11/22] kconfig: fix xconfig on fedora 2 & 3 (x86_64) From: Than Ngo qt as installed on fedora core (2 and 3) does not work with vanilla kernel. The linker fails to locate the qt lib: Actual Results: # make xconfig HOSTLD scripts/kconfig/qconf /usr/bin/ld: cannot find -lqt collect2: ld returned 1 exit status Than Ngo has provided the following fix for the bug. 
Cc: Than Ngo Acked-by: Dave Jones Signed-off-by: Sam Ravnborg --- scripts/kconfig/Makefile | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile index 0dd96919de3e..455aeabd95dd 100644 --- a/scripts/kconfig/Makefile +++ b/scripts/kconfig/Makefile @@ -129,7 +129,7 @@ endif HOSTCFLAGS_lex.zconf.o := -I$(src) HOSTCFLAGS_zconf.tab.o := -I$(src) -HOSTLOADLIBES_qconf = -L$(QTLIBPATH) -Wl,-rpath,$(QTLIBPATH) -l$(QTLIB) -ldl +HOSTLOADLIBES_qconf = -L$(QTLIBPATH) -Wl,-rpath,$(QTLIBPATH) -l$(LIBS_QT) -ldl HOSTCXXFLAGS_qconf.o = -I$(QTDIR)/include -D LKC_DIRECT_LINK HOSTLOADLIBES_gconf = `pkg-config gtk+-2.0 gmodule-2.0 libglade-2.0 --libs` @@ -163,11 +163,16 @@ $(obj)/.tmp_qtcheck: false; \ fi; \ LIBPATH=$$DIR/lib; LIB=qt; \ - $(HOSTCXX) -print-multi-os-directory > /dev/null 2>&1 && \ - LIBPATH=$$DIR/lib/$$($(HOSTCXX) -print-multi-os-directory); \ - if [ -f $$LIBPATH/libqt-mt.so ]; then LIB=qt-mt; fi; \ + if [ -f $$QTLIB/libqt-mt.so ] ; then \ + LIB=qt-mt; \ + LIBPATH=$$QTLIB; \ + else \ + $(HOSTCXX) -print-multi-os-directory > /dev/null 2>&1 && \ + LIBPATH=$$DIR/lib/$$($(HOSTCXX) -print-multi-os-directory); \ + if [ -f $$LIBPATH/libqt-mt.so ]; then LIB=qt-mt; fi; \ + fi; \ echo "QTDIR=$$DIR" > $@; echo "QTLIBPATH=$$LIBPATH" >> $@; \ - echo "QTLIB=$$LIB" >> $@; \ + echo "LIBS_QT=$$LIB" >> $@; \ if [ ! -x $$DIR/bin/moc -a -x /usr/bin/moc ]; then \ echo "*"; \ echo "* Unable to find $$DIR/bin/moc, using /usr/bin/moc instead."; \ From 8459c159f7de832eaf888398d2abf466c388dfa6 Mon Sep 17 00:00:00 2001 From: Dirk Opfer Date: Sun, 6 Nov 2005 14:27:52 +0000 Subject: [PATCH 12/22] [ARM] 3088/1: PXA: Add machine support for the Sharp SL-6000x series of PDAs Patch from Dirk Opfer This patch adds basic machine support for the Sharp SL-6000x (Tosa) PDAs. 
Signed-off-by: Dirk Opfer Signed-off-by: Richard Purdie Signed-off-by: Russell King --- MAINTAINERS | 5 + arch/arm/mach-pxa/Kconfig | 9 +- arch/arm/mach-pxa/Makefile | 1 + arch/arm/mach-pxa/tosa.c | 162 +++++++++++++++++++++++++++++++ include/asm-arm/arch-pxa/tosa.h | 166 ++++++++++++++++++++++++++++++++ 5 files changed, 341 insertions(+), 2 deletions(-) create mode 100644 arch/arm/mach-pxa/tosa.c create mode 100644 include/asm-arm/arch-pxa/tosa.h diff --git a/MAINTAINERS b/MAINTAINERS index 983f9e9aed61..23337f88c508 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -297,6 +297,11 @@ P: Richard Purdie M: rpurdie@rpsys.net S: Maintained +ARM/TOSA MACHINE SUPPORT +P: Dirk Opfer +M: dirk@opfer-online.de +S: Maintained + ARM/PLEB SUPPORT P: Peter Chubb M: pleb@gelato.unsw.edu.au diff --git a/arch/arm/mach-pxa/Kconfig b/arch/arm/mach-pxa/Kconfig index 3e5f69bb5ac4..b380a438e68f 100644 --- a/arch/arm/mach-pxa/Kconfig +++ b/arch/arm/mach-pxa/Kconfig @@ -27,7 +27,8 @@ config PXA_SHARPSL Say Y here if you intend to run this kernel on a Sharp Zaurus SL-5600 (Poodle), SL-C700 (Corgi), SL-C750 (Shepherd), SL-C760 (Husky), SL-C1000 (Akita), - SL-C3000 (Spitz) or SL-C3100 (Borzoi) handheld computer. + SL-C3000 (Spitz), SL-C3100 (Borzoi) or SL-C6000x (Tosa) + handheld computer. 
endchoice @@ -37,7 +38,7 @@ choice prompt "Select target Sharp Zaurus device range" config PXA_SHARPSL_25x - bool "Sharp PXA25x models (SL-5600 and SL-C7xx)" + bool "Sharp PXA25x models (SL-5600, SL-C7xx and SL-C6000x)" select PXA25x config PXA_SHARPSL_27x @@ -80,6 +81,10 @@ config MACH_BORZOI depends PXA_SHARPSL_27x select PXA_SHARP_Cxx00 +config MACH_TOSA + bool "Enable Sharp SL-6000x (Tosa) Support" + depends PXA_SHARPSL + config PXA25x bool help diff --git a/arch/arm/mach-pxa/Makefile b/arch/arm/mach-pxa/Makefile index f609a0f232cb..8bc72d07cea8 100644 --- a/arch/arm/mach-pxa/Makefile +++ b/arch/arm/mach-pxa/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_ARCH_PXA_IDP) += idp.o obj-$(CONFIG_PXA_SHARP_C7xx) += corgi.o corgi_ssp.o corgi_lcd.o ssp.o obj-$(CONFIG_PXA_SHARP_Cxx00) += spitz.o corgi_ssp.o corgi_lcd.o ssp.o obj-$(CONFIG_MACH_POODLE) += poodle.o +obj-$(CONFIG_MACH_TOSA) += tosa.o # Support for blinky lights led-y := leds.o diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c new file mode 100644 index 000000000000..400609f8b6a8 --- /dev/null +++ b/arch/arm/mach-pxa/tosa.c @@ -0,0 +1,162 @@ +/* + * Support for Sharp SL-C6000x PDAs + * Model: (Tosa) + * + * Copyright (c) 2005 Dirk Opfer + * + * Based on code written by Sharp/Lineo for 2.4 kernels + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "generic.h" + + +/* + * SCOOP Device + */ +static struct resource tosa_scoop_resources[] = { + [0] = { + .start = TOSA_CF_PHYS, + .end = TOSA_CF_PHYS + 0xfff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct scoop_config tosa_scoop_setup = { + .io_dir = TOSA_SCOOP_IO_DIR, + .io_out = TOSA_SCOOP_IO_OUT, + +}; + +struct platform_device tosascoop_device = { + .name = "sharp-scoop", + .id = 0, + .dev = { + .platform_data = &tosa_scoop_setup, + }, + .num_resources = ARRAY_SIZE(tosa_scoop_resources), + .resource = tosa_scoop_resources, +}; + + +/* + * SCOOP Device Jacket + */ +static struct resource tosa_scoop_jc_resources[] = { + [0] = { + .start = TOSA_SCOOP_PHYS + 0x40, + .end = TOSA_SCOOP_PHYS + 0xfff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct scoop_config tosa_scoop_jc_setup = { + .io_dir = TOSA_SCOOP_JC_IO_DIR, + .io_out = TOSA_SCOOP_JC_IO_OUT, +}; + +struct platform_device tosascoop_jc_device = { + .name = "sharp-scoop", + .id = 1, + .dev = { + .platform_data = &tosa_scoop_jc_setup, + .parent = &tosascoop_device.dev, + }, + .num_resources = ARRAY_SIZE(tosa_scoop_jc_resources), + .resource = tosa_scoop_jc_resources, +}; + +static struct scoop_pcmcia_dev tosa_pcmcia_scoop[] = { +{ + .dev = &tosascoop_device.dev, + .irq = TOSA_IRQ_GPIO_CF_IRQ, + .cd_irq = TOSA_IRQ_GPIO_CF_CD, + .cd_irq_str = "PCMCIA0 CD", +},{ + .dev = &tosascoop_jc_device.dev, + .irq = TOSA_IRQ_GPIO_JC_CF_IRQ, + .cd_irq = -1, +}, +}; + + +static struct platform_device *devices[] __initdata = { + &tosascoop_device, + &tosascoop_jc_device, +}; + +static void __init tosa_init(void) +{ + pxa_gpio_mode(TOSA_GPIO_ON_RESET | GPIO_IN); + pxa_gpio_mode(TOSA_GPIO_TC6393_INT | GPIO_IN); + + /* setup sleep mode values */ + PWER = 
0x00000002; + PFER = 0x00000000; + PRER = 0x00000002; + PGSR0 = 0x00000000; + PGSR1 = 0x00FF0002; + PGSR2 = 0x00014000; + PCFR |= PCFR_OPDE; + + // enable batt_fault + PMCR = 0x01; + + platform_add_devices(devices, ARRAY_SIZE(devices)); + + scoop_num = 2; + scoop_devs = &tosa_pcmcia_scoop[0]; +} + +static void __init fixup_tosa(struct machine_desc *desc, + struct tag *tags, char **cmdline, struct meminfo *mi) +{ + sharpsl_save_param(); + mi->nr_banks=1; + mi->bank[0].start = 0xa0000000; + mi->bank[0].node = 0; + mi->bank[0].size = (64*1024*1024); +} + +MACHINE_START(TOSA, "SHARP Tosa") + .phys_ram = 0xa0000000, + .phys_io = 0x40000000, + .io_pg_offst = (io_p2v(0x40000000) >> 18) & 0xfffc, + .fixup = fixup_tosa, + .map_io = pxa_map_io, + .init_irq = pxa_init_irq, + .init_machine = tosa_init, + .timer = &pxa_timer, +MACHINE_END diff --git a/include/asm-arm/arch-pxa/tosa.h b/include/asm-arm/arch-pxa/tosa.h new file mode 100644 index 000000000000..c3364a2c4758 --- /dev/null +++ b/include/asm-arm/arch-pxa/tosa.h @@ -0,0 +1,166 @@ +/* + * Hardware specific definitions for Sharp SL-C6000x series of PDAs + * + * Copyright (c) 2005 Dirk Opfer + * + * Based on Sharp's 2.4 kernel patches + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#ifndef _ASM_ARCH_TOSA_H_ +#define _ASM_ARCH_TOSA_H_ 1 + +/* TOSA Chip selects */ +#define TOSA_LCDC_PHYS PXA_CS4_PHYS +/* Internal Scoop */ +#define TOSA_CF_PHYS (PXA_CS2_PHYS + 0x00800000) +/* Jacket Scoop */ +#define TOSA_SCOOP_PHYS (PXA_CS5_PHYS + 0x00800000) + +/* + * SCOOP2 internal GPIOs + */ +#define TOSA_SCOOP_PXA_VCORE1 SCOOP_GPCR_PA11 +#define TOSA_SCOOP_TC6393_REST_IN SCOOP_GPCR_PA12 +#define TOSA_SCOOP_IR_POWERDWN SCOOP_GPCR_PA13 +#define TOSA_SCOOP_SD_WP SCOOP_GPCR_PA14 +#define TOSA_SCOOP_PWR_ON SCOOP_GPCR_PA15 +#define TOSA_SCOOP_AUD_PWR_ON SCOOP_GPCR_PA16 +#define TOSA_SCOOP_BT_RESET SCOOP_GPCR_PA17 +#define TOSA_SCOOP_BT_PWR_EN SCOOP_GPCR_PA18 +#define TOSA_SCOOP_AC_IN_OL SCOOP_GPCR_PA19 + +/* GPIO Direction 1 : output mode / 0:input mode */ +#define TOSA_SCOOP_IO_DIR ( TOSA_SCOOP_PXA_VCORE1 | TOSA_SCOOP_TC6393_REST_IN | \ + TOSA_SCOOP_IR_POWERDWN | TOSA_SCOOP_PWR_ON | TOSA_SCOOP_AUD_PWR_ON |\ + TOSA_SCOOP_BT_RESET | TOSA_SCOOP_BT_PWR_EN ) +/* GPIO output level at init 1: Hi */ +#define TOSA_SCOOP_IO_OUT ( TOSA_SCOOP_TC6393_REST_IN ) + +/* + * SCOOP2 jacket GPIOs + */ +#define TOSA_SCOOP_JC_BT_LED SCOOP_GPCR_PA11 +#define TOSA_SCOOP_JC_NOTE_LED SCOOP_GPCR_PA12 +#define TOSA_SCOOP_JC_CHRG_ERR_LED SCOOP_GPCR_PA13 +#define TOSA_SCOOP_JC_USB_PULLUP SCOOP_GPCR_PA14 +#define TOSA_SCOOP_JC_TC6393_SUSPEND SCOOP_GPCR_PA15 +#define TOSA_SCOOP_JC_TC3693_L3V_ON SCOOP_GPCR_PA16 +#define TOSA_SCOOP_JC_WLAN_DETECT SCOOP_GPCR_PA17 +#define TOSA_SCOOP_JC_WLAN_LED SCOOP_GPCR_PA18 +#define TOSA_SCOOP_JC_CARD_LIMIT_SEL SCOOP_GPCR_PA19 + +/* GPIO Direction 1 : output mode / 0:input mode */ +#define TOSA_SCOOP_JC_IO_DIR ( TOSA_SCOOP_JC_BT_LED | TOSA_SCOOP_JC_NOTE_LED | \ + TOSA_SCOOP_JC_CHRG_ERR_LED | TOSA_SCOOP_JC_USB_PULLUP | \ + TOSA_SCOOP_JC_TC6393_SUSPEND | TOSA_SCOOP_JC_TC3693_L3V_ON | \ + TOSA_SCOOP_JC_WLAN_LED | TOSA_SCOOP_JC_CARD_LIMIT_SEL ) +/* GPIO output level at init 1: Hi */ +#define TOSA_SCOOP_JC_IO_OUT ( 0 ) + +/* + * Timing Generator + 
*/ +#define TG_PNLCTL 0x00 +#define TG_TPOSCTL 0x01 +#define TG_DUTYCTL 0x02 +#define TG_GPOSR 0x03 +#define TG_GPODR1 0x04 +#define TG_GPODR2 0x05 +#define TG_PINICTL 0x06 +#define TG_HPOSCTL 0x07 + +/* + * LED + */ +#define TOSA_SCOOP_LED_BLUE TOSA_SCOOP_GPCR_PA11 +#define TOSA_SCOOP_LED_GREEN TOSA_SCOOP_GPCR_PA12 +#define TOSA_SCOOP_LED_ORANGE TOSA_SCOOP_GPCR_PA13 +#define TOSA_SCOOP_LED_WLAN TOSA_SCOOP_GPCR_PA18 + + +/* + * PXA GPIOs + */ +#define TOSA_GPIO_POWERON (0) +#define TOSA_GPIO_RESET (1) +#define TOSA_GPIO_AC_IN (2) +#define TOSA_GPIO_RECORD_BTN (3) +#define TOSA_GPIO_SYNC (4) /* Cradle SYNC Button */ +#define TOSA_GPIO_USB_IN (5) +#define TOSA_GPIO_JACKET_DETECT (7) +#define TOSA_GPIO_nSD_DETECT (9) +#define TOSA_GPIO_nSD_INT (10) +#define TOSA_GPIO_TC6393_CLK (11) +#define TOSA_GPIO_BAT1_CRG (12) +#define TOSA_GPIO_CF_CD (13) +#define TOSA_GPIO_BAT0_CRG (14) +#define TOSA_GPIO_TC6393_INT (15) +#define TOSA_GPIO_BAT0_LOW (17) +#define TOSA_GPIO_TC6393_RDY (18) +#define TOSA_GPIO_ON_RESET (19) +#define TOSA_GPIO_EAR_IN (20) +#define TOSA_GPIO_CF_IRQ (21) /* CF slot0 Ready */ +#define TOSA_GPIO_ON_KEY (22) +#define TOSA_GPIO_VGA_LINE (27) +#define TOSA_GPIO_TP_INT (32) /* Touch Panel pen down interrupt */ +#define TOSA_GPIO_JC_CF_IRQ (36) /* CF slot1 Ready */ +#define TOSA_GPIO_BAT_LOCKED (38) /* Battery locked */ +#define TOSA_GPIO_TG_SPI_SCLK (81) +#define TOSA_GPIO_TG_SPI_CS (82) +#define TOSA_GPIO_TG_SPI_MOSI (83) +#define TOSA_GPIO_BAT1_LOW (84) + +#define TOSA_GPIO_HP_IN GPIO_EAR_IN + +#define TOSA_GPIO_MAIN_BAT_LOW GPIO_BAT0_LOW + +#define TOSA_KEY_STROBE_NUM (11) +#define TOSA_KEY_SENSE_NUM (7) + +#define TOSA_GPIO_HIGH_STROBE_BIT (0xfc000000) +#define TOSA_GPIO_LOW_STROBE_BIT (0x0000001f) +#define TOSA_GPIO_ALL_SENSE_BIT (0x00000fe0) +#define TOSA_GPIO_ALL_SENSE_RSHIFT (5) +#define TOSA_GPIO_STROBE_BIT(a) GPIO_bit(58+(a)) +#define TOSA_GPIO_SENSE_BIT(a) GPIO_bit(69+(a)) +#define TOSA_GAFR_HIGH_STROBE_BIT (0xfff00000) +#define 
TOSA_GAFR_LOW_STROBE_BIT (0x000003ff) +#define TOSA_GAFR_ALL_SENSE_BIT (0x00fffc00) +#define TOSA_GPIO_KEY_SENSE(a) (69+(a)) +#define TOSA_GPIO_KEY_STROBE(a) (58+(a)) + +/* + * Interrupts + */ +#define TOSA_IRQ_GPIO_WAKEUP IRQ_GPIO(TOSA_GPIO_WAKEUP) +#define TOSA_IRQ_GPIO_AC_IN IRQ_GPIO(TOSA_GPIO_AC_IN) +#define TOSA_IRQ_GPIO_RECORD_BTN IRQ_GPIO(TOSA_GPIO_RECORD_BTN) +#define TOSA_IRQ_GPIO_SYNC IRQ_GPIO(TOSA_GPIO_SYNC) +#define TOSA_IRQ_GPIO_USB_IN IRQ_GPIO(TOSA_GPIO_USB_IN) +#define TOSA_IRQ_GPIO_JACKET_DETECT IRQ_GPIO(TOSA_GPIO_JACKET_DETECT) +#define TOSA_IRQ_GPIO_nSD_INT IRQ_GPIO(TOSA_GPIO_nSD_INT) +#define TOSA_IRQ_GPIO_nSD_DETECT IRQ_GPIO(TOSA_GPIO_nSD_DETECT) +#define TOSA_IRQ_GPIO_BAT1_CRG IRQ_GPIO(TOSA_GPIO_BAT1_CRG) +#define TOSA_IRQ_GPIO_CF_CD IRQ_GPIO(TOSA_GPIO_CF_CD) +#define TOSA_IRQ_GPIO_BAT0_CRG IRQ_GPIO(TOSA_GPIO_BAT0_CRG) +#define TOSA_IRQ_GPIO_TC6393_INT IRQ_GPIO(TOSA_GPIO_TC6393_INT) +#define TOSA_IRQ_GPIO_BAT0_LOW IRQ_GPIO(TOSA_GPIO_BAT0_LOW) +#define TOSA_IRQ_GPIO_EAR_IN IRQ_GPIO(TOSA_GPIO_EAR_IN) +#define TOSA_IRQ_GPIO_CF_IRQ IRQ_GPIO(TOSA_GPIO_CF_IRQ) +#define TOSA_IRQ_GPIO_ON_KEY IRQ_GPIO(TOSA_GPIO_ON_KEY) +#define TOSA_IRQ_GPIO_VGA_LINE IRQ_GPIO(TOSA_GPIO_VGA_LINE) +#define TOSA_IRQ_GPIO_TP_INT IRQ_GPIO(TOSA_GPIO_TP_INT) +#define TOSA_IRQ_GPIO_JC_CF_IRQ IRQ_GPIO(TOSA_GPIO_JC_CF_IRQ) +#define TOSA_IRQ_GPIO_BAT_LOCKED IRQ_GPIO(TOSA_GPIO_BAT_LOCKED) +#define TOSA_IRQ_GPIO_BAT1_LOW IRQ_GPIO(TOSA_GPIO_BAT1_LOW) +#define TOSA_IRQ_GPIO_KEY_SENSE(a) IRQ_GPIO(69+(a)) + +#define TOSA_IRQ_GPIO_MAIN_BAT_LOW IRQ_GPIO(TOSA_GPIO_MAIN_BAT_LOW) + +extern struct platform_device tosascoop_jc_device; +extern struct platform_device tosascoop_device; +#endif /* _ASM_ARCH_TOSA_H_ */ From 84613387cb60bc760a4588822cd61fb88e1d7fad Mon Sep 17 00:00:00 2001 From: Alessandro Zummo Date: Sun, 6 Nov 2005 14:34:12 +0000 Subject: [PATCH 13/22] [ARM] 3089/1: ixp4xx AHB/PCI endianness fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit Patch from Alessandro Zummo This patch fixes AHB/PCI endianness problems when the processor is in little-endian mode. The patch configures the CSR register closely following the directives in [1], paragraph 4.1, page 19. According to the considerations in [1], page 11, while the AHB bus supports both endian modes, on the IXP4XX it always uses big-endian. The PCI bus is connected to the South AHB. A wrong setting in the CSR register will thus cause the PCI bus to malfunction. A schematic diagram of the bus interconnections on the IXP4XX can be found in [1], page 18. The patch has been verified to work on the NSLU2 in both LE and BE modes. The author is Peter Korsgaard. [1] Intel® IXP4XX Product Line of Network Processors and IXC1100 Control Plane Processor: Understanding Big Endian and Little Endian Modes http://www.intel.com/design/network/applnots/25423701.pdf Signed-off-by: Alessandro Zummo Signed-off-by: Deepak Saxena Signed-off-by: Russell King --- arch/arm/mach-ixp4xx/common-pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 2b544363c078..9795da270e3a 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -427,7 +427,7 @@ void __init ixp4xx_pci_preinit(void) #ifdef __ARMEB__ *PCI_CSR = PCI_CSR_IC | PCI_CSR_ABE | PCI_CSR_PDS | PCI_CSR_ADS; #else - *PCI_CSR = PCI_CSR_IC; + *PCI_CSR = PCI_CSR_IC | PCI_CSR_ABE; #endif pr_debug("DONE\n"); From 7240f1f183f085f6b7af44ec274b5b6123dfdead Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sun, 6 Nov 2005 14:34:13 +0000 Subject: [PATCH 14/22] [ARM] 3114/1: use ixp2000_reg_wrb in ixp2000 uengine loader Patch from Lennert Buytenhek Make the uengine loader use ixp2000_reg_wrb in the right places. 
Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King --- arch/arm/mach-ixp2000/uengine.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-ixp2000/uengine.c b/arch/arm/mach-ixp2000/uengine.c index 43e234349d4a..ec4e007a22ef 100644 --- a/arch/arm/mach-ixp2000/uengine.c +++ b/arch/arm/mach-ixp2000/uengine.c @@ -91,8 +91,8 @@ EXPORT_SYMBOL(ixp2000_uengine_csr_write); void ixp2000_uengine_reset(u32 uengine_mask) { - ixp2000_reg_write(IXP2000_RESET1, uengine_mask & ixp2000_uengine_mask); - ixp2000_reg_write(IXP2000_RESET1, 0); + ixp2000_reg_wrb(IXP2000_RESET1, uengine_mask & ixp2000_uengine_mask); + ixp2000_reg_wrb(IXP2000_RESET1, 0); } EXPORT_SYMBOL(ixp2000_uengine_reset); @@ -452,21 +452,20 @@ static int __init ixp2000_uengine_init(void) /* * Reset microengines. */ - ixp2000_reg_write(IXP2000_RESET1, ixp2000_uengine_mask); - ixp2000_reg_write(IXP2000_RESET1, 0); + ixp2000_uengine_reset(ixp2000_uengine_mask); /* * Synchronise timestamp counters across all microengines. */ value = ixp2000_reg_read(IXP2000_MISC_CONTROL); - ixp2000_reg_write(IXP2000_MISC_CONTROL, value & ~0x80); + ixp2000_reg_wrb(IXP2000_MISC_CONTROL, value & ~0x80); for (uengine = 0; uengine < 32; uengine++) { if (ixp2000_uengine_mask & (1 << uengine)) { ixp2000_uengine_csr_write(uengine, TIMESTAMP_LOW, 0); ixp2000_uengine_csr_write(uengine, TIMESTAMP_HIGH, 0); } } - ixp2000_reg_write(IXP2000_MISC_CONTROL, value | 0x80); + ixp2000_reg_wrb(IXP2000_MISC_CONTROL, value | 0x80); return 0; } From b7ec479553b8755dd95ee988a957cbf2aef351dc Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 6 Nov 2005 14:42:37 +0000 Subject: [PATCH 15/22] [ARM] 3115/1: small optimizations to exception vector entry code Patch from Nicolas Pitre Since we know the value of cpsr on entry, we can replace the bic+orr with a single eor. Also remove a possible result delay (at least on XScale). 
Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index be439cab92c6..a511ec5b11a3 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -785,7 +785,7 @@ __kuser_helper_end: * SP points to a minimal amount of processor-private memory, the address * of which is copied into r0 for the mode specific abort handler. */ - .macro vector_stub, name, correction=0 + .macro vector_stub, name, mode, correction=0 .align 5 vector_\name: @@ -805,15 +805,14 @@ vector_\name: @ Prepare for SVC32 mode. IRQs remain disabled. @ mrs r0, cpsr - bic r0, r0, #MODE_MASK - orr r0, r0, #SVC_MODE + eor r0, r0, #(\mode ^ SVC_MODE) msr spsr_cxsf, r0 @ @ the branch table must immediately follow this code @ - mov r0, sp and lr, lr, #0x0f + mov r0, sp ldr lr, [pc, lr, lsl #2] movs pc, lr @ branch to handler in SVC mode .endm @@ -823,7 +822,7 @@ __stubs_start: /* * Interrupt dispatcher */ - vector_stub irq, 4 + vector_stub irq, IRQ_MODE, 4 .long __irq_usr @ 0 (USR_26 / USR_32) .long __irq_invalid @ 1 (FIQ_26 / FIQ_32) @@ -846,7 +845,7 @@ __stubs_start: * Data abort dispatcher * Enter in ABT mode, spsr = USR CPSR, lr = USR PC */ - vector_stub dabt, 8 + vector_stub dabt, ABT_MODE, 8 .long __dabt_usr @ 0 (USR_26 / USR_32) .long __dabt_invalid @ 1 (FIQ_26 / FIQ_32) @@ -869,7 +868,7 @@ __stubs_start: * Prefetch abort dispatcher * Enter in ABT mode, spsr = USR CPSR, lr = USR PC */ - vector_stub pabt, 4 + vector_stub pabt, ABT_MODE, 4 .long __pabt_usr @ 0 (USR_26 / USR_32) .long __pabt_invalid @ 1 (FIQ_26 / FIQ_32) @@ -892,7 +891,7 @@ __stubs_start: * Undef instr entry dispatcher * Enter in UND mode, spsr = SVC/USR CPSR, lr = SVC/USR PC */ - vector_stub und + vector_stub und, UND_MODE .long __und_usr @ 0 (USR_26 / USR_32) .long __und_invalid @ 1 (FIQ_26 / FIQ_32) From 
756c7b748926b0baec6d2a921c3711679282c8fd Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Sun, 6 Nov 2005 15:03:23 +0000 Subject: [PATCH 16/22] [ARM] 3113/1: PXA: Allow machines to override (and also reuse) pxa pm functions Patch from Richard Purdie Update the PXA pm.c file to allow machines (such as the Sharp Zaurus) to override the standard pm functions but reuse/wrap them where needed. The init call is made slightly earlier to give machine code an init level to override them in removing any race. Signed-off-by: Richard Purdie Signed-off-by: Russell King --- arch/arm/mach-pxa/pm.c | 16 ++++++++++++---- include/asm-arm/arch-pxa/pm.h | 12 ++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 include/asm-arm/arch-pxa/pm.h diff --git a/arch/arm/mach-pxa/pm.c b/arch/arm/mach-pxa/pm.c index ac4dd4336160..f74b9af112dc 100644 --- a/arch/arm/mach-pxa/pm.c +++ b/arch/arm/mach-pxa/pm.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -72,7 +74,7 @@ enum { SLEEP_SAVE_START = 0, }; -static int pxa_pm_enter(suspend_state_t state) +int pxa_pm_enter(suspend_state_t state) { unsigned long sleep_save[SLEEP_SAVE_SIZE]; unsigned long checksum = 0; @@ -191,6 +193,8 @@ static int pxa_pm_enter(suspend_state_t state) return 0; } +EXPORT_SYMBOL_GPL(pxa_pm_enter); + unsigned long sleep_phys_sp(void *sp) { return virt_to_phys(sp); @@ -199,21 +203,25 @@ unsigned long sleep_phys_sp(void *sp) /* * Called after processes are frozen, but before we shut down devices. */ -static int pxa_pm_prepare(suspend_state_t state) +int pxa_pm_prepare(suspend_state_t state) { extern int pxa_cpu_pm_prepare(suspend_state_t state); return pxa_cpu_pm_prepare(state); } +EXPORT_SYMBOL_GPL(pxa_pm_prepare); + /* * Called after devices are re-setup, but before processes are thawed. 
*/ -static int pxa_pm_finish(suspend_state_t state) +int pxa_pm_finish(suspend_state_t state) { return 0; } +EXPORT_SYMBOL_GPL(pxa_pm_finish); + /* * Set to PM_DISK_FIRMWARE so we can quickly veto suspend-to-disk. */ @@ -230,4 +238,4 @@ static int __init pxa_pm_init(void) return 0; } -late_initcall(pxa_pm_init); +device_initcall(pxa_pm_init); diff --git a/include/asm-arm/arch-pxa/pm.h b/include/asm-arm/arch-pxa/pm.h new file mode 100644 index 000000000000..7a8a1cdf430d --- /dev/null +++ b/include/asm-arm/arch-pxa/pm.h @@ -0,0 +1,12 @@ +/* + * Copyright (c) 2005 Richard Purdie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +extern int pxa_pm_prepare(suspend_state_t state); +extern int pxa_pm_enter(suspend_state_t state); +extern int pxa_pm_finish(suspend_state_t state); From d3997abf699655d2ec012e944fb34668cc3ec6d7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 6 Nov 2005 15:45:00 +0000 Subject: [PATCH 17/22] [ARM] Fix another use of // as a comment // disagrees with ld's script parsing ability. Don't use it. 
Signed-off-by: Russell King --- include/asm-arm/arch-iop3xx/iop331.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-arm/arch-iop3xx/iop331.h b/include/asm-arm/arch-iop3xx/iop331.h index 96adffd8bad2..fbf0cc11bdd9 100644 --- a/include/asm-arm/arch-iop3xx/iop331.h +++ b/include/asm-arm/arch-iop3xx/iop331.h @@ -42,7 +42,7 @@ /* this can be 128M if OMWTVR1 is set */ #define IOP331_PCI_MEM_WINDOW_SIZE 0x04000000 /* 64M outbound window */ -//#define IOP331_PCI_MEM_WINDOW_SIZE (~*IOP331_IALR1 + 1) +/* #define IOP331_PCI_MEM_WINDOW_SIZE (~*IOP331_IALR1 + 1) */ #define IOP331_PCI_LOWER_MEM_PA 0x80000000 #define IOP331_PCI_LOWER_MEM_BA (*IOP331_OMWTVR0) #define IOP331_PCI_UPPER_MEM_PA (IOP331_PCI_LOWER_MEM_PA + IOP331_PCI_MEM_WINDOW_SIZE - 1) From 4299051ebe89ab1eeadeaf4cf06ce63421412232 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 6 Nov 2005 15:46:57 +0000 Subject: [PATCH 18/22] [ARM] Fix missing declaration of cache_is_vivt() Signed-off-by: Russell King --- arch/arm/kernel/ecard.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c index dceb826bd216..c445b0a848b2 100644 --- a/arch/arm/kernel/ecard.c +++ b/arch/arm/kernel/ecard.c @@ -41,6 +41,7 @@ #include #include +#include #include #include #include From 4fe15ba08fdb280536bd7019e8505969c4ac6852 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 6 Nov 2005 19:47:04 +0000 Subject: [PATCH 19/22] [ARM] Fix second missing declaration of cache_is_vivt() Signed-off-by: Russell King --- arch/arm/kernel/ecard.c | 1 - include/asm-arm/mmu_context.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c index c445b0a848b2..dceb826bd216 100644 --- a/arch/arm/kernel/ecard.c +++ b/arch/arm/kernel/ecard.c @@ -41,7 +41,6 @@ #include #include -#include #include #include #include diff --git a/include/asm-arm/mmu_context.h b/include/asm-arm/mmu_context.h index 57b8def83d41..3d4b810d8c38 
100644 --- a/include/asm-arm/mmu_context.h +++ b/include/asm-arm/mmu_context.h @@ -13,6 +13,7 @@ #ifndef __ASM_ARM_MMU_CONTEXT_H #define __ASM_ARM_MMU_CONTEXT_H +#include #include #if __LINUX_ARM_ARCH__ >= 6 From 32f8b97ca39421057d8adef05b7219127355d60c Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 6 Nov 2005 19:49:21 +0000 Subject: [PATCH 20/22] [ARM] Don't call dump_cpu_info unless we're booting We don't want to call dump_cpu_info() from cpu_init() after boot since it produces a lot of unnecessary noise - since cpu_init() gets called on resume and hotplug cpu insertion events. Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c9b69771f92e..a6d7fb81330e 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -338,7 +338,8 @@ void cpu_init(void) BUG(); } - dump_cpu_info(cpu); + if (system_state == SYSTEM_BOOTING) + dump_cpu_info(cpu); /* * setup stacks for re-entrant exception handlers From 1555972231f3202f00e04f7c42d2db858e11b874 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 6 Nov 2005 21:41:08 +0000 Subject: [PATCH 21/22] [ARM] Fix /proc/cpuinfo format for ARM SMP glibc expects to count lines beginning with "processor" to determine the number of processors, not lines beginning with "Processor". So, give glibc the format it expects. Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index a6d7fb81330e..85774165e9fd 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -839,7 +839,12 @@ static int c_show(struct seq_file *m, void *v) #if defined(CONFIG_SMP) for_each_online_cpu(i) { - seq_printf(m, "Processor\t: %d\n", i); + /* + * glibc reads /proc/cpuinfo to determine the number of + * online processors, looking for lines beginning with + * "processor". 
Give glibc what it expects. + */ + seq_printf(m, "processor\t: %d\n", i); seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", per_cpu(cpu_data, i).loops_per_jiffy / (500000UL/HZ), (per_cpu(cpu_data, i).loops_per_jiffy / (5000UL/HZ)) % 100); From 3c726f8dee6f55e96475574e9f645327e461884c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 7 Nov 2005 11:06:55 +1100 Subject: [PATCH 22/22] [PATCH] ppc64: support 64k pages Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper way to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 9 + arch/powerpc/kernel/asm-offsets.c | 3 + arch/powerpc/kernel/cputable.c | 4 +- arch/powerpc/kernel/head_64.S | 300 ++++++++--- arch/powerpc/kernel/lparmap.c | 2 +- arch/powerpc/kernel/process.c | 6 +- arch/powerpc/kernel/prom.c | 76 ++- arch/powerpc/kernel/setup_64.c | 33 +- arch/powerpc/lib/copypage_64.S | 2 +- arch/powerpc/lib/copyuser_64.S | 4 +- arch/powerpc/mm/hash_low_64.S | 613 ++++++++++++++++++++++- arch/powerpc/mm/hash_native_64.c | 369 ++++++++------ arch/powerpc/mm/hash_utils_64.c | 534 +++++++++++++++----- arch/powerpc/mm/hugetlbpage.c | 132 ++--- arch/powerpc/mm/init_64.c | 18 +- arch/powerpc/mm/mem.c | 56 +-- arch/powerpc/mm/pgtable_64.c | 22 +- arch/powerpc/mm/ppc_mmu_32.c | 15 + arch/powerpc/mm/slb.c | 102 +++- arch/powerpc/mm/slb_low.S | 224 ++++++--- arch/powerpc/mm/stab.c | 30 +- arch/powerpc/mm/tlb_64.c | 32 +- arch/powerpc/platforms/iseries/htab.c | 65 ++- arch/powerpc/platforms/iseries/hvlog.c | 4 +- 
arch/powerpc/platforms/iseries/iommu.c | 96 ++-- arch/powerpc/platforms/iseries/setup.c | 13 +- arch/powerpc/platforms/iseries/vio.c | 39 +- arch/powerpc/platforms/iseries/viopath.c | 16 +- arch/powerpc/platforms/pseries/lpar.c | 113 +++-- arch/ppc64/Kconfig | 13 + arch/ppc64/kernel/asm-offsets.c | 3 + arch/ppc64/kernel/head.S | 300 ++++++++--- arch/ppc64/kernel/pacaData.c | 2 +- arch/ppc64/kernel/prom.c | 94 ++-- include/asm-powerpc/cputable.h | 3 + include/asm-powerpc/iommu.h | 5 + include/asm-powerpc/machdep.h | 10 +- include/asm-powerpc/prom.h | 8 + include/asm-powerpc/system.h | 2 +- include/asm-powerpc/thread_info.h | 20 +- include/asm-powerpc/tlbflush.h | 9 +- include/asm-ppc64/mmu.h | 222 ++++---- include/asm-ppc64/mmu_context.h | 15 +- include/asm-ppc64/paca.h | 13 +- include/asm-ppc64/page.h | 147 ++++-- include/asm-ppc64/pgalloc.h | 47 +- include/asm-ppc64/pgtable-4k.h | 88 ++++ include/asm-ppc64/pgtable-64k.h | 87 ++++ include/asm-ppc64/pgtable.h | 160 ++---- include/asm-ppc64/prom.h | 8 + include/asm-ppc64/system.h | 2 +- mm/hugetlb.c | 3 + 52 files changed, 2965 insertions(+), 1228 deletions(-) create mode 100644 include/asm-ppc64/pgtable-4k.h create mode 100644 include/asm-ppc64/pgtable-64k.h diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f4e25c648fbb..ca7acb0c79f0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -603,6 +603,15 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES +config PPC_64K_PAGES + bool "64k page size" + help + This option changes the kernel logical page size to 64k. On machines + without processor support for 64k pages, the kernel will simulate + them by loading each individual 4k page on demand transparently, + while on hardware with such support, it will be used to map + normal application pages. 
+ config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on PPC64 && SMP diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index bc5a3689cc05..b75757251994 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -125,6 +125,9 @@ int main(void) DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +#ifdef CONFIG_PPC_64K_PAGES + DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir)); +#endif #ifdef CONFIG_HUGETLB_PAGE DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b91345fa0805..33c63bcf69f8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -240,7 +240,7 @@ struct cpu_spec cpu_specs[] = { .oprofile_model = &op_model_power4, #endif }, - { /* Power5 */ + { /* Power5 GR */ .pvr_mask = 0xffff0000, .pvr_value = 0x003a0000, .cpu_name = "POWER5 (gr)", @@ -255,7 +255,7 @@ struct cpu_spec cpu_specs[] = { .oprofile_model = &op_model_power4, #endif }, - { /* Power5 */ + { /* Power5 GS */ .pvr_mask = 0xffff0000, .pvr_value = 0x003b0000, .cpu_name = "POWER5 (gs)", diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 45d81976987f..16ab40daa738 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -195,11 +195,11 @@ exception_marker: #define EX_R12 24 #define EX_R13 32 #define EX_SRR0 40 -#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */ #define EX_DAR 48 -#define EX_LR 48 /* SLB miss saves LR, but not DAR */ #define EX_DSISR 56 #define EX_CCR 60 +#define EX_R3 64 +#define EX_LR 72 #define EXCEPTION_PROLOG_PSERIES(area, label) \ mfspr r13,SPRN_SPRG3; /* get paca address 
into r13 */ \ @@ -419,17 +419,22 @@ data_access_slb_pSeries: mtspr SPRN_SPRG1,r13 RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_DAR std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + mfcr r9 +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r9,SPRN_SPRG1 - std r9,PACA_EXSLB+EX_R13(r13) - mfcr r9 + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ - mfspr r3,SPRN_DAR - b .do_slb_miss /* Rel. branch works in real mode */ + b .slb_miss_realmode /* Rel. branch works in real mode */ STD_EXCEPTION_PSERIES(0x400, instruction_access) @@ -440,17 +445,22 @@ instruction_access_slb_pSeries: mtspr SPRN_SPRG1,r13 RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + mfcr r9 +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r9,SPRN_SPRG1 - std r9,PACA_EXSLB+EX_R13(r13) - mfcr r9 + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - b .do_slb_miss /* Rel. branch works in real mode */ + b .slb_miss_realmode /* Rel. 
branch works in real mode */ STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) STD_EXCEPTION_PSERIES(0x600, alignment) @@ -508,6 +518,38 @@ _GLOBAL(do_stab_bolted_pSeries) mfspr r12,SPRN_SPRG2 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) +/* + * We have some room here, so we use it to put + * the pSeries SLB miss user trampoline code so it's reasonably + * away from slb_miss_user_common to avoid problems with rfid + * + * This is used for when the SLB miss handler has to go virtual, + * which doesn't happen for now anymore but will once we re-implement + * dynamic VSIDs for shared page tables + */ +#ifdef __DISABLED__ +slb_miss_user_pseries: + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r10,SPRG1 + ld r11,PACA_EXSLB+EX_R9(r13) + ld r12,PACA_EXSLB+EX_R3(r13) + std r10,PACA_EXGEN+EX_R13(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R3(r13) + clrrdi r12,r13,32 + mfmsr r10 + mfspr r11,SRR0 /* save SRR0 */ + ori r12,r12,slb_miss_user_common@l /* virt addr of handler */ + ori r10,r10,MSR_IR|MSR_DR|MSR_RI + mtspr SRR0,r12 + mfspr r12,SRR1 /* and SRR1 */ + mtspr SRR1,r10 + rfid + b . /* prevent spec. execution */ +#endif /* __DISABLED__ */ + /* * Vectors for the FWNMI option. Share common code.
*/ @@ -559,22 +601,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) .globl data_access_slb_iSeries data_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) mfspr r3,SPRN_DAR - b .do_slb_miss + std r9,PACA_EXSLB+EX_R9(r13) + mfcr r9 +#ifdef __DISABLED__ + cmpdi r3,0 + bge slb_miss_user_iseries +#endif + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) + ld r12,PACALPPACA+LPPACASRR1(r13); + b .slb_miss_realmode STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) .globl instruction_access_slb_iSeries instruction_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) - ld r3,PACALPPACA+LPPACASRR0(r13) - b .do_slb_miss + ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ + std r9,PACA_EXSLB+EX_R9(r13) + mfcr r9 +#ifdef __DISABLED__ + cmpdi r3,0 + bge .slb_miss_user_iseries +#endif + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) + ld r12,PACALPPACA+LPPACASRR1(r13); + b .slb_miss_realmode + +#ifdef __DISABLED__ +slb_miss_user_iseries: + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r10,SPRG1 + ld r11,PACA_EXSLB+EX_R9(r13) + ld r12,PACA_EXSLB+EX_R3(r13) + std r10,PACA_EXGEN+EX_R13(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R3(r13) + EXCEPTION_PROLOG_ISERIES_2 + b slb_miss_user_common +#endif MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) @@ -809,6 +888,126 @@ instruction_access_common: li r5,0x400 b 
.do_hash_page /* Try to handle as hpte fault */ +/* + * Here is the common SLB miss user that is used when going to virtual + * mode for SLB misses, that is currently not used + */ +#ifdef __DISABLED__ + .align 7 + .globl slb_miss_user_common +slb_miss_user_common: + mflr r10 + std r3,PACA_EXGEN+EX_DAR(r13) + stw r9,PACA_EXGEN+EX_CCR(r13) + std r10,PACA_EXGEN+EX_LR(r13) + std r11,PACA_EXGEN+EX_SRR0(r13) + bl .slb_allocate_user + + ld r10,PACA_EXGEN+EX_LR(r13) + ld r3,PACA_EXGEN+EX_R3(r13) + lwz r9,PACA_EXGEN+EX_CCR(r13) + ld r11,PACA_EXGEN+EX_SRR0(r13) + mtlr r10 + beq- slb_miss_fault + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_user_slb + mfmsr r10 + +.machine push +.machine "power4" + mtcrf 0x80,r9 +.machine pop + + clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */ + mtmsrd r10,1 + + mtspr SRR0,r11 + mtspr SRR1,r12 + + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + rfid + b . + +slb_miss_fault: + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN) + ld r4,PACA_EXGEN+EX_DAR(r13) + li r5,0 + std r4,_DAR(r1) + std r5,_DSISR(r1) + b .handle_page_fault + +unrecov_user_slb: + EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +#endif /* __DISABLED__ */ + + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contain the saved SRR1, SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(slb_miss_realmode) + mflr r10 + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate_realmode + + /* All done -- return from exception. 
*/ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ +#ifdef CONFIG_PPC_ISERIES + ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ +#endif /* CONFIG_PPC_ISERIES */ + + mtlr r10 + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_slb + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + +#ifdef CONFIG_PPC_ISERIES + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +#endif /* CONFIG_PPC_ISERIES */ + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + .align 7 .globl hardware_interrupt_common .globl hardware_interrupt_entry @@ -1138,62 +1337,6 @@ _GLOBAL(do_stab_bolted) rfid b . /* prevent speculative execution */ -/* - * r13 points to the PACA, r9 contains the saved CR, - * r11 and r12 contain the saved SRR0 and SRR1. - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. - */ -_GLOBAL(do_slb_miss) - mflr r10 - - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - - bl .slb_allocate /* handle it */ - - /* All done -- return from exception. */ - - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ -#ifdef CONFIG_PPC_ISERIES - ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ -#endif /* CONFIG_PPC_ISERIES */ - - mtlr r10 - - andi. 
r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- unrecov_slb - -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - -#ifdef CONFIG_PPC_ISERIES - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 -#endif /* CONFIG_PPC_ISERIES */ - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ - -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - /* * Space for CPU0's segment table. * @@ -1569,7 +1712,10 @@ _GLOBAL(__secondary_start) #endif /* Initialize the first segment table (or SLB) entry */ ld r3,PACASTABVIRT(r13) /* get addr of segment table */ +BEGIN_FTR_SECTION bl .stab_initialize +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + bl .slb_initialize /* Initialize the kernel stack. Just a repeat for iSeries. 
*/ LOADADDR(r3,current_set) diff --git a/arch/powerpc/kernel/lparmap.c b/arch/powerpc/kernel/lparmap.c index eded971d1bf9..5a05a797485f 100644 --- a/arch/powerpc/kernel/lparmap.c +++ b/arch/powerpc/kernel/lparmap.c @@ -25,7 +25,7 @@ const struct LparMap __attribute__((__section__(".text"))) xLparMap = { .xRanges = { { .xPages = HvPagesToMap, .xOffset = 0, - .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - PAGE_SHIFT), + .xVPN = KERNEL_VSID(KERNELBASE) << (SID_SHIFT - HW_PAGE_SHIFT), }, }, }; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 96843211cc5c..7f64f0464d44 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -554,12 +554,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, #ifdef CONFIG_PPC64 if (cpu_has_feature(CPU_FTR_SLB)) { unsigned long sp_vsid = get_kernel_vsid(sp); + unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; sp_vsid <<= SLB_VSID_SHIFT; - sp_vsid |= SLB_VSID_KERNEL; - if (cpu_has_feature(CPU_FTR_16M_PAGE)) - sp_vsid |= SLB_VSID_L; - + sp_vsid |= SLB_VSID_KERNEL | llp; p->thread.ksp_vsid = sp_vsid; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index eec2da695508..3675ef4bac90 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -724,10 +724,10 @@ static inline char *find_flat_dt_string(u32 offset) * used to extract the memory informations at boot before we can * unflatten the tree */ -static int __init scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data) +int __init of_scan_flat_dt(int (*it)(unsigned long node, + const char *uname, int depth, + void *data), + void *data) { unsigned long p = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; @@ -784,8 +784,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node, * This function can be used within scan_flattened_dt callback to get * access to properties */ -static void* 
__init get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size) +void* __init of_get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size) { unsigned long p = node; @@ -1087,7 +1087,7 @@ void __init unflatten_device_tree(void) static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - char *type = get_flat_dt_prop(node, "device_type", NULL); + char *type = of_get_flat_dt_prop(node, "device_type", NULL); u32 *prop; unsigned long size = 0; @@ -1095,19 +1095,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; -#ifdef CONFIG_PPC_PSERIES - /* On LPAR, look for the first ibm,pft-size property for the hash table size - */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) { - u32 *pft_size; - pft_size = get_flat_dt_prop(node, "ibm,pft-size", NULL); - if (pft_size != NULL) { - /* pft_size[0] is the NUMA CEC cookie */ - ppc64_pft_size = pft_size[1]; - } - } -#endif - boot_cpuid = 0; boot_cpuid_phys = 0; if (initial_boot_params && initial_boot_params->version >= 2) { @@ -1117,8 +1104,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, boot_cpuid_phys = initial_boot_params->boot_cpuid_phys; } else { /* Check if it's the boot-cpu, set it's hw index now */ - if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { - prop = get_flat_dt_prop(node, "reg", NULL); + if (of_get_flat_dt_prop(node, + "linux,boot-cpu", NULL) != NULL) { + prop = of_get_flat_dt_prop(node, "reg", NULL); if (prop != NULL) boot_cpuid_phys = *prop; } @@ -1127,14 +1115,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #ifdef CONFIG_ALTIVEC /* Check if we have a VMX and eventually update CPU features */ - prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", &size); + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", &size); if (prop && (*prop) > 0) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; 
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; } /* Same goes for Apple's "altivec" property */ - prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL); if (prop) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; @@ -1147,7 +1135,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * this by looking at the size of the ibm,ppc-interrupt-server#s * property */ - prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &size); cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; if (prop && ((size / sizeof(u32)) > 1)) @@ -1170,7 +1158,7 @@ static int __init early_init_dt_scan_chosen(unsigned long node, return 0; /* get platform type */ - prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); if (prop == NULL) return 0; #ifdef CONFIG_PPC64 @@ -1183,21 +1171,21 @@ static int __init early_init_dt_scan_chosen(unsigned long node, #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ - if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) + if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) iommu_is_off = 1; - if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) + if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) iommu_force_on = 1; #endif - lprop = get_flat_dt_prop(node, "linux,memory-limit", NULL); + lprop = of_get_flat_dt_prop(node, "linux,memory-limit", NULL); if (lprop) memory_limit = *lprop; #ifdef CONFIG_PPC64 - lprop = get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); if (lprop) tce_alloc_start = *lprop; - lprop = get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + lprop = of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); if (lprop) 
tce_alloc_end = *lprop; #endif @@ -1209,9 +1197,9 @@ static int __init early_init_dt_scan_chosen(unsigned long node, { u64 *basep, *entryp; - basep = get_flat_dt_prop(node, "linux,rtas-base", NULL); - entryp = get_flat_dt_prop(node, "linux,rtas-entry", NULL); - prop = get_flat_dt_prop(node, "linux,rtas-size", NULL); + basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL); + entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); + prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL); if (basep && entryp && prop) { rtas.base = *basep; rtas.entry = *entryp; @@ -1232,11 +1220,11 @@ static int __init early_init_dt_scan_root(unsigned long node, if (depth != 0) return 0; - prop = get_flat_dt_prop(node, "#size-cells", NULL); + prop = of_get_flat_dt_prop(node, "#size-cells", NULL); dt_root_size_cells = (prop == NULL) ? 1 : *prop; DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - prop = get_flat_dt_prop(node, "#address-cells", NULL); + prop = of_get_flat_dt_prop(node, "#address-cells", NULL); dt_root_addr_cells = (prop == NULL) ? 2 : *prop; DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); @@ -1271,7 +1259,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) static int __init early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data) { - char *type = get_flat_dt_prop(node, "device_type", NULL); + char *type = of_get_flat_dt_prop(node, "device_type", NULL); cell_t *reg, *endp; unsigned long l; @@ -1279,7 +1267,7 @@ static int __init early_init_dt_scan_memory(unsigned long node, if (type == NULL || strcmp(type, "memory") != 0) return 0; - reg = (cell_t *)get_flat_dt_prop(node, "reg", &l); + reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l); if (reg == NULL) return 0; @@ -1343,12 +1331,12 @@ void __init early_init_devtree(void *params) * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... 
*/ - scan_flat_dt(early_init_dt_scan_chosen, NULL); + of_scan_flat_dt(early_init_dt_scan_chosen, NULL); /* Scan memory nodes and rebuild LMBs */ lmb_init(); - scan_flat_dt(early_init_dt_scan_root, NULL); - scan_flat_dt(early_init_dt_scan_memory, NULL); + of_scan_flat_dt(early_init_dt_scan_root, NULL); + of_scan_flat_dt(early_init_dt_scan_memory, NULL); lmb_enforce_memory_limit(memory_limit); lmb_analyze(); #ifdef CONFIG_PPC64 @@ -1363,10 +1351,10 @@ void __init early_init_devtree(void *params) DBG("Scanning CPUs ...\n"); - /* Retreive hash table size from flattened tree plus other - * CPU related informations (altivec support, boot CPU ID, ...) + /* Retreive CPU related informations from the flat tree + * (altivec support, boot CPU ID, ...) */ - scan_flat_dt(early_init_dt_scan_cpus, NULL); + of_scan_flat_dt(early_init_dt_scan_cpus, NULL); DBG(" <- early_init_devtree()\n"); } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6b52cce872be..b0994050024f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -276,18 +276,23 @@ void __init early_setup(unsigned long dt_ptr) DBG("Found, Initializing memory management...\n"); - /* - * Initialize stab / SLB management - */ - if (!firmware_has_feature(FW_FEATURE_ISERIES)) - stab_initialize(lpaca->stab_real); - /* * Initialize the MMU Hash table and create the linear mapping - * of memory + * of memory. Has to be done before stab/slb initialization as + * this is currently where the page size encoding is obtained */ htab_initialize(); + /* + * Initialize stab / SLB management except on iSeries + */ + if (!firmware_has_feature(FW_FEATURE_ISERIES)) { + if (cpu_has_feature(CPU_FTR_SLB)) + slb_initialize(); + else + stab_initialize(lpaca->stab_real); + } + DBG(" <- early_setup()\n"); } @@ -552,10 +557,12 @@ static void __init irqstack_early_init(void) * SLB misses on them. 
*/ for_each_cpu(i) { - softirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); - hardirq_ctx[i] = (struct thread_info *)__va(lmb_alloc_base(THREAD_SIZE, - THREAD_SIZE, 0x10000000)); + softirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc_base(THREAD_SIZE, + THREAD_SIZE, 0x10000000)); + hardirq_ctx[i] = (struct thread_info *) + __va(lmb_alloc_base(THREAD_SIZE, + THREAD_SIZE, 0x10000000)); } } #else @@ -583,8 +590,8 @@ static void __init emergency_stack_init(void) limit = min(0x10000000UL, lmb.rmo_size); for_each_cpu(i) - paca[i].emergency_sp = __va(lmb_alloc_base(PAGE_SIZE, 128, - limit)) + PAGE_SIZE; + paca[i].emergency_sp = + __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE; } /* diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 733d61618bbf..40523b140109 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -11,7 +11,7 @@ #include #include -_GLOBAL(copy_page) +_GLOBAL(copy_4K_page) std r31,-8(1) std r30,-16(1) std r29,-24(1) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index a0b3fbbd6fb1..6d69ef39b7df 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user) std r4,-16(r1) std r5,-8(r1) dcbt 0,r4 - beq .Lcopy_page + beq .Lcopy_page_4K andi. r6,r6,7 mtcrf 0x01,r5 blt cr1,.Lshort_copy @@ -366,7 +366,7 @@ _GLOBAL(__copy_tofrom_user) * above (following the .Ldst_aligned label) but it runs slightly * slower on POWER3. */ -.Lcopy_page: +.Lcopy_page_4K: std r31,-32(1) std r30,-40(1) std r29,-48(1) diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index d6ed9102eeea..e0d02c4a2615 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -1,7 +1,7 @@ /* * ppc64 MMU hashtable management routines * - * (c) Copyright IBM Corp. 2003 + * (c) Copyright IBM Corp. 
2003, 2005 * * Maintained by: Benjamin Herrenschmidt * @@ -10,6 +10,7 @@ * described in the kernel's COPYING file. */ +#include #include #include #include @@ -42,14 +43,24 @@ /* Save non-volatile offsets */ #define STK_REG(i) (112 + ((i)-14)*8) + +#ifndef CONFIG_PPC_64K_PAGES + +/***************************************************************************** + * * + * 4K SW & 4K HW pages implementation * + * * + *****************************************************************************/ + + /* - * _hash_page(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local) + * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, + * pte_t *ptep, unsigned long trap, int local) * - * Adds a page to the hash table. This is the non-LPAR version for now + * Adds a 4K page to the hash table in a segment of 4K pages only */ -_GLOBAL(__hash_page) +_GLOBAL(__hash_page_4K) mflr r0 std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) @@ -88,7 +99,8 @@ _GLOBAL(__hash_page) /* If so, just bail out and refault if needed. Someone else * is changing this PTE anyway and might hash it. */ - bne- bail_ok + bne- htab_bail_ok + /* Prepare new PTE value (turn access RW into DIRTY, then * add BUSY,HASHPTE and ACCESSED) */ @@ -118,10 +130,10 @@ _GLOBAL(__hash_page) /* Convert linux PTE bits into HW equivalents */ andi. 
r3,r30,0x1fe /* Get basic set of flags */ - xori r3,r3,HW_NO_EXEC /* _PAGE_EXEC -> NOEXEC */ + xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */ rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ - and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY -> r0 bit 30 */ + and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ andc r0,r30,r0 /* r0 = pte & ~r0 */ rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ @@ -158,19 +170,21 @@ htab_insert_pte: andc r30,r30,r0 ori r30,r30,_PAGE_HASHPTE - /* page number in r5 */ - rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT + /* physical address r5 */ + rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT + sldi r5,r5,PAGE_SHIFT /* Calculate primary group hash */ and r0,r28,r27 - rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */ /* Call ppc_md.hpte_insert */ - ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ + ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ mr r4,r29 /* Retreive va */ - li r6,0 /* no vflags */ + li r7,0 /* !bolted, !secondary */ + li r8,MMU_PAGE_4K /* page size */ _GLOBAL(htab_call_hpte_insert1) - bl . /* Will be patched by htab_finish_init() */ + bl . 
/* Patched by htab_finish_init() */ cmpdi 0,r3,0 bge htab_pte_insert_ok /* Insertion successful */ cmpdi 0,r3,-2 /* Critical failure */ @@ -178,19 +192,21 @@ _GLOBAL(htab_call_hpte_insert1) /* Now try secondary slot */ - /* page number in r5 */ - rldicl r5,r31,64-PTE_SHIFT,PTE_SHIFT + /* physical address r5 */ + rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT + sldi r5,r5,PAGE_SHIFT /* Calculate secondary group hash */ andc r0,r27,r28 rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ /* Call ppc_md.hpte_insert */ - ld r7,STK_PARM(r4)(r1) /* Retreive new pp bits */ + ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ mr r4,r29 /* Retreive va */ - li r6,HPTE_V_SECONDARY@l /* secondary slot */ + li r7,HPTE_V_SECONDARY /* !bolted, secondary */ + li r8,MMU_PAGE_4K /* page size */ _GLOBAL(htab_call_hpte_insert2) - bl . /* Will be patched by htab_finish_init() */ + bl . /* Patched by htab_finish_init() */ cmpdi 0,r3,0 bge+ htab_pte_insert_ok /* Insertion successful */ cmpdi 0,r3,-2 /* Critical failure */ @@ -207,14 +223,14 @@ _GLOBAL(htab_call_hpte_insert2) rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ /* Call ppc_md.hpte_remove */ _GLOBAL(htab_call_hpte_remove) - bl . /* Will be patched by htab_finish_init() */ + bl . /* Patched by htab_finish_init() */ /* Try all again */ b htab_insert_pte -bail_ok: +htab_bail_ok: li r3,0 - b bail + b htab_bail htab_pte_insert_ok: /* Insert slot number & secondary bit in PTE */ @@ -227,7 +243,7 @@ htab_write_out_pte: ld r6,STK_PARM(r6)(r1) std r30,0(r6) li r3, 0 -bail: +htab_bail: ld r27,STK_REG(r27)(r1) ld r28,STK_REG(r28)(r1) ld r29,STK_REG(r29)(r1) @@ -256,10 +272,10 @@ htab_modify_pte: /* Call ppc_md.hpte_updatepp */ mr r5,r29 /* va */ - li r6,0 /* large is 0 */ + li r6,MMU_PAGE_4K /* page size */ ld r7,STK_PARM(r8)(r1) /* get "local" param */ _GLOBAL(htab_call_hpte_updatepp) - bl . /* Will be patched by htab_finish_init() */ + bl . 
/* Patched by htab_finish_init() */ /* if we failed because typically the HPTE wasn't really here * we try an insertion. @@ -276,13 +292,556 @@ htab_wrong_access: /* Bail out clearing reservation */ stdcx. r31,0,r6 li r3,1 - b bail + b htab_bail htab_pte_insert_failure: /* Bail out restoring old PTE */ ld r6,STK_PARM(r6)(r1) std r31,0(r6) li r3,-1 - b bail + b htab_bail +#else /* CONFIG_PPC_64K_PAGES */ + + +/***************************************************************************** + * * + * 64K SW & 4K or 64K HW in a 4K segment pages implementation * + * * + *****************************************************************************/ + +/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, + * pte_t *ptep, unsigned long trap, int local) + */ + +/* + * For now, we do NOT implement Admixed pages + */ +_GLOBAL(__hash_page_4K) + mflr r0 + std r0,16(r1) + stdu r1,-STACKFRAMESIZE(r1) + /* Save all params that we need after a function call */ + std r6,STK_PARM(r6)(r1) + std r8,STK_PARM(r8)(r1) + + /* Add _PAGE_PRESENT to access */ + ori r4,r4,_PAGE_PRESENT + + /* Save non-volatile registers. + * r31 will hold "old PTE" + * r30 is "new PTE" + * r29 is "va" + * r28 is a hash value + * r27 is hashtab mask (maybe dynamic patched instead ?) + * r26 is the hidx mask + * r25 is the index in combo page + */ + std r25,STK_REG(r25)(r1) + std r26,STK_REG(r26)(r1) + std r27,STK_REG(r27)(r1) + std r28,STK_REG(r28)(r1) + std r29,STK_REG(r29)(r1) + std r30,STK_REG(r30)(r1) + std r31,STK_REG(r31)(r1) + + /* Step 1: + * + * Check permissions, atomically mark the linux PTE busy + * and hashed. + */ +1: + ldarx r31,0,r6 + /* Check access rights (access & ~(pte_val(*ptep))) */ + andc. r0,r4,r31 + bne- htab_wrong_access + /* Check if PTE is busy */ + andi. r0,r31,_PAGE_BUSY + /* If so, just bail out and refault if needed. Someone else + * is changing this PTE anyway and might hash it. 
+ */ + bne- htab_bail_ok + /* Prepare new PTE value (turn access RW into DIRTY, then + * add BUSY and ACCESSED) + */ + rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ + or r30,r30,r31 + ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + /* Write the linux PTE atomically (setting busy) */ + stdcx. r30,0,r6 + bne- 1b + isync + + /* Step 2: + * + * Insert/Update the HPTE in the hash table. At this point, + * r4 (access) is re-useable, we use it for the new HPTE flags + */ + + /* Load the hidx index */ + rldicl r25,r3,64-12,60 + + /* Calc va and put it in r29 */ + rldicr r29,r5,28,63-28 /* r29 = (vsid << 28) */ + rldicl r3,r3,0,36 /* r3 = (ea & 0x0fffffff) */ + or r29,r3,r29 /* r29 = va */ + + /* Calculate hash value for primary slot and store it in r28 */ + rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ + rldicl r0,r3,64-12,48 /* (ea >> 12) & 0xffff */ + xor r28,r5,r0 + + /* Convert linux PTE bits into HW equivalents */ + andi. r3,r30,0x1fe /* Get basic set of flags */ + xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */ + rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ + rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ + and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ + andc r0,r30,r0 /* r0 = pte & ~r0 */ + rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + + /* We eventually do the icache sync here (maybe inline that + * code rather than call a C function...) + */ +BEGIN_FTR_SECTION + mr r4,r30 + mr r5,r7 + bl .hash_page_do_lazy_icache +END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) + + /* At this point, r3 contains new PP bits, save them in + * place of "access" in the param area (sic) + */ + std r3,STK_PARM(r4)(r1) + + /* Get htab_hash_mask */ + ld r4,htab_hash_mask@got(2) + ld r27,0(r4) /* htab_hash_mask -> r27 */ + + /* Check if we may already be in the hashtable, in this case, we + * go to out-of-line code to try to modify the HPTE.
We look for + * the bit at (1 >> (index + 32)) + */ + andi. r0,r31,_PAGE_HASHPTE + li r26,0 /* Default hidx */ + beq htab_insert_pte + ld r6,STK_PARM(r6)(r1) + ori r26,r6,0x8000 /* Load the hidx mask */ + ld r26,0(r26) + addi r5,r25,36 /* Check actual HPTE_SUB bit, this */ + rldcr. r0,r31,r5,0 /* must match pgtable.h definition */ + bne htab_modify_pte + +htab_insert_pte: + /* real page number in r5, PTE RPN value + index */ + rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT + sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + add r5,r5,r25 + sldi r5,r5,HW_PAGE_SHIFT + + /* Calculate primary group hash */ + and r0,r28,r27 + rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + + /* Call ppc_md.hpte_insert */ + ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ + mr r4,r29 /* Retreive va */ + li r7,0 /* !bolted, !secondary */ + li r8,MMU_PAGE_4K /* page size */ +_GLOBAL(htab_call_hpte_insert1) + bl . /* patched by htab_finish_init() */ + cmpdi 0,r3,0 + bge htab_pte_insert_ok /* Insertion successful */ + cmpdi 0,r3,-2 /* Critical failure */ + beq- htab_pte_insert_failure + + /* Now try secondary slot */ + + /* real page number in r5, PTE RPN value + index */ + rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT + sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT + add r5,r5,r25 + sldi r5,r5,HW_PAGE_SHIFT + + /* Calculate secondary group hash */ + andc r0,r27,r28 + rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ + + /* Call ppc_md.hpte_insert */ + ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ + mr r4,r29 /* Retreive va */ + li r7,HPTE_V_SECONDARY /* !bolted, secondary */ + li r8,MMU_PAGE_4K /* page size */ +_GLOBAL(htab_call_hpte_insert2) + bl . /* patched by htab_finish_init() */ + cmpdi 0,r3,0 + bge+ htab_pte_insert_ok /* Insertion successful */ + cmpdi 0,r3,-2 /* Critical failure */ + beq- htab_pte_insert_failure + + /* Both are full, we need to evict something */ + mftb r0 + /* Pick a random group based on TB */ + andi. 
r0,r0,1 + mr r5,r28 + bne 2f + not r5,r5 +2: and r0,r5,r27 + rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + /* Call ppc_md.hpte_remove */ +_GLOBAL(htab_call_hpte_remove) + bl . /* patched by htab_finish_init() */ + + /* Try all again */ + b htab_insert_pte + +htab_bail_ok: + li r3,0 + b htab_bail + +htab_pte_insert_ok: + /* Insert slot number & secondary bit in PTE second half, + * clear _PAGE_BUSY and set approriate HPTE slot bit + */ + ld r6,STK_PARM(r6)(r1) + li r0,_PAGE_BUSY + andc r30,r30,r0 + /* HPTE SUB bit */ + li r0,1 + subfic r5,r25,27 /* Must match bit position in */ + sld r0,r0,r5 /* pgtable.h */ + or r30,r30,r0 + /* hindx */ + sldi r5,r25,2 + sld r3,r3,r5 + li r4,0xf + sld r4,r4,r5 + andc r26,r26,r4 + or r26,r26,r3 + ori r5,r6,0x8000 + std r26,0(r5) + lwsync + std r30,0(r6) + li r3, 0 +htab_bail: + ld r25,STK_REG(r25)(r1) + ld r26,STK_REG(r26)(r1) + ld r27,STK_REG(r27)(r1) + ld r28,STK_REG(r28)(r1) + ld r29,STK_REG(r29)(r1) + ld r30,STK_REG(r30)(r1) + ld r31,STK_REG(r31)(r1) + addi r1,r1,STACKFRAMESIZE + ld r0,16(r1) + mtlr r0 + blr + +htab_modify_pte: + /* Keep PP bits in r4 and slot idx from the PTE around in r3 */ + mr r4,r3 + sldi r5,r25,2 + srd r3,r26,r5 + + /* Secondary group ? if yes, get a inverted hash value */ + mr r5,r28 + andi. r0,r3,0x8 /* page secondary ? */ + beq 1f + not r5,r5 +1: andi. r3,r3,0x7 /* extract idx alone */ + + /* Calculate proper slot value for ppc_md.hpte_updatepp */ + and r0,r5,r27 + rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + add r3,r0,r3 /* add slot idx */ + + /* Call ppc_md.hpte_updatepp */ + mr r5,r29 /* va */ + li r6,MMU_PAGE_4K /* page size */ + ld r7,STK_PARM(r8)(r1) /* get "local" param */ +_GLOBAL(htab_call_hpte_updatepp) + bl . /* patched by htab_finish_init() */ + + /* if we failed because typically the HPTE wasn't really here + * we try an insertion. 
+ */ + cmpdi 0,r3,-1 + beq- htab_insert_pte + + /* Clear the BUSY bit and Write out the PTE */ + li r0,_PAGE_BUSY + andc r30,r30,r0 + ld r6,STK_PARM(r6)(r1) + std r30,0(r6) + li r3,0 + b htab_bail + +htab_wrong_access: + /* Bail out clearing reservation */ + stdcx. r31,0,r6 + li r3,1 + b htab_bail + +htab_pte_insert_failure: + /* Bail out restoring old PTE */ + ld r6,STK_PARM(r6)(r1) + std r31,0(r6) + li r3,-1 + b htab_bail + + +/***************************************************************************** + * * + * 64K SW & 64K HW in a 64K segment pages implementation * + * * + *****************************************************************************/ + +_GLOBAL(__hash_page_64K) + mflr r0 + std r0,16(r1) + stdu r1,-STACKFRAMESIZE(r1) + /* Save all params that we need after a function call */ + std r6,STK_PARM(r6)(r1) + std r8,STK_PARM(r8)(r1) + + /* Add _PAGE_PRESENT to access */ + ori r4,r4,_PAGE_PRESENT + + /* Save non-volatile registers. + * r31 will hold "old PTE" + * r30 is "new PTE" + * r29 is "va" + * r28 is a hash value + * r27 is hashtab mask (maybe dynamic patched instead ?) + */ + std r27,STK_REG(r27)(r1) + std r28,STK_REG(r28)(r1) + std r29,STK_REG(r29)(r1) + std r30,STK_REG(r30)(r1) + std r31,STK_REG(r31)(r1) + + /* Step 1: + * + * Check permissions, atomically mark the linux PTE busy + * and hashed. + */ +1: + ldarx r31,0,r6 + /* Check access rights (access & ~(pte_val(*ptep))) */ + andc. r0,r4,r31 + bne- ht64_wrong_access + /* Check if PTE is busy */ + andi. r0,r31,_PAGE_BUSY + /* If so, just bail out and refault if needed. Someone else + * is changing this PTE anyway and might hash it. + */ + bne- ht64_bail_ok + /* Prepare new PTE value (turn access RW into DIRTY, then + * add BUSY,HASHPTE and ACCESSED) + */ + rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ + or r30,r30,r31 + ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + /* Write the linux PTE atomically (setting busy) */ + stdcx. 
r30,0,r6 + bne- 1b + isync + + /* Step 2: + * + * Insert/Update the HPTE in the hash table. At this point, + * r4 (access) is re-useable, we use it for the new HPTE flags + */ + + /* Calc va and put it in r29 */ + rldicr r29,r5,28,63-28 + rldicl r3,r3,0,36 + or r29,r3,r29 + + /* Calculate hash value for primary slot and store it in r28 */ + rldicl r5,r5,0,25 /* vsid & 0x0000007fffffffff */ + rldicl r0,r3,64-16,52 /* (ea >> 16) & 0xfff */ + xor r28,r5,r0 + + /* Convert linux PTE bits into HW equivalents */ + andi. r3,r30,0x1fe /* Get basic set of flags */ + xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */ + rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */ + rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */ + and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/ + andc r0,r30,r0 /* r0 = pte & ~r0 */ + rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */ + + /* We eventually do the icache sync here (maybe inline that + * code rather than call a C function...) + */ +BEGIN_FTR_SECTION + mr r4,r30 + mr r5,r7 + bl .hash_page_do_lazy_icache +END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) + + /* At this point, r3 contains new PP bits, save them in + * place of "access" in the param area (sic) + */ + std r3,STK_PARM(r4)(r1) + + /* Get htab_hash_mask */ + ld r4,htab_hash_mask@got(2) + ld r27,0(r4) /* htab_hash_mask -> r27 */ + + /* Check if we may already be in the hashtable, in this case, we + * go to out-of-line code to try to modify the HPTE + */ + andi. 
r0,r31,_PAGE_HASHPTE + bne ht64_modify_pte + +ht64_insert_pte: + /* Clear hpte bits in new pte (we also clear BUSY btw) and + * add _PAGE_HASHPTE + */ + lis r0,_PAGE_HPTEFLAGS@h + ori r0,r0,_PAGE_HPTEFLAGS@l + andc r30,r30,r0 + ori r30,r30,_PAGE_HASHPTE + + /* Phyical address in r5 */ + rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT + sldi r5,r5,PAGE_SHIFT + + /* Calculate primary group hash */ + and r0,r28,r27 + rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + + /* Call ppc_md.hpte_insert */ + ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ + mr r4,r29 /* Retreive va */ + li r7,0 /* !bolted, !secondary */ + li r8,MMU_PAGE_64K +_GLOBAL(ht64_call_hpte_insert1) + bl . /* patched by htab_finish_init() */ + cmpdi 0,r3,0 + bge ht64_pte_insert_ok /* Insertion successful */ + cmpdi 0,r3,-2 /* Critical failure */ + beq- ht64_pte_insert_failure + + /* Now try secondary slot */ + + /* Phyical address in r5 */ + rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT + sldi r5,r5,PAGE_SHIFT + + /* Calculate secondary group hash */ + andc r0,r27,r28 + rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */ + + /* Call ppc_md.hpte_insert */ + ld r6,STK_PARM(r4)(r1) /* Retreive new pp bits */ + mr r4,r29 /* Retreive va */ + li r7,HPTE_V_SECONDARY /* !bolted, secondary */ + li r8,MMU_PAGE_64K +_GLOBAL(ht64_call_hpte_insert2) + bl . /* patched by htab_finish_init() */ + cmpdi 0,r3,0 + bge+ ht64_pte_insert_ok /* Insertion successful */ + cmpdi 0,r3,-2 /* Critical failure */ + beq- ht64_pte_insert_failure + + /* Both are full, we need to evict something */ + mftb r0 + /* Pick a random group based on TB */ + andi. r0,r0,1 + mr r5,r28 + bne 2f + not r5,r5 +2: and r0,r5,r27 + rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + /* Call ppc_md.hpte_remove */ +_GLOBAL(ht64_call_hpte_remove) + bl . 
/* patched by htab_finish_init() */ + + /* Try all again */ + b ht64_insert_pte + +ht64_bail_ok: + li r3,0 + b ht64_bail + +ht64_pte_insert_ok: + /* Insert slot number & secondary bit in PTE */ + rldimi r30,r3,12,63-15 + + /* Write out the PTE with a normal write + * (maybe add eieio may be good still ?) + */ +ht64_write_out_pte: + ld r6,STK_PARM(r6)(r1) + std r30,0(r6) + li r3, 0 +ht64_bail: + ld r27,STK_REG(r27)(r1) + ld r28,STK_REG(r28)(r1) + ld r29,STK_REG(r29)(r1) + ld r30,STK_REG(r30)(r1) + ld r31,STK_REG(r31)(r1) + addi r1,r1,STACKFRAMESIZE + ld r0,16(r1) + mtlr r0 + blr + +ht64_modify_pte: + /* Keep PP bits in r4 and slot idx from the PTE around in r3 */ + mr r4,r3 + rlwinm r3,r31,32-12,29,31 + + /* Secondary group ? if yes, get a inverted hash value */ + mr r5,r28 + andi. r0,r31,_PAGE_F_SECOND + beq 1f + not r5,r5 +1: + /* Calculate proper slot value for ppc_md.hpte_updatepp */ + and r0,r5,r27 + rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */ + add r3,r0,r3 /* add slot idx */ + + /* Call ppc_md.hpte_updatepp */ + mr r5,r29 /* va */ + li r6,MMU_PAGE_64K + ld r7,STK_PARM(r8)(r1) /* get "local" param */ +_GLOBAL(ht64_call_hpte_updatepp) + bl . /* patched by htab_finish_init() */ + + /* if we failed because typically the HPTE wasn't really here + * we try an insertion. + */ + cmpdi 0,r3,-1 + beq- ht64_insert_pte + + /* Clear the BUSY bit and Write out the PTE */ + li r0,_PAGE_BUSY + andc r30,r30,r0 + b ht64_write_out_pte + +ht64_wrong_access: + /* Bail out clearing reservation */ + stdcx. 
r31,0,r6 + li r3,1 + b ht64_bail + +ht64_pte_insert_failure: + /* Bail out restoring old PTE */ + ld r6,STK_PARM(r6)(r1) + std r31,0(r6) + li r3,-1 + b ht64_bail + + +#endif /* CONFIG_PPC_64K_PAGES */ + + +/***************************************************************************** + * * + * Huge pages implementation is in hugetlbpage.c * + * * + *****************************************************************************/ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 174d14576c28..d96bcfe4c6f6 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -9,6 +9,9 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +#undef DEBUG_LOW + #include #include #include @@ -22,11 +25,84 @@ #include #include #include +#include + +#ifdef DEBUG_LOW +#define DBG_LOW(fmt...) udbg_printf(fmt) +#else +#define DBG_LOW(fmt...) +#endif #define HPTE_LOCK_BIT 3 static DEFINE_SPINLOCK(native_tlbie_lock); +static inline void __tlbie(unsigned long va, unsigned int psize) +{ + unsigned int penc; + + /* clear top 16 bits, non SLS segment */ + va &= ~(0xffffULL << 48); + + switch (psize) { + case MMU_PAGE_4K: + va &= ~0xffful; + asm volatile("tlbie %0,0" : : "r" (va) : "memory"); + break; + default: + penc = mmu_psize_defs[psize].penc; + va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); + va |= (0x7f >> (8 - penc)) << 12; + asm volatile("tlbie %0,1" : : "r" (va) : "memory"); + break; + } +} + +static inline void __tlbiel(unsigned long va, unsigned int psize) +{ + unsigned int penc; + + /* clear top 16 bits, non SLS segment */ + va &= ~(0xffffULL << 48); + + switch (psize) { + case MMU_PAGE_4K: + va &= ~0xffful; + asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" + : : "r"(va) : "memory"); + break; + default: + penc = mmu_psize_defs[psize].penc; + va &= ~((1ul << mmu_psize_defs[psize].shift) - 1); + va |= (0x7f >> (8 - penc)) << 12; + asm 
volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)" + : : "r"(va) : "memory"); + break; + } + +} + +static inline void tlbie(unsigned long va, int psize, int local) +{ + unsigned int use_local = local && cpu_has_feature(CPU_FTR_TLBIEL); + int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); + + if (use_local) + use_local = mmu_psize_defs[psize].tlbiel; + if (lock_tlbie && !use_local) + spin_lock(&native_tlbie_lock); + asm volatile("ptesync": : :"memory"); + if (use_local) { + __tlbiel(va, psize); + asm volatile("ptesync": : :"memory"); + } else { + __tlbie(va, psize); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); + } + if (lock_tlbie && !use_local) + spin_unlock(&native_tlbie_lock); +} + static inline void native_lock_hpte(hpte_t *hptep) { unsigned long *word = &hptep->v; @@ -48,13 +124,19 @@ static inline void native_unlock_hpte(hpte_t *hptep) } long native_hpte_insert(unsigned long hpte_group, unsigned long va, - unsigned long prpn, unsigned long vflags, - unsigned long rflags) + unsigned long pa, unsigned long rflags, + unsigned long vflags, int psize) { hpte_t *hptep = htab_address + hpte_group; unsigned long hpte_v, hpte_r; int i; + if (!(vflags & HPTE_V_BOLTED)) { + DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx," + " rflags=%lx, vflags=%lx, psize=%d)\n", + hpte_group, va, pa, rflags, vflags, psize); + } + for (i = 0; i < HPTES_PER_GROUP; i++) { if (! 
(hptep->v & HPTE_V_VALID)) { /* retry with lock held */ @@ -70,10 +152,13 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va, if (i == HPTES_PER_GROUP) return -1; - hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; - if (vflags & HPTE_V_LARGE) - va &= ~(1UL << HPTE_V_AVPN_SHIFT); - hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; + hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID; + hpte_r = hpte_encode_r(pa, psize) | rflags; + + if (!(vflags & HPTE_V_BOLTED)) { + DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", + i, hpte_v, hpte_r); + } hptep->r = hpte_r; /* Guarantee the second dword is visible before the valid bit */ @@ -96,6 +181,8 @@ static long native_hpte_remove(unsigned long hpte_group) int slot_offset; unsigned long hpte_v; + DBG_LOW(" remove(group=%lx)\n", hpte_group); + /* pick a random entry to start at */ slot_offset = mftb() & 0x7; @@ -126,34 +213,51 @@ static long native_hpte_remove(unsigned long hpte_group) return i; } -static inline void set_pp_bit(unsigned long pp, hpte_t *addr) +static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int psize, int local) { - unsigned long old; - unsigned long *p = &addr->r; + hpte_t *hptep = htab_address + slot; + unsigned long hpte_v, want_v; + int ret = 0; - __asm__ __volatile__( - "1: ldarx %0,0,%3\n\ - rldimi %0,%2,0,61\n\ - stdcx. 
%0,0,%3\n\ - bne 1b" - : "=&r" (old), "=m" (*p) - : "r" (pp), "r" (p), "m" (*p) - : "cc"); + want_v = hpte_encode_v(va, psize); + + DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)", + va, want_v & HPTE_V_AVPN, slot, newpp); + + native_lock_hpte(hptep); + + hpte_v = hptep->v; + + /* Even if we miss, we need to invalidate the TLB */ + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { + DBG_LOW(" -> miss\n"); + native_unlock_hpte(hptep); + ret = -1; + } else { + DBG_LOW(" -> hit\n"); + /* Update the HPTE */ + hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | + (newpp & (HPTE_R_PP | HPTE_R_N)); + native_unlock_hpte(hptep); + } + + /* Ensure it is out of the tlb too. */ + tlbie(va, psize, local); + + return ret; } -/* - * Only works on small pages. Yes its ugly to have to check each slot in - * the group but we only use this during bootup. - */ -static long native_hpte_find(unsigned long vpn) +static long native_hpte_find(unsigned long va, int psize) { hpte_t *hptep; unsigned long hash; unsigned long i, j; long slot; - unsigned long hpte_v; + unsigned long want_v, hpte_v; - hash = hpt_hash(vpn, 0); + hash = hpt_hash(va, mmu_psize_defs[psize].shift); + want_v = hpte_encode_v(va, psize); for (j = 0; j < 2; j++) { slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; @@ -161,7 +265,7 @@ static long native_hpte_find(unsigned long vpn) hptep = htab_address + slot; hpte_v = hptep->v; - if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID) && ( !!(hpte_v & HPTE_V_SECONDARY) == j)) { /* HPTE matches */ @@ -177,118 +281,90 @@ static long native_hpte_find(unsigned long vpn) return -1; } -static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, int large, int local) -{ - hpte_t *hptep = htab_address + slot; - unsigned long hpte_v; - unsigned long avpn = va >> 23; - int ret = 0; - - if (large) - avpn &= ~1; - - native_lock_hpte(hptep); - - hpte_v = hptep->v; - - 
/* Even if we miss, we need to invalidate the TLB */ - if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) - || !(hpte_v & HPTE_V_VALID)) { - native_unlock_hpte(hptep); - ret = -1; - } else { - set_pp_bit(newpp, hptep); - native_unlock_hpte(hptep); - } - - /* Ensure it is out of the tlb too */ - if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { - tlbiel(va); - } else { - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - spin_lock(&native_tlbie_lock); - tlbie(va, large); - if (lock_tlbie) - spin_unlock(&native_tlbie_lock); - } - - return ret; -} - /* * Update the page protection bits. Intended to be used to create * guard pages for kernel data structures on pages which are bolted * in the HPT. Assumes pages being operated on will not be stolen. - * Does not work on large pages. * * No need to lock here because we should be the only user. */ -static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) +static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, + int psize) { - unsigned long vsid, va, vpn, flags = 0; + unsigned long vsid, va; long slot; hpte_t *hptep; - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); vsid = get_kernel_vsid(ea); va = (vsid << 28) | (ea & 0x0fffffff); - vpn = va >> PAGE_SHIFT; - slot = native_hpte_find(vpn); + slot = native_hpte_find(va, psize); if (slot == -1) panic("could not find page to bolt\n"); hptep = htab_address + slot; - set_pp_bit(newpp, hptep); + /* Update the HPTE */ + hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | + (newpp & (HPTE_R_PP | HPTE_R_N)); - /* Ensure it is out of the tlb too */ - if (lock_tlbie) - spin_lock_irqsave(&native_tlbie_lock, flags); - tlbie(va, 0); - if (lock_tlbie) - spin_unlock_irqrestore(&native_tlbie_lock, flags); + /* Ensure it is out of the tlb too. 
*/ + tlbie(va, psize, 0); } static void native_hpte_invalidate(unsigned long slot, unsigned long va, - int large, int local) + int psize, int local) { hpte_t *hptep = htab_address + slot; unsigned long hpte_v; - unsigned long avpn = va >> 23; + unsigned long want_v; unsigned long flags; - int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); - - if (large) - avpn &= ~1; local_irq_save(flags); - native_lock_hpte(hptep); + DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot); + + want_v = hpte_encode_v(va, psize); + native_lock_hpte(hptep); hpte_v = hptep->v; /* Even if we miss, we need to invalidate the TLB */ - if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) - || !(hpte_v & HPTE_V_VALID)) { + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) native_unlock_hpte(hptep); - } else { + else /* Invalidate the hpte. NOTE: this also unlocks it */ hptep->v = 0; + + /* Invalidate the TLB */ + tlbie(va, psize, local); + + local_irq_restore(flags); +} + +/* + * XXX This need fixing based on page size. It's only used by + * native_hpte_clear() for now which needs fixing too so they + * make a good pair... + */ +static unsigned long slot2va(unsigned long hpte_v, unsigned long slot) +{ + unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v); + unsigned long va; + + va = avpn << 23; + + if (! (hpte_v & HPTE_V_LARGE)) { + unsigned long vpi, pteg; + + pteg = slot / HPTES_PER_GROUP; + if (hpte_v & HPTE_V_SECONDARY) + pteg = ~pteg; + + vpi = ((va >> 28) ^ pteg) & htab_hash_mask; + + va |= vpi << PAGE_SHIFT; } - /* Invalidate the tlb */ - if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { - tlbiel(va); - } else { - if (lock_tlbie) - spin_lock(&native_tlbie_lock); - tlbie(va, large); - if (lock_tlbie) - spin_unlock(&native_tlbie_lock); - } - local_irq_restore(flags); + return va; } /* @@ -298,6 +374,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long va, * * TODO: add batching support when enabled. 
remember, no dynamic memory here, * athough there is the control page available... + * + * XXX FIXME: 4k only for now ! */ static void native_hpte_clear(void) { @@ -327,7 +405,7 @@ static void native_hpte_clear(void) if (hpte_v & HPTE_V_VALID) { hptep->v = 0; - tlbie(slot2va(hpte_v, slot), hpte_v & HPTE_V_LARGE); + tlbie(slot2va(hpte_v, slot), MMU_PAGE_4K, 0); } } @@ -335,59 +413,59 @@ static void native_hpte_clear(void) local_irq_restore(flags); } +/* + * Batched hash table flush, we batch the tlbie's to avoid taking/releasing + * the lock all the time + */ static void native_flush_hash_range(unsigned long number, int local) { - unsigned long va, vpn, hash, secondary, slot, flags, avpn; - int i, j; + unsigned long va, hash, index, hidx, shift, slot; hpte_t *hptep; unsigned long hpte_v; + unsigned long want_v; + unsigned long flags; + real_pte_t pte; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - unsigned long large = batch->large; + unsigned long psize = batch->psize; + int i; local_irq_save(flags); - j = 0; for (i = 0; i < number; i++) { - va = batch->vaddr[j]; - if (large) - vpn = va >> HPAGE_SHIFT; - else - vpn = va >> PAGE_SHIFT; - hash = hpt_hash(vpn, large); - secondary = (pte_val(batch->pte[i]) & _PAGE_SECONDARY) >> 15; - if (secondary) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (pte_val(batch->pte[i]) & _PAGE_GROUP_IX) >> 12; + va = batch->vaddr[i]; + pte = batch->pte[i]; - hptep = htab_address + slot; - - avpn = va >> 23; - if (large) - avpn &= ~0x1UL; - - native_lock_hpte(hptep); - - hpte_v = hptep->v; - - /* Even if we miss, we need to invalidate the TLB */ - if ((HPTE_V_AVPN_VAL(hpte_v) != avpn) - || !(hpte_v & HPTE_V_VALID)) { - native_unlock_hpte(hptep); - } else { - /* Invalidate the hpte. 
NOTE: this also unlocks it */ - hptep->v = 0; - } - - j++; + pte_iterate_hashed_subpages(pte, psize, va, index, shift) { + hash = hpt_hash(va, shift); + hidx = __rpte_to_hidx(pte, index); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + hptep = htab_address + slot; + want_v = hpte_encode_v(va, psize); + native_lock_hpte(hptep); + hpte_v = hptep->v; + if (!HPTE_V_COMPARE(hpte_v, want_v) || + !(hpte_v & HPTE_V_VALID)) + native_unlock_hpte(hptep); + else + hptep->v = 0; + } pte_iterate_hashed_end(); } - if (cpu_has_feature(CPU_FTR_TLBIEL) && !large && local) { + if (cpu_has_feature(CPU_FTR_TLBIEL) && + mmu_psize_defs[psize].tlbiel && local) { asm volatile("ptesync":::"memory"); + for (i = 0; i < number; i++) { + va = batch->vaddr[i]; + pte = batch->pte[i]; - for (i = 0; i < j; i++) - __tlbiel(batch->vaddr[i]); - + pte_iterate_hashed_subpages(pte, psize, va, index, + shift) { + __tlbiel(va, psize); + } pte_iterate_hashed_end(); + } asm volatile("ptesync":::"memory"); } else { int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); @@ -396,10 +474,15 @@ static void native_flush_hash_range(unsigned long number, int local) spin_lock(&native_tlbie_lock); asm volatile("ptesync":::"memory"); + for (i = 0; i < number; i++) { + va = batch->vaddr[i]; + pte = batch->pte[i]; - for (i = 0; i < j; i++) - __tlbie(batch->vaddr[i], large); - + pte_iterate_hashed_subpages(pte, psize, va, index, + shift) { + __tlbie(va, psize); + } pte_iterate_hashed_end(); + } asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 6e9e05cce02c..b2f3dbca6952 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -19,6 +19,7 @@ */ #undef DEBUG +#undef DEBUG_LOW #include #include @@ -59,6 +60,15 @@ #define DBG(fmt...) #endif +#ifdef DEBUG_LOW +#define DBG_LOW(fmt...) 
udbg_printf(fmt) +#else +#define DBG_LOW(fmt...) +#endif + +#define KB (1024) +#define MB (1024*KB) + /* * Note: pte --> Linux PTE * HPTE --> PowerPC Hashed Page Table Entry @@ -77,91 +87,290 @@ extern unsigned long dart_tablebase; hpte_t *htab_address; unsigned long htab_hash_mask; - unsigned long _SDR1; +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +int mmu_linear_psize = MMU_PAGE_4K; +int mmu_virtual_psize = MMU_PAGE_4K; +#ifdef CONFIG_HUGETLB_PAGE +int mmu_huge_psize = MMU_PAGE_16M; +unsigned int HPAGE_SHIFT; +#endif -#define KB (1024) -#define MB (1024*KB) +/* There are definitions of page sizes arrays to be used when none + * is provided by the firmware. + */ -static inline void loop_forever(void) +/* Pre-POWER4 CPUs (4k pages only) + */ +struct mmu_psize_def mmu_psize_defaults_old[] = { + [MMU_PAGE_4K] = { + .shift = 12, + .sllp = 0, + .penc = 0, + .avpnm = 0, + .tlbiel = 0, + }, +}; + +/* POWER4, GPUL, POWER5 + * + * Support for 16Mb large pages + */ +struct mmu_psize_def mmu_psize_defaults_gp[] = { + [MMU_PAGE_4K] = { + .shift = 12, + .sllp = 0, + .penc = 0, + .avpnm = 0, + .tlbiel = 1, + }, + [MMU_PAGE_16M] = { + .shift = 24, + .sllp = SLB_VSID_L, + .penc = 0, + .avpnm = 0x1UL, + .tlbiel = 0, + }, +}; + + +int htab_bolt_mapping(unsigned long vstart, unsigned long vend, + unsigned long pstart, unsigned long mode, int psize) { - volatile unsigned long x = 1; - for(;x;x|=1) - ; -} - -static inline void create_pte_mapping(unsigned long start, unsigned long end, - unsigned long mode, int large) -{ - unsigned long addr; - unsigned int step; + unsigned long vaddr, paddr; + unsigned int step, shift; unsigned long tmp_mode; - unsigned long vflags; + int ret = 0; - if (large) { - step = 16*MB; - vflags = HPTE_V_BOLTED | HPTE_V_LARGE; - } else { - step = 4*KB; - vflags = HPTE_V_BOLTED; - } + shift = mmu_psize_defs[psize].shift; + step = 1 << shift; - for (addr = start; addr < end; addr += step) { + for (vaddr = vstart, paddr = pstart; vaddr < vend; + vaddr 
+= step, paddr += step) { unsigned long vpn, hash, hpteg; - unsigned long vsid = get_kernel_vsid(addr); - unsigned long va = (vsid << 28) | (addr & 0xfffffff); - int ret = -1; - - if (large) - vpn = va >> HPAGE_SHIFT; - else - vpn = va >> PAGE_SHIFT; - + unsigned long vsid = get_kernel_vsid(vaddr); + unsigned long va = (vsid << 28) | (vaddr & 0x0fffffff); + vpn = va >> shift; tmp_mode = mode; /* Make non-kernel text non-executable */ - if (!in_kernel_text(addr)) - tmp_mode = mode | HW_NO_EXEC; - - hash = hpt_hash(vpn, large); + if (!in_kernel_text(vaddr)) + tmp_mode = mode | HPTE_R_N; + hash = hpt_hash(va, shift); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + /* The crap below can be cleaned once ppd_md.probe() can + * set up the hash callbacks, thus we can just used the + * normal insert callback here. + */ #ifdef CONFIG_PPC_ISERIES - if (systemcfg->platform & PLATFORM_ISERIES_LPAR) - ret = iSeries_hpte_bolt_or_insert(hpteg, va, - virt_to_abs(addr) >> PAGE_SHIFT, - vflags, tmp_mode); + if (systemcfg->platform == PLATFORM_ISERIES_LPAR) + ret = iSeries_hpte_insert(hpteg, va, + virt_to_abs(paddr), + tmp_mode, + HPTE_V_BOLTED, + psize); else #endif #ifdef CONFIG_PPC_PSERIES if (systemcfg->platform & PLATFORM_LPAR) ret = pSeries_lpar_hpte_insert(hpteg, va, - virt_to_abs(addr) >> PAGE_SHIFT, - vflags, tmp_mode); + virt_to_abs(paddr), + tmp_mode, + HPTE_V_BOLTED, + psize); else #endif #ifdef CONFIG_PPC_MULTIPLATFORM ret = native_hpte_insert(hpteg, va, - virt_to_abs(addr) >> PAGE_SHIFT, - vflags, tmp_mode); + virt_to_abs(paddr), + tmp_mode, HPTE_V_BOLTED, + psize); #endif - - if (ret == -1) { - ppc64_terminate_msg(0x20, "create_pte_mapping"); - loop_forever(); - } + if (ret < 0) + break; } + return ret < 0 ? 
ret : 0; } -static unsigned long get_hashtable_size(void) +static int __init htab_dt_scan_page_sizes(unsigned long node, + const char *uname, int depth, + void *data) +{ + char *type = of_get_flat_dt_prop(node, "device_type", NULL); + u32 *prop; + unsigned long size = 0; + + /* We are scanning "cpu" nodes only */ + if (type == NULL || strcmp(type, "cpu") != 0) + return 0; + + prop = (u32 *)of_get_flat_dt_prop(node, + "ibm,segment-page-sizes", &size); + if (prop != NULL) { + DBG("Page sizes from device-tree:\n"); + size /= 4; + cur_cpu_spec->cpu_features &= ~(CPU_FTR_16M_PAGE); + while(size > 0) { + unsigned int shift = prop[0]; + unsigned int slbenc = prop[1]; + unsigned int lpnum = prop[2]; + unsigned int lpenc = 0; + struct mmu_psize_def *def; + int idx = -1; + + size -= 3; prop += 3; + while(size > 0 && lpnum) { + if (prop[0] == shift) + lpenc = prop[1]; + prop += 2; size -= 2; + lpnum--; + } + switch(shift) { + case 0xc: + idx = MMU_PAGE_4K; + break; + case 0x10: + idx = MMU_PAGE_64K; + break; + case 0x14: + idx = MMU_PAGE_1M; + break; + case 0x18: + idx = MMU_PAGE_16M; + cur_cpu_spec->cpu_features |= CPU_FTR_16M_PAGE; + break; + case 0x22: + idx = MMU_PAGE_16G; + break; + } + if (idx < 0) + continue; + def = &mmu_psize_defs[idx]; + def->shift = shift; + if (shift <= 23) + def->avpnm = 0; + else + def->avpnm = (1 << (shift - 23)) - 1; + def->sllp = slbenc; + def->penc = lpenc; + /* We don't know for sure what's up with tlbiel, so + * for now we only set it for 4K and 64K pages + */ + if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K) + def->tlbiel = 1; + else + def->tlbiel = 0; + + DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, " + "tlbiel=%d, penc=%d\n", + idx, shift, def->sllp, def->avpnm, def->tlbiel, + def->penc); + } + return 1; + } + return 0; +} + + +static void __init htab_init_page_sizes(void) +{ + int rc; + + /* Default to 4K pages only */ + memcpy(mmu_psize_defs, mmu_psize_defaults_old, + sizeof(mmu_psize_defaults_old)); + + /* + * Try to find the 
available page sizes in the device-tree + */ + rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL); + if (rc != 0) /* Found */ + goto found; + + /* + * Not in the device-tree, let's fallback on known size + * list for 16M capable GP & GR + */ + if ((systemcfg->platform != PLATFORM_ISERIES_LPAR) && + cpu_has_feature(CPU_FTR_16M_PAGE)) + memcpy(mmu_psize_defs, mmu_psize_defaults_gp, + sizeof(mmu_psize_defaults_gp)); + found: + /* + * Pick a size for the linear mapping. Currently, we only support + * 16M, 1M and 4K which is the default + */ + if (mmu_psize_defs[MMU_PAGE_16M].shift) + mmu_linear_psize = MMU_PAGE_16M; + else if (mmu_psize_defs[MMU_PAGE_1M].shift) + mmu_linear_psize = MMU_PAGE_1M; + + /* + * Pick a size for the ordinary pages. Default is 4K, we support + * 64K if cache inhibited large pages are supported by the + * processor + */ +#ifdef CONFIG_PPC_64K_PAGES + if (mmu_psize_defs[MMU_PAGE_64K].shift && + cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) + mmu_virtual_psize = MMU_PAGE_64K; +#endif + + printk(KERN_INFO "Page orders: linear mapping = %d, others = %d\n", + mmu_psize_defs[mmu_linear_psize].shift, + mmu_psize_defs[mmu_virtual_psize].shift); + +#ifdef CONFIG_HUGETLB_PAGE + /* Init large page size. Currently, we pick 16M or 1M depending + * on what is available + */ + if (mmu_psize_defs[MMU_PAGE_16M].shift) + mmu_huge_psize = MMU_PAGE_16M; + else if (mmu_psize_defs[MMU_PAGE_1M].shift) + mmu_huge_psize = MMU_PAGE_1M; + + /* Calculate HPAGE_SHIFT and sanity check it */ + if (mmu_psize_defs[mmu_huge_psize].shift > 16 && + mmu_psize_defs[mmu_huge_psize].shift < 28) + HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift; + else + HPAGE_SHIFT = 0; /* No huge pages dude ! 
*/ +#endif /* CONFIG_HUGETLB_PAGE */ +} + +static int __init htab_dt_scan_pftsize(unsigned long node, + const char *uname, int depth, + void *data) +{ + char *type = of_get_flat_dt_prop(node, "device_type", NULL); + u32 *prop; + + /* We are scanning "cpu" nodes only */ + if (type == NULL || strcmp(type, "cpu") != 0) + return 0; + + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,pft-size", NULL); + if (prop != NULL) { + /* pft_size[0] is the NUMA CEC cookie */ + ppc64_pft_size = prop[1]; + return 1; + } + return 0; +} + +static unsigned long __init htab_get_table_size(void) { unsigned long rnd_mem_size, pteg_count; - /* If hash size wasn't obtained in prom.c, we calculate it now based on - * the total RAM size + /* If hash size isn't already provided by the platform, we try to + * retreive it from the device-tree. If it's not there neither, we + * calculate it now based on the total RAM size */ + if (ppc64_pft_size == 0) + of_scan_flat_dt(htab_dt_scan_pftsize, NULL); if (ppc64_pft_size) return 1UL << ppc64_pft_size; @@ -181,17 +390,21 @@ void __init htab_initialize(void) unsigned long table, htab_size_bytes; unsigned long pteg_count; unsigned long mode_rw; - int i, use_largepages = 0; unsigned long base = 0, size = 0; + int i; + extern unsigned long tce_alloc_start, tce_alloc_end; DBG(" -> htab_initialize()\n"); + /* Initialize page sizes */ + htab_init_page_sizes(); + /* * Calculate the required size of the htab. We want the number of * PTEGs to equal one half the number of real pages. */ - htab_size_bytes = get_hashtable_size(); + htab_size_bytes = htab_get_table_size(); pteg_count = htab_size_bytes >> 7; /* For debug, make the HTAB 1/8 as big as it normally would be. */ @@ -211,14 +424,11 @@ void __init htab_initialize(void) * the absolute address space. 
*/ table = lmb_alloc(htab_size_bytes, htab_size_bytes); + BUG_ON(table == 0); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); - if ( !table ) { - ppc64_terminate_msg(0x20, "hpt space"); - loop_forever(); - } htab_address = abs_to_virt(table); /* htab absolute addr + encoded htabsize */ @@ -234,8 +444,6 @@ void __init htab_initialize(void) * _NOT_ map it to avoid cache paradoxes as it's remapped non * cacheable later on */ - if (cpu_has_feature(CPU_FTR_16M_PAGE)) - use_largepages = 1; /* create bolted the linear mapping in the hash table */ for (i=0; i < lmb.memory.cnt; i++) { @@ -246,27 +454,32 @@ void __init htab_initialize(void) #ifdef CONFIG_U3_DART /* Do not map the DART space. Fortunately, it will be aligned - * in such a way that it will not cross two lmb regions and will - * fit within a single 16Mb page. - * The DART space is assumed to be a full 16Mb region even if we - * only use 2Mb of that space. We will use more of it later for - * AGP GART. We have to use a full 16Mb large page. + * in such a way that it will not cross two lmb regions and + * will fit within a single 16Mb page. + * The DART space is assumed to be a full 16Mb region even if + * we only use 2Mb of that space. We will use more of it later + * for AGP GART. We have to use a full 16Mb large page. 
*/ DBG("DART base: %lx\n", dart_tablebase); if (dart_tablebase != 0 && dart_tablebase >= base && dart_tablebase < (base + size)) { if (base != dart_tablebase) - create_pte_mapping(base, dart_tablebase, mode_rw, - use_largepages); + BUG_ON(htab_bolt_mapping(base, dart_tablebase, + base, mode_rw, + mmu_linear_psize)); if ((base + size) > (dart_tablebase + 16*MB)) - create_pte_mapping(dart_tablebase + 16*MB, base + size, - mode_rw, use_largepages); + BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB, + base + size, + dart_tablebase+16*MB, + mode_rw, + mmu_linear_psize)); continue; } #endif /* CONFIG_U3_DART */ - create_pte_mapping(base, base + size, mode_rw, use_largepages); - } + BUG_ON(htab_bolt_mapping(base, base + size, base, + mode_rw, mmu_linear_psize)); + } /* * If we have a memory_limit and we've allocated TCEs then we need to @@ -282,8 +495,9 @@ void __init htab_initialize(void) if (base + size >= tce_alloc_start) tce_alloc_start = base + size + 1; - create_pte_mapping(tce_alloc_start, tce_alloc_end, - mode_rw, use_largepages); + BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end, + tce_alloc_start, mode_rw, + mmu_linear_psize)); } DBG(" <- htab_initialize()\n"); @@ -298,9 +512,6 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) { struct page *page; - if (!pfn_valid(pte_pfn(pte))) - return pp; - page = pte_page(pte); /* page is dirty */ @@ -309,7 +520,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) __flush_dcache_icache(page_address(page)); set_bit(PG_arch_1, &page->flags); } else - pp |= HW_NO_EXEC; + pp |= HPTE_R_N; } return pp; } @@ -325,94 +536,169 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) unsigned long vsid; struct mm_struct *mm; pte_t *ptep; - int ret; - int user_region = 0; - int local = 0; cpumask_t tmp; + int rc, user_region = 0, local = 0; - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) - return 1; + DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", + 
ea, access, trap); + if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) { + DBG_LOW(" out of pgtable range !\n"); + return 1; + } + + /* Get region & vsid */ switch (REGION_ID(ea)) { case USER_REGION_ID: user_region = 1; mm = current->mm; - if (! mm) + if (! mm) { + DBG_LOW(" user region with no mm !\n"); return 1; - + } vsid = get_vsid(mm->context.id, ea); break; case VMALLOC_REGION_ID: mm = &init_mm; vsid = get_kernel_vsid(ea); break; -#if 0 - case KERNEL_REGION_ID: - /* - * Should never get here - entire 0xC0... region is bolted. - * Send the problem up to do_page_fault - */ -#endif default: /* Not a valid range * Send the problem up to do_page_fault */ return 1; - break; } + DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid); + /* Get pgdir */ pgdir = mm->pgd; - if (pgdir == NULL) return 1; + /* Check CPU locality */ tmp = cpumask_of_cpu(smp_processor_id()); if (user_region && cpus_equal(mm->cpu_vm_mask, tmp)) local = 1; - /* Is this a huge page ? */ - if (unlikely(in_hugepage_area(mm->context, ea))) - ret = hash_huge_page(mm, access, ea, vsid, local); - else { - ptep = find_linux_pte(pgdir, ea); - if (ptep == NULL) - return 1; - ret = __hash_page(ea, access, vsid, ptep, trap, local); + /* Handle hugepage regions */ + if (unlikely(in_hugepage_area(mm->context, ea))) { + DBG_LOW(" -> huge page !\n"); + return hash_huge_page(mm, access, ea, vsid, local); } - return ret; + /* Get PTE and page size from page tables */ + ptep = find_linux_pte(pgdir, ea); + if (ptep == NULL || !pte_present(*ptep)) { + DBG_LOW(" no PTE !\n"); + return 1; + } + +#ifndef CONFIG_PPC_64K_PAGES + DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); +#else + DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep), + pte_val(*(ptep + PTRS_PER_PTE))); +#endif + /* Pre-check access permissions (will be re-checked atomically + * in __hash_page_XX but this pre-check is a fast path + */ + if (access & ~pte_val(*ptep)) { + DBG_LOW(" no access !\n"); + return 1; + } + + /* Do actual hashing */ +#ifndef 
CONFIG_PPC_64K_PAGES + rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); +#else + if (mmu_virtual_psize == MMU_PAGE_64K) + rc = __hash_page_64K(ea, access, vsid, ptep, trap, local); + else + rc = __hash_page_4K(ea, access, vsid, ptep, trap, local); +#endif /* CONFIG_PPC_64K_PAGES */ + +#ifndef CONFIG_PPC_64K_PAGES + DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); +#else + DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep), + pte_val(*(ptep + PTRS_PER_PTE))); +#endif + DBG_LOW(" -> rc=%d\n", rc); + return rc; } -void flush_hash_page(unsigned long va, pte_t pte, int local) +void hash_preload(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap) { - unsigned long vpn, hash, secondary, slot; - unsigned long huge = pte_huge(pte); + unsigned long vsid; + void *pgdir; + pte_t *ptep; + cpumask_t mask; + unsigned long flags; + int local = 0; - if (huge) - vpn = va >> HPAGE_SHIFT; + /* We don't want huge pages prefaulted for now + */ + if (unlikely(in_hugepage_area(mm->context, ea))) + return; + + DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," + " trap=%lx\n", mm, mm->pgd, ea, access, trap); + + /* Get PTE, VSID, access mask */ + pgdir = mm->pgd; + if (pgdir == NULL) + return; + ptep = find_linux_pte(pgdir, ea); + if (!ptep) + return; + vsid = get_vsid(mm->context.id, ea); + + /* Hash it in */ + local_irq_save(flags); + mask = cpumask_of_cpu(smp_processor_id()); + if (cpus_equal(mm->cpu_vm_mask, mask)) + local = 1; +#ifndef CONFIG_PPC_64K_PAGES + __hash_page_4K(ea, access, vsid, ptep, trap, local); +#else + if (mmu_virtual_psize == MMU_PAGE_64K) + __hash_page_64K(ea, access, vsid, ptep, trap, local); else - vpn = va >> PAGE_SHIFT; - hash = hpt_hash(vpn, huge); - secondary = (pte_val(pte) & _PAGE_SECONDARY) >> 15; - if (secondary) - hash = ~hash; - slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (pte_val(pte) & _PAGE_GROUP_IX) >> 12; + __hash_page_4K(ea, access, vsid, ptep, trap, local); +#endif /* 
CONFIG_PPC_64K_PAGES */ + local_irq_restore(flags); +} - ppc_md.hpte_invalidate(slot, va, huge, local); +void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int local) +{ + unsigned long hash, index, shift, hidx, slot; + + DBG_LOW("flush_hash_page(va=%016x)\n", va); + pte_iterate_hashed_subpages(pte, psize, va, index, shift) { + hash = hpt_hash(va, shift); + hidx = __rpte_to_hidx(pte, index); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx); + ppc_md.hpte_invalidate(slot, va, psize, local); + } pte_iterate_hashed_end(); } void flush_hash_range(unsigned long number, int local) { - if (ppc_md.flush_hash_range) { + if (ppc_md.flush_hash_range) ppc_md.flush_hash_range(number, local); - } else { + else { int i; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); for (i = 0; i < number; i++) - flush_hash_page(batch->vaddr[i], batch->pte[i], local); + flush_hash_page(batch->vaddr[i], batch->pte[i], + batch->psize, local); } } @@ -452,6 +738,18 @@ void __init htab_finish_init(void) extern unsigned int *htab_call_hpte_remove; extern unsigned int *htab_call_hpte_updatepp; +#ifdef CONFIG_PPC_64K_PAGES + extern unsigned int *ht64_call_hpte_insert1; + extern unsigned int *ht64_call_hpte_insert2; + extern unsigned int *ht64_call_hpte_remove; + extern unsigned int *ht64_call_hpte_updatepp; + + make_bl(ht64_call_hpte_insert1, ppc_md.hpte_insert); + make_bl(ht64_call_hpte_insert2, ppc_md.hpte_insert); + make_bl(ht64_call_hpte_remove, ppc_md.hpte_remove); + make_bl(ht64_call_hpte_updatepp, ppc_md.hpte_updatepp); +#endif /* CONFIG_PPC_64K_PAGES */ + make_bl(htab_call_hpte_insert1, ppc_md.hpte_insert); make_bl(htab_call_hpte_insert2, ppc_md.hpte_insert); make_bl(htab_call_hpte_remove, ppc_md.hpte_remove); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0ea0994ed974..0073a04047e4 
100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pu = pud_offset(pg, addr); if (!pud_none(*pu)) { pm = pmd_offset(pu, addr); +#ifdef CONFIG_PPC_64K_PAGES + /* Currently, we use the normal PTE offset within full + * size PTE pages, thus our huge PTEs are scattered in + * the PTE page and we do waste some. We may change + * that in the future, but the current mecanism keeps + * things much simpler + */ + if (!pmd_none(*pm)) { + /* Note: pte_offset_* are all equivalent on + * ppc64 as we don't have HIGHMEM + */ + pt = pte_offset_kernel(pm, addr); + return pt; + } +#else /* CONFIG_PPC_64K_PAGES */ + /* On 4k pages, we put huge PTEs in the PMD page */ pt = (pte_t *)pm; - BUG_ON(!pmd_none(*pm) - && !(pte_present(*pt) && pte_huge(*pt))); return pt; +#endif /* CONFIG_PPC_64K_PAGES */ } } @@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) if (pu) { pm = pmd_alloc(mm, pu, addr); if (pm) { +#ifdef CONFIG_PPC_64K_PAGES + /* See comment in huge_pte_offset. 
Note that if we ever + * want to put the page size in the PMD, we would have + * to open code our own pte_alloc* function in order + * to populate and set the size atomically + */ + pt = pte_alloc_map(mm, pm, addr); +#else /* CONFIG_PPC_64K_PAGES */ pt = (pte_t *)pm; - BUG_ON(!pmd_none(*pm) - && !(pte_present(*pt) && pte_huge(*pt))); +#endif /* CONFIG_PPC_64K_PAGES */ return pt; } } @@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) return NULL; } -#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE) - void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - int i; - if (pte_present(*ptep)) { - pte_clear(mm, addr, ptep); + /* We open-code pte_clear because we need to pass the right + * argument to hpte_update (huge / !huge) + */ + unsigned long old = pte_update(ptep, ~0UL); + if (old & _PAGE_HASHPTE) + hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1); flush_tlb_pending(); } - - for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) { - *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); - ptep++; - } + *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long old = pte_update(ptep, ~0UL); - int i; if (old & _PAGE_HASHPTE) - hpte_update(mm, addr, old, 0); - - for (i = 1; i < HUGEPTE_BATCH_SIZE; i++) - ptep[i] = __pte(0); + hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1); + *ptep = __pte(0); return __pte(old); } @@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, int lastshift; u16 areamask, curareas; + if (HPAGE_SHIFT == 0) + return -EINVAL; if (len & ~HPAGE_MASK) return -EINVAL; @@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, unsigned long ea, unsigned long vsid, int local) { pte_t *ptep; - unsigned long va, vpn; - pte_t old_pte, new_pte; - unsigned long rflags, prpn; + unsigned long old_pte, new_pte; + unsigned long va, rflags, pa; long slot; 
int err = 1; - spin_lock(&mm->page_table_lock); - ptep = huge_pte_offset(mm, ea); /* Search the Linux page table for a match with va */ va = (vsid << 28) | (ea & 0x0fffffff); - vpn = va >> HPAGE_SHIFT; /* * If no pte found or not present, send the problem up to @@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, if (unlikely(!ptep || pte_none(*ptep))) goto out; -/* BUG_ON(pte_bad(*ptep)); */ - /* * Check the user's access rights to the page. If access should be * prevented then send the problem up to do_page_fault. @@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, */ - old_pte = *ptep; - new_pte = old_pte; + do { + old_pte = pte_val(*ptep); + if (old_pte & _PAGE_BUSY) + goto out; + new_pte = old_pte | _PAGE_BUSY | + _PAGE_ACCESSED | _PAGE_HASHPTE; + } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, + old_pte, new_pte)); - rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); + rflags = 0x2 | (!(new_pte & _PAGE_RW)); /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ - rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); + rflags |= ((new_pte & _PAGE_EXEC) ? 
0 : HPTE_R_N); /* Check if pte already has an hpte (case 2) */ - if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { + if (unlikely(old_pte & _PAGE_HASHPTE)) { /* There MIGHT be an HPTE for this pte */ unsigned long hash, slot; - hash = hpt_hash(vpn, 1); - if (pte_val(old_pte) & _PAGE_SECONDARY) + hash = hpt_hash(va, HPAGE_SHIFT); + if (old_pte & _PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; + slot += (old_pte & _PAGE_F_GIX) >> 12; if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) - pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; + old_pte &= ~_PAGE_HPTEFLAGS; } - if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { - unsigned long hash = hpt_hash(vpn, 1); + if (likely(!(old_pte & _PAGE_HASHPTE))) { + unsigned long hash = hpt_hash(va, HPAGE_SHIFT); unsigned long hpte_group; - prpn = pte_pfn(old_pte); + pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - /* Update the linux pte with the HPTE slot */ - pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; - pte_val(new_pte) |= _PAGE_HASHPTE; + /* clear HPTE slot informations in new PTE */ + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; /* Add in WIMG bits */ /* XXX We should store these in the pte */ + /* --BenH: I think they are ... 
*/ rflags |= _PAGE_COHERENT; - slot = ppc_md.hpte_insert(hpte_group, va, prpn, - HPTE_V_LARGE, rflags); + /* Insert into the hash table, primary slot */ + slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, + mmu_huge_psize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { - pte_val(new_pte) |= _PAGE_SECONDARY; + new_pte |= _PAGE_F_SECOND; hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, va, prpn, - HPTE_V_LARGE | + slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, HPTE_V_SECONDARY, - rflags); + mmu_huge_psize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * @@ -726,20 +744,18 @@ repeat: if (unlikely(slot == -2)) panic("hash_huge_page: pte_insert failed\n"); - pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; - - /* - * No need to use ldarx/stdcx here because all who - * might be updating the pte will hold the - * page_table_lock - */ - *ptep = new_pte; + new_pte |= (slot << 12) & _PAGE_F_GIX; } + /* + * No need to use ldarx/stdcx here because all who + * might be updating the pte will hold the + * page_table_lock + */ + *ptep = __pte(new_pte & ~_PAGE_BUSY); + err = 0; out: - spin_unlock(&mm->page_table_lock); - return err; } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index b0fc822ec29f..dfe7fa37b41a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -188,12 +188,21 @@ static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) memset(addr, 0, kmem_cache_size(cache)); } +#ifdef CONFIG_PPC_64K_PAGES +static const int pgtable_cache_size[2] = { + PTE_TABLE_SIZE, PGD_TABLE_SIZE +}; +static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { + "pte_pmd_cache", "pgd_cache", +}; +#else static const int pgtable_cache_size[2] = { PTE_TABLE_SIZE, PMD_TABLE_SIZE }; static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { "pgd_pte_cache", "pud_pmd_cache", }; +#endif /* 
CONFIG_PPC_64K_PAGES */ kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; @@ -201,19 +210,14 @@ void pgtable_cache_init(void) { int i; - BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); - BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); - BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); - BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); - for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { int size = pgtable_cache_size[i]; const char *name = pgtable_cache_name[i]; pgtable_cache[i] = kmem_cache_create(name, size, size, - SLAB_HWCACHE_ALIGN - | SLAB_MUST_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN | + SLAB_MUST_HWCACHE_ALIGN, zero_ctor, NULL); if (! pgtable_cache[i]) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 117b00012e14..7faa46b71f21 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -61,6 +61,9 @@ int init_bootmem_done; int mem_init_done; unsigned long memory_limit; +extern void hash_preload(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap); + /* * This is called by /dev/mem to know if a given address has to * be mapped non-cacheable or not @@ -493,18 +496,10 @@ EXPORT_SYMBOL(flush_icache_user_range); void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) { - /* handle i-cache coherency */ - unsigned long pfn = pte_pfn(pte); -#ifdef CONFIG_PPC32 - pmd_t *pmd; -#else - unsigned long vsid; - void *pgdir; - pte_t *ptep; - int local = 0; - cpumask_t tmp; - unsigned long flags; +#ifdef CONFIG_PPC_STD_MMU + unsigned long access = 0, trap; #endif + unsigned long pfn = pte_pfn(pte); /* handle i-cache coherency */ if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && @@ -535,30 +530,21 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ if (!pte_young(pte) || address >= TASK_SIZE) return; -#ifdef CONFIG_PPC32 - 
if (Hash == 0) + + /* We try to figure out if we are coming from an instruction + * access fault and pass that down to __hash_page so we avoid + * double-faulting on execution of fresh text. We have to test + * for regs NULL since init will get here first thing at boot + * + * We also avoid filling the hash if not coming from a fault + */ + if (current->thread.regs == NULL) return; - pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address); - if (!pmd_none(*pmd)) - add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd)); -#else - pgdir = vma->vm_mm->pgd; - if (pgdir == NULL) + trap = TRAP(current->thread.regs); + if (trap == 0x400) + access |= _PAGE_EXEC; + else if (trap != 0x300) return; - - ptep = find_linux_pte(pgdir, address); - if (!ptep) - return; - - vsid = get_vsid(vma->vm_mm->context.id, address); - - local_irq_save(flags); - tmp = cpumask_of_cpu(smp_processor_id()); - if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) - local = 1; - - __hash_page(address, 0, vsid, ptep, 0x300, local); - local_irq_restore(flags); -#endif -#endif + hash_preload(vma->vm_mm, address, access, trap); +#endif /* CONFIG_PPC_STD_MMU */ } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index b79a78206135..51b786940971 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -101,7 +101,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) pud_t *pudp; pmd_t *pmdp; pte_t *ptep; - unsigned long vsid; if (mem_init_done) { pgdp = pgd_offset_k(ea); @@ -117,28 +116,15 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); } else { - unsigned long va, vpn, hash, hpteg; - /* * If the mm subsystem is not fully up, we cannot create a * linux page table entry for this mapping. Simply bolt an * entry in the hardware page table. 
+ * */ - vsid = get_kernel_vsid(ea); - va = (vsid << 28) | (ea & 0xFFFFFFF); - vpn = va >> PAGE_SHIFT; - - hash = hpt_hash(vpn, 0); - - hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - - /* Panic if a pte grpup is full */ - if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, - HPTE_V_BOLTED, - _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) - == -1) { - panic("map_io_page: could not insert mapping"); - } + if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags, + mmu_virtual_psize)) + panic("Can't map bolted IO mapping"); } return 0; } diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index cef9e83cc7e9..d137abd241ff 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -178,6 +178,21 @@ void __init setbat(int index, unsigned long virt, unsigned long phys, bat_addrs[index].phys = phys; } +/* + * Preload a translation in the hash table + */ +void hash_preload(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap) +{ + pmd_t *pmd; + + if (Hash == 0) + return; + pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address); + if (!pmd_none(*pmd)) + add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd)); +} + /* * Initialize the hash table and patch the instructions in hashtable.S. */ diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 0473953f6a37..60e852f2f8e5 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -14,14 +14,32 @@ * 2 of the License, or (at your option) any later version. */ +#undef DEBUG + #include #include #include #include #include #include +#include -extern void slb_allocate(unsigned long ea); +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) 
+#endif + +extern void slb_allocate_realmode(unsigned long ea); +extern void slb_allocate_user(unsigned long ea); + +static void slb_allocate(unsigned long ea) +{ + /* Currently, we do real mode for all SLBs including user, but + * that will change if we bring back dynamic VSIDs + */ + slb_allocate_realmode(ea); +} static inline unsigned long mk_esid_data(unsigned long ea, unsigned long slot) { @@ -46,13 +64,15 @@ static void slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED * appropriately too. */ - unsigned long ksp_flags = SLB_VSID_KERNEL; + unsigned long linear_llp, virtual_llp, lflags, vflags; unsigned long ksp_esid_data; WARN_ON(!irqs_disabled()); - if (cpu_has_feature(CPU_FTR_16M_PAGE)) - ksp_flags |= SLB_VSID_L; + linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; + virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp; + lflags = SLB_VSID_KERNEL | linear_llp; + vflags = SLB_VSID_KERNEL | virtual_llp; ksp_esid_data = mk_esid_data(get_paca()->kstack, 2); if ((ksp_esid_data & ESID_MASK) == KERNELBASE) @@ -67,9 +87,9 @@ static void slb_flush_and_rebolt(void) /* Slot 2 - kernel stack */ "slbmte %2,%3\n" "isync" - :: "r"(mk_vsid_data(VMALLOCBASE, SLB_VSID_KERNEL)), + :: "r"(mk_vsid_data(VMALLOCBASE, vflags)), "r"(mk_esid_data(VMALLOCBASE, 1)), - "r"(mk_vsid_data(ksp_esid_data, ksp_flags)), + "r"(mk_vsid_data(ksp_esid_data, lflags)), "r"(ksp_esid_data) : "memory"); } @@ -102,6 +122,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) get_paca()->slb_cache_ptr = 0; get_paca()->context = mm->context; +#ifdef CONFIG_PPC_64K_PAGES + get_paca()->pgdir = mm->pgd; +#endif /* CONFIG_PPC_64K_PAGES */ /* * preload some userspace segments into the SLB. 
@@ -131,28 +154,77 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) slb_allocate(unmapped_base); } +static inline void patch_slb_encoding(unsigned int *insn_addr, + unsigned int immed) +{ + /* Assume the instruction had a "0" immediate value, just + * "or" in the new value + */ + *insn_addr |= immed; + flush_icache_range((unsigned long)insn_addr, 4+ + (unsigned long)insn_addr); +} + void slb_initialize(void) { + unsigned long linear_llp, virtual_llp; + static int slb_encoding_inited; + extern unsigned int *slb_miss_kernel_load_linear; + extern unsigned int *slb_miss_kernel_load_virtual; + extern unsigned int *slb_miss_user_load_normal; +#ifdef CONFIG_HUGETLB_PAGE + extern unsigned int *slb_miss_user_load_huge; + unsigned long huge_llp; + + huge_llp = mmu_psize_defs[mmu_huge_psize].sllp; +#endif + + /* Prepare our SLB miss handler based on our page size */ + linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; + virtual_llp = mmu_psize_defs[mmu_virtual_psize].sllp; + if (!slb_encoding_inited) { + slb_encoding_inited = 1; + patch_slb_encoding(slb_miss_kernel_load_linear, + SLB_VSID_KERNEL | linear_llp); + patch_slb_encoding(slb_miss_kernel_load_virtual, + SLB_VSID_KERNEL | virtual_llp); + patch_slb_encoding(slb_miss_user_load_normal, + SLB_VSID_USER | virtual_llp); + + DBG("SLB: linear LLP = %04x\n", linear_llp); + DBG("SLB: virtual LLP = %04x\n", virtual_llp); +#ifdef CONFIG_HUGETLB_PAGE + patch_slb_encoding(slb_miss_user_load_huge, + SLB_VSID_USER | huge_llp); + DBG("SLB: huge LLP = %04x\n", huge_llp); +#endif + } + /* On iSeries the bolted entries have already been set up by * the hypervisor from the lparMap data in head.S */ #ifndef CONFIG_PPC_ISERIES - unsigned long flags = SLB_VSID_KERNEL; + { + unsigned long lflags, vflags; - /* Invalidate the entire SLB (even slot 0) & all the ERATS */ - if (cpu_has_feature(CPU_FTR_16M_PAGE)) - flags |= SLB_VSID_L; + lflags = SLB_VSID_KERNEL | linear_llp; + vflags = SLB_VSID_KERNEL | virtual_llp; - asm 
volatile("isync":::"memory"); - asm volatile("slbmte %0,%0"::"r" (0) : "memory"); + /* Invalidate the entire SLB (even slot 0) & all the ERATS */ + asm volatile("isync":::"memory"); + asm volatile("slbmte %0,%0"::"r" (0) : "memory"); asm volatile("isync; slbia; isync":::"memory"); - create_slbe(KERNELBASE, flags, 0); - create_slbe(VMALLOCBASE, SLB_VSID_KERNEL, 1); + create_slbe(KERNELBASE, lflags, 0); + + /* VMALLOC space has 4K pages always for now */ + create_slbe(VMALLOCBASE, vflags, 1); + /* We don't bolt the stack for the time being - we're in boot, * so the stack is in the bolted segment. By the time it goes * elsewhere, we'll call _switch() which will bolt in the new * one. */ asm volatile("isync":::"memory"); -#endif + } +#endif /* CONFIG_PPC_ISERIES */ get_paca()->stab_rr = SLB_NUM_BOLTED; } diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index a3a03da503bc..3e18241b6f35 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -18,25 +18,161 @@ #include #include -#include -#include #include #include #include +#include +#include +#include -/* void slb_allocate(unsigned long ea); +/* void slb_allocate_realmode(unsigned long ea); * * Create an SLB entry for the given EA (user or kernel). * r3 = faulting address, r13 = PACA * r9, r10, r11 are clobbered by this function * No other registers are examined or changed. */ -_GLOBAL(slb_allocate) - /* - * First find a slot, round robin. Previously we tried to find - * a free slot first but that took too long. Unfortunately we - * dont have any LRU information to help us choose a slot. +_GLOBAL(slb_allocate_realmode) + /* r3 = faulting address */ + + srdi r9,r3,60 /* get region */ + srdi r10,r3,28 /* get esid */ + cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ + + /* r3 = address, r10 = esid, cr7 = <>KERNELBASE */ + blt cr7,0f /* user or kernel? 
*/ + + /* kernel address: proto-VSID = ESID */ + /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but + * this code will generate the protoVSID 0xfffffffff for the + * top segment. That's ok, the scramble below will translate + * it to VSID 0, which is reserved as a bad VSID - one which + * will never have any pages in it. */ + + /* Check if hitting the linear mapping of the vmalloc/ioremap + * kernel space + */ + bne cr7,1f + + /* Linear mapping encoding bits, the "li" instruction below will + * be patched by the kernel at boot */ +_GLOBAL(slb_miss_kernel_load_linear) + li r11,0 + b slb_finish_load + +1: /* vmalloc/ioremap mapping encoding bits, the "li" instruction below + * will be patched by the kernel at boot + */ +_GLOBAL(slb_miss_kernel_load_virtual) + li r11,0 + b slb_finish_load + + +0: /* user address: proto-VSID = context << 15 | ESID. First check + * if the address is within the boundaries of the user region + */ + srdi. r9,r10,USER_ESID_BITS + bne- 8f /* invalid ea bits set */ + + /* Figure out if the segment contains huge pages */ +#ifdef CONFIG_HUGETLB_PAGE +BEGIN_FTR_SECTION + b 1f +END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE) + lhz r9,PACAHIGHHTLBAREAS(r13) + srdi r11,r10,(HTLB_AREA_SHIFT-SID_SHIFT) + srd r9,r9,r11 + lhz r11,PACALOWHTLBAREAS(r13) + srd r11,r11,r10 + or. r9,r9,r11 + beq 1f +_GLOBAL(slb_miss_user_load_huge) + li r11,0 + b 2f +1: +#endif /* CONFIG_HUGETLB_PAGE */ + +_GLOBAL(slb_miss_user_load_normal) + li r11,0 + +2: + ld r9,PACACONTEXTID(r13) + rldimi r10,r9,USER_ESID_BITS,0 + b slb_finish_load + +8: /* invalid EA */ + li r10,0 /* BAD_VSID */ + li r11,SLB_VSID_USER /* flags don't much matter */ + b slb_finish_load + +#ifdef __DISABLED__ + +/* void slb_allocate_user(unsigned long ea); + * + * Create an SLB entry for the given EA (user or kernel). + * r3 = faulting address, r13 = PACA + * r9, r10, r11 are clobbered by this function + * No other registers are examined or changed. 
+ * + * It is called with translation enabled in order to be able to walk the + * page tables. This is not currently used. + */ +_GLOBAL(slb_allocate_user) + /* r3 = faulting address */ + srdi r10,r3,28 /* get esid */ + + crset 4*cr7+lt /* set "user" flag for later */ + + /* check if we fit in the range covered by the pagetables*/ + srdi. r9,r3,PGTABLE_EADDR_SIZE + crnot 4*cr0+eq,4*cr0+eq + beqlr + + /* now we need to get to the page tables in order to get the page + * size encoding from the PMD. In the future, we'll be able to deal + * with 1T segments too by getting the encoding from the PGD instead + */ + ld r9,PACAPGDIR(r13) + cmpldi cr0,r9,0 + beqlr + rlwinm r11,r10,8,25,28 + ldx r9,r9,r11 /* get pgd_t */ + cmpldi cr0,r9,0 + beqlr + rlwinm r11,r10,3,17,28 + ldx r9,r9,r11 /* get pmd_t */ + cmpldi cr0,r9,0 + beqlr + + /* build vsid flags */ + andi. r11,r9,SLB_VSID_LLP + ori r11,r11,SLB_VSID_USER + + /* get context to calculate proto-VSID */ + ld r9,PACACONTEXTID(r13) + rldimi r10,r9,USER_ESID_BITS,0 + + /* fall through slb_finish_load */ + +#endif /* __DISABLED__ */ + + +/* + * Finish loading of an SLB entry and return + * + * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9, cr7 = <>KERNELBASE + */ +slb_finish_load: + ASM_VSID_SCRAMBLE(r10,r9) + rldimi r11,r10,SLB_VSID_SHIFT,16 /* combine VSID and flags */ + + /* r3 = EA, r11 = VSID data */ + /* + * Find a slot, round robin. Previously we tried to find a + * free slot first but that took too long. Unfortunately we + * dont have any LRU information to help us choose a slot. 
+ */ #ifdef CONFIG_PPC_ISERIES /* * On iSeries, the "bolted" stack segment can be cast out on @@ -45,9 +181,9 @@ _GLOBAL(slb_allocate) */ ld r9,PACAKSAVE(r13) clrrdi r9,r9,28 - clrrdi r11,r3,28 + clrrdi r3,r3,28 li r10,SLB_NUM_BOLTED-1 /* Stack goes in last bolted slot */ - cmpld r9,r11 + cmpld r9,r3 beq 3f #endif /* CONFIG_PPC_ISERIES */ @@ -61,63 +197,12 @@ _GLOBAL(slb_allocate) 4: std r10,PACASTABRR(r13) + 3: - /* r3 = faulting address, r10 = entry */ + rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */ + oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */ - srdi r9,r3,60 /* get region */ - srdi r3,r3,28 /* get esid */ - cmpldi cr7,r9,0xc /* cmp KERNELBASE for later use */ - - rldimi r10,r3,28,0 /* r10= ESID<<28 | entry */ - oris r10,r10,SLB_ESID_V@h /* r10 |= SLB_ESID_V */ - - /* r3 = esid, r10 = esid_data, cr7 = <>KERNELBASE */ - - blt cr7,0f /* user or kernel? */ - - /* kernel address: proto-VSID = ESID */ - /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but - * this code will generate the protoVSID 0xfffffffff for the - * top segment. That's ok, the scramble below will translate - * it to VSID 0, which is reserved as a bad VSID - one which - * will never have any pages in it. */ - li r11,SLB_VSID_KERNEL -BEGIN_FTR_SECTION - bne cr7,9f - li r11,(SLB_VSID_KERNEL|SLB_VSID_L) -END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) - b 9f - -0: /* user address: proto-VSID = context<<15 | ESID */ - srdi. 
r9,r3,USER_ESID_BITS - bne- 8f /* invalid ea bits set */ - -#ifdef CONFIG_HUGETLB_PAGE -BEGIN_FTR_SECTION - lhz r9,PACAHIGHHTLBAREAS(r13) - srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) - srd r9,r9,r11 - lhz r11,PACALOWHTLBAREAS(r13) - srd r11,r11,r3 - or r9,r9,r11 -END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) -#endif /* CONFIG_HUGETLB_PAGE */ - - li r11,SLB_VSID_USER - -#ifdef CONFIG_HUGETLB_PAGE -BEGIN_FTR_SECTION - rldimi r11,r9,8,55 /* shift masked bit into SLB_VSID_L */ -END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) -#endif /* CONFIG_HUGETLB_PAGE */ - - ld r9,PACACONTEXTID(r13) - rldimi r3,r9,USER_ESID_BITS,0 - -9: /* r3 = protovsid, r11 = flags, r10 = esid_data, cr7 = <>KERNELBASE */ - ASM_VSID_SCRAMBLE(r3,r9) - - rldimi r11,r3,SLB_VSID_SHIFT,16 /* combine VSID and flags */ + /* r3 = ESID data, r11 = VSID data */ /* * No need for an isync before or after this slbmte. The exception @@ -125,7 +210,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) */ slbmte r11,r10 - bgelr cr7 /* we're done for kernel addresses */ + /* we're done for kernel addresses */ + crclr 4*cr0+eq /* set result to "success" */ + bgelr cr7 /* Update the slb cache */ lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */ @@ -143,9 +230,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) li r3,SLB_CACHE_ENTRIES+1 2: sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */ + crclr 4*cr0+eq /* set result to "success" */ blr -8: /* invalid EA */ - li r3,0 /* BAD_VSID */ - li r11,SLB_VSID_USER /* flags don't much matter */ - b 9b diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 1b83f002bf27..fa325dbf98fc 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -26,7 +26,6 @@ struct stab_entry { unsigned long vsid_data; }; -/* Both the segment table and SLB code uses the following cache */ #define NR_STAB_CACHE_ENTRIES 8 DEFINE_PER_CPU(long, stab_cache_ptr); DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); @@ -186,7 +185,7 @@ void switch_stab(struct task_struct *tsk, struct 
mm_struct *mm) /* Never flush the first entry. */ ste += 1; for (entry = 1; - entry < (PAGE_SIZE / sizeof(struct stab_entry)); + entry < (HW_PAGE_SIZE / sizeof(struct stab_entry)); entry++, ste++) { unsigned long ea; ea = ste->esid_data & ESID_MASK; @@ -200,6 +199,10 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) __get_cpu_var(stab_cache_ptr) = 0; +#ifdef CONFIG_PPC_64K_PAGES + get_paca()->pgdir = mm->pgd; +#endif /* CONFIG_PPC_64K_PAGES */ + /* Now preload some entries for the new task */ if (test_tsk_thread_flag(tsk, TIF_32BIT)) unmapped_base = TASK_UNMAPPED_BASE_USER32; @@ -223,8 +226,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) asm volatile("sync" : : : "memory"); } -extern void slb_initialize(void); - /* * Allocate segment tables for secondary CPUs. These must all go in * the first (bolted) segment, so that do_stab_bolted won't get a @@ -243,18 +244,21 @@ void stabs_alloc(void) if (cpu == 0) continue; /* stab for CPU 0 is statically allocated */ - newstab = lmb_alloc_base(PAGE_SIZE, PAGE_SIZE, 1< #include #include @@ -30,7 +31,7 @@ #include #include #include -#include +#include DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); @@ -126,28 +127,46 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) * (if we remove it we should clear the _PTE_HPTEFLAGS bits). */ void hpte_update(struct mm_struct *mm, unsigned long addr, - unsigned long pte, int wrprot) + pte_t *ptep, unsigned long pte, int huge) { struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); unsigned long vsid; + unsigned int psize = mmu_virtual_psize; int i; i = batch->index; + /* We mask the address for the base page size. 
Huge pages will + * have applied their own masking already + */ + addr &= PAGE_MASK; + + /* Get page size (maybe move back to caller) */ + if (huge) { +#ifdef CONFIG_HUGETLB_PAGE + psize = mmu_huge_psize; +#else + BUG(); +#endif + } + /* * This can happen when we are in the middle of a TLB batch and * we encounter memory pressure (eg copy_page_range when it tries * to allocate a new pte). If we have to reclaim memory and end * up scanning and resetting referenced bits then our batch context * will change mid stream. + * + * We also need to ensure only one page size is present in a given + * batch */ - if (i != 0 && (mm != batch->mm || batch->large != pte_huge(pte))) { + if (i != 0 && (mm != batch->mm || batch->psize != psize)) { flush_tlb_pending(); i = 0; } if (i == 0) { batch->mm = mm; - batch->large = pte_huge(pte); + batch->psize = psize; } if (addr < KERNELBASE) { vsid = get_vsid(mm->context.id, addr); @@ -155,7 +174,7 @@ void hpte_update(struct mm_struct *mm, unsigned long addr, } else vsid = get_kernel_vsid(addr); batch->vaddr[i] = (vsid << 28 ) | (addr & 0x0fffffff); - batch->pte[i] = __pte(pte); + batch->pte[i] = __real_pte(__pte(pte), ptep); batch->index = ++i; if (i >= PPC64_TLB_BATCH_NR) flush_tlb_pending(); @@ -177,7 +196,8 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) local = 1; if (i == 1) - flush_hash_page(batch->vaddr[0], batch->pte[0], local); + flush_hash_page(batch->vaddr[0], batch->pte[0], + batch->psize, local); else flush_hash_range(i, local); batch->index = 0; diff --git a/arch/powerpc/platforms/iseries/htab.c b/arch/powerpc/platforms/iseries/htab.c index b3c6c3374ca6..30bdcf3925d9 100644 --- a/arch/powerpc/platforms/iseries/htab.c +++ b/arch/powerpc/platforms/iseries/htab.c @@ -39,15 +39,16 @@ static inline void iSeries_hunlock(unsigned long slot) spin_unlock(&iSeries_hlocks[(slot >> 4) & 0x3f]); } -static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, - unsigned long prpn, unsigned long vflags, - unsigned 
long rflags) +long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, + unsigned long pa, unsigned long rflags, + unsigned long vflags, int psize) { - unsigned long arpn; long slot; hpte_t lhpte; int secondary = 0; + BUG_ON(psize != MMU_PAGE_4K); + /* * The hypervisor tries both primary and secondary. * If we are being called to insert in the secondary, @@ -59,8 +60,19 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, iSeries_hlock(hpte_group); - slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT); - BUG_ON(lhpte.v & HPTE_V_VALID); + slot = HvCallHpt_findValid(&lhpte, va >> HW_PAGE_SHIFT); + if (unlikely(lhpte.v & HPTE_V_VALID)) { + if (vflags & HPTE_V_BOLTED) { + HvCallHpt_setSwBits(slot, 0x10, 0); + HvCallHpt_setPp(slot, PP_RWXX); + iSeries_hunlock(hpte_group); + if (slot < 0) + return 0x8 | (slot & 7); + else + return slot & 7; + } + BUG(); + } if (slot == -1) { /* No available entry found in either group */ iSeries_hunlock(hpte_group); @@ -73,10 +85,9 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, slot &= 0x7fffffffffffffff; } - arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT; - lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; - lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags; + lhpte.v = hpte_encode_v(va, MMU_PAGE_4K) | vflags | HPTE_V_VALID; + lhpte.r = hpte_encode_r(phys_to_abs(pa), MMU_PAGE_4K) | rflags; /* Now fill in the actual HPTE */ HvCallHpt_addValidate(slot, secondary, &lhpte); @@ -86,25 +97,6 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, return (secondary << 3) | (slot & 7); } -long iSeries_hpte_bolt_or_insert(unsigned long hpte_group, - unsigned long va, unsigned long prpn, unsigned long vflags, - unsigned long rflags) -{ - long slot; - hpte_t lhpte; - - slot = HvCallHpt_findValid(&lhpte, va >> PAGE_SHIFT); - - if (lhpte.v & HPTE_V_VALID) { - /* Bolt the existing HPTE */ - HvCallHpt_setSwBits(slot, 0x10, 0); - 
HvCallHpt_setPp(slot, PP_RWXX); - return 0; - } - - return iSeries_hpte_insert(hpte_group, va, prpn, vflags, rflags); -} - static unsigned long iSeries_hpte_getword0(unsigned long slot) { hpte_t hpte; @@ -150,15 +142,17 @@ static long iSeries_hpte_remove(unsigned long hpte_group) * bits 61..63 : PP2,PP1,PP0 */ static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, int large, int local) + unsigned long va, int psize, int local) { hpte_t hpte; - unsigned long avpn = va >> 23; + unsigned long want_v; iSeries_hlock(slot); HvCallHpt_get(&hpte, slot); - if ((HPTE_V_AVPN_VAL(hpte.v) == avpn) && (hpte.v & HPTE_V_VALID)) { + want_v = hpte_encode_v(va, MMU_PAGE_4K); + + if (HPTE_V_COMPARE(hpte.v, want_v) && (hpte.v & HPTE_V_VALID)) { /* * Hypervisor expects bits as NPPP, which is * different from how they are mapped in our PP. @@ -210,14 +204,17 @@ static long iSeries_hpte_find(unsigned long vpn) * * No need to lock here because we should be the only user. */ -static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) +static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, + int psize) { unsigned long vsid,va,vpn; long slot; + BUG_ON(psize != MMU_PAGE_4K); + vsid = get_kernel_vsid(ea); va = (vsid << 28) | (ea & 0x0fffffff); - vpn = va >> PAGE_SHIFT; + vpn = va >> HW_PAGE_SHIFT; slot = iSeries_hpte_find(vpn); if (slot == -1) panic("updateboltedpp: Could not find page to bolt\n"); @@ -225,7 +222,7 @@ static void iSeries_hpte_updateboltedpp(unsigned long newpp, unsigned long ea) } static void iSeries_hpte_invalidate(unsigned long slot, unsigned long va, - int large, int local) + int psize, int local) { unsigned long hpte_v; unsigned long avpn = va >> 23; diff --git a/arch/powerpc/platforms/iseries/hvlog.c b/arch/powerpc/platforms/iseries/hvlog.c index 62ec73479687..f476d71194fa 100644 --- a/arch/powerpc/platforms/iseries/hvlog.c +++ b/arch/powerpc/platforms/iseries/hvlog.c @@ -22,7 +22,7 @@ void 
HvCall_writeLogBuffer(const void *buffer, u64 len) while (len) { hv_buf.addr = cur; - left_this_page = ((cur & PAGE_MASK) + PAGE_SIZE) - cur; + left_this_page = ((cur & HW_PAGE_MASK) + HW_PAGE_SIZE) - cur; if (left_this_page > len) left_this_page = len; hv_buf.len = left_this_page; @@ -30,6 +30,6 @@ void HvCall_writeLogBuffer(const void *buffer, u64 len) HvCall2(HvCallBaseWriteLogBuffer, virt_to_abs(&hv_buf), left_this_page); - cur = (cur & PAGE_MASK) + PAGE_SIZE; + cur = (cur & HW_PAGE_MASK) + HW_PAGE_SIZE; } } diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index 1a6845b5c5a4..bf081b345820 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -43,9 +43,12 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, u64 rc; union tce_entry tce; + index <<= TCE_PAGE_FACTOR; + npages <<= TCE_PAGE_FACTOR; + while (npages--) { tce.te_word = 0; - tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> PAGE_SHIFT; + tce.te_bits.tb_rpn = virt_to_abs(uaddr) >> TCE_SHIFT; if (tbl->it_type == TCE_VB) { /* Virtual Bus */ @@ -66,7 +69,7 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", rc); index++; - uaddr += PAGE_SIZE; + uaddr += TCE_PAGE_SIZE; } } @@ -74,6 +77,9 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) { u64 rc; + npages <<= TCE_PAGE_FACTOR; + index <<= TCE_PAGE_FACTOR; + while (npages--) { rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0); if (rc) @@ -83,6 +89,50 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) } } +/* + * Call Hv with the architected data structure to get TCE table info. + * info. Put the returned data into the Linux representation of the + * TCE table data. + * The Hardware Tce table comes in three flavors. + * 1. TCE table shared between Buses. + * 2. TCE table per Bus. + * 3. 
TCE Table per IOA. + */ +void iommu_table_getparms_iSeries(unsigned long busno, + unsigned char slotno, + unsigned char virtbus, + struct iommu_table* tbl) +{ + struct iommu_table_cb *parms; + + parms = kmalloc(sizeof(*parms), GFP_KERNEL); + if (parms == NULL) + panic("PCI_DMA: TCE Table Allocation failed."); + + memset(parms, 0, sizeof(*parms)); + + parms->itc_busno = busno; + parms->itc_slotno = slotno; + parms->itc_virtbus = virtbus; + + HvCallXm_getTceTableParms(iseries_hv_addr(parms)); + + if (parms->itc_size == 0) + panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms); + + /* itc_size is in pages worth of table, it_size is in # of entries */ + tbl->it_size = ((parms->itc_size * TCE_PAGE_SIZE) / + sizeof(union tce_entry)) >> TCE_PAGE_FACTOR; + tbl->it_busno = parms->itc_busno; + tbl->it_offset = parms->itc_offset >> TCE_PAGE_FACTOR; + tbl->it_index = parms->itc_index; + tbl->it_blocksize = 1; + tbl->it_type = virtbus ? TCE_VB : TCE_PCI; + + kfree(parms); +} + + #ifdef CONFIG_PCI /* * This function compares the known tables to find an iommu_table @@ -104,46 +154,6 @@ static struct iommu_table *iommu_table_find(struct iommu_table * tbl) return NULL; } -/* - * Call Hv with the architected data structure to get TCE table info. - * info. Put the returned data into the Linux representation of the - * TCE table data. - * The Hardware Tce table comes in three flavors. - * 1. TCE table shared between Buses. - * 2. TCE table per Bus. - * 3. TCE Table per IOA. 
- */ -static void iommu_table_getparms(struct pci_dn *pdn, - struct iommu_table* tbl) -{ - struct iommu_table_cb *parms; - - parms = kmalloc(sizeof(*parms), GFP_KERNEL); - if (parms == NULL) - panic("PCI_DMA: TCE Table Allocation failed."); - - memset(parms, 0, sizeof(*parms)); - - parms->itc_busno = pdn->busno; - parms->itc_slotno = pdn->LogicalSlot; - parms->itc_virtbus = 0; - - HvCallXm_getTceTableParms(iseries_hv_addr(parms)); - - if (parms->itc_size == 0) - panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms); - - /* itc_size is in pages worth of table, it_size is in # of entries */ - tbl->it_size = (parms->itc_size * PAGE_SIZE) / sizeof(union tce_entry); - tbl->it_busno = parms->itc_busno; - tbl->it_offset = parms->itc_offset; - tbl->it_index = parms->itc_index; - tbl->it_blocksize = 1; - tbl->it_type = TCE_PCI; - - kfree(parms); -} - void iommu_devnode_init_iSeries(struct device_node *dn) { @@ -152,7 +162,7 @@ void iommu_devnode_init_iSeries(struct device_node *dn) tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - iommu_table_getparms(pdn, tbl); + iommu_table_getparms_iSeries(pdn->busno, pdn->LogicalSlot, 0, tbl); /* Look for existing tce table */ pdn->iommu_table = iommu_table_find(tbl); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index fda712b42168..c5207064977d 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -320,11 +320,11 @@ static void __init iSeries_init_early(void) */ if (naca.xRamDisk) { initrd_start = (unsigned long)__va(naca.xRamDisk); - initrd_end = initrd_start + naca.xRamDiskSize * PAGE_SIZE; + initrd_end = initrd_start + naca.xRamDiskSize * HW_PAGE_SIZE; initrd_below_start_ok = 1; // ramdisk in kernel space ROOT_DEV = Root_RAM0; - if (((rd_size * 1024) / PAGE_SIZE) < naca.xRamDiskSize) - rd_size = (naca.xRamDiskSize * PAGE_SIZE) / 1024; + if (((rd_size * 1024) / HW_PAGE_SIZE) < naca.xRamDiskSize) + rd_size = (naca.xRamDiskSize * 
HW_PAGE_SIZE) / 1024; } else #endif /* CONFIG_BLK_DEV_INITRD */ { @@ -470,13 +470,14 @@ static void __init build_iSeries_Memory_Map(void) */ hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); hptSizePages = (u32)HvCallHpt_getHptPages(); - hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT); + hptSizeChunks = hptSizePages >> + (MSCHUNKS_CHUNK_SHIFT - HW_PAGE_SHIFT); hptLastChunk = hptFirstChunk + hptSizeChunks - 1; printk("HPT absolute addr = %016lx, size = %dK\n", chunk_to_addr(hptFirstChunk), hptSizeChunks * 256); - ppc64_pft_size = __ilog2(hptSizePages * PAGE_SIZE); + ppc64_pft_size = __ilog2(hptSizePages * HW_PAGE_SIZE); /* * The actual hashed page table is in the hypervisor, @@ -629,7 +630,7 @@ static void __init iSeries_fixup_klimit(void) */ if (naca.xRamDisk) klimit = KERNELBASE + (u64)naca.xRamDisk + - (naca.xRamDiskSize * PAGE_SIZE); + (naca.xRamDiskSize * HW_PAGE_SIZE); else { /* * No ram disk was included - check and see if there diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c index c27a66876c2c..384360ee06ec 100644 --- a/arch/powerpc/platforms/iseries/vio.c +++ b/arch/powerpc/platforms/iseries/vio.c @@ -30,41 +30,14 @@ static struct iommu_table vio_iommu_table; static void __init iommu_vio_init(void) { - struct iommu_table *t; - struct iommu_table_cb cb; - unsigned long cbp; - unsigned long itc_entries; + iommu_table_getparms_iSeries(255, 0, 0xff, &veth_iommu_table); + veth_iommu_table.it_size /= 2; + vio_iommu_table = veth_iommu_table; + vio_iommu_table.it_offset += veth_iommu_table.it_size; - cb.itc_busno = 255; /* Bus 255 is the virtual bus */ - cb.itc_virtbus = 0xff; /* Ask for virtual bus */ - - cbp = virt_to_abs(&cb); - HvCallXm_getTceTableParms(cbp); - - itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); - veth_iommu_table.it_size = itc_entries / 2; - veth_iommu_table.it_busno = cb.itc_busno; - veth_iommu_table.it_offset = cb.itc_offset; - veth_iommu_table.it_index 
= cb.itc_index; - veth_iommu_table.it_type = TCE_VB; - veth_iommu_table.it_blocksize = 1; - - t = iommu_init_table(&veth_iommu_table); - - if (!t) + if (!iommu_init_table(&veth_iommu_table)) printk("Virtual Bus VETH TCE table failed.\n"); - - vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; - vio_iommu_table.it_busno = cb.itc_busno; - vio_iommu_table.it_offset = cb.itc_offset + - veth_iommu_table.it_size; - vio_iommu_table.it_index = cb.itc_index; - vio_iommu_table.it_type = TCE_VB; - vio_iommu_table.it_blocksize = 1; - - t = iommu_init_table(&vio_iommu_table); - - if (!t) + if (!iommu_init_table(&vio_iommu_table)) printk("Virtual Bus VIO TCE table failed.\n"); } diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c index fe97bfbf7463..842672695598 100644 --- a/arch/powerpc/platforms/iseries/viopath.c +++ b/arch/powerpc/platforms/iseries/viopath.c @@ -68,7 +68,8 @@ static DEFINE_SPINLOCK(statuslock); * For each kind of event we allocate a buffer that is * guaranteed not to cross a page boundary */ -static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] __page_aligned; +static unsigned char event_buffer[VIO_MAX_SUBTYPES * 256] + __attribute__((__aligned__(4096))); static atomic_t event_buffer_available[VIO_MAX_SUBTYPES]; static int event_buffer_initialised; @@ -116,12 +117,12 @@ static int proc_viopath_show(struct seq_file *m, void *v) HvLpEvent_Rc hvrc; DECLARE_MUTEX_LOCKED(Semaphore); - buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + buf = kmalloc(HW_PAGE_SIZE, GFP_KERNEL); if (!buf) return 0; - memset(buf, 0, PAGE_SIZE); + memset(buf, 0, HW_PAGE_SIZE); - handle = dma_map_single(iSeries_vio_dev, buf, PAGE_SIZE, + handle = dma_map_single(iSeries_vio_dev, buf, HW_PAGE_SIZE, DMA_FROM_DEVICE); hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp, @@ -131,7 +132,7 @@ static int proc_viopath_show(struct seq_file *m, void *v) viopath_sourceinst(viopath_hostLp), viopath_targetinst(viopath_hostLp), (u64)(unsigned 
long)&Semaphore, VIOVERSION << 16, - ((u64)handle) << 32, PAGE_SIZE, 0, 0); + ((u64)handle) << 32, HW_PAGE_SIZE, 0, 0); if (hvrc != HvLpEvent_Rc_Good) printk(VIOPATH_KERN_WARN "hv error on op %d\n", (int)hvrc); @@ -140,7 +141,7 @@ static int proc_viopath_show(struct seq_file *m, void *v) vlanMap = HvLpConfig_getVirtualLanIndexMap(); - buf[PAGE_SIZE-1] = '\0'; + buf[HW_PAGE_SIZE-1] = '\0'; seq_printf(m, "%s", buf); seq_printf(m, "AVAILABLE_VETH=%x\n", vlanMap); seq_printf(m, "SRLNBR=%c%c%c%c%c%c%c\n", @@ -152,7 +153,8 @@ static int proc_viopath_show(struct seq_file *m, void *v) e2a(xItExtVpdPanel.systemSerial[4]), e2a(xItExtVpdPanel.systemSerial[5])); - dma_unmap_single(iSeries_vio_dev, handle, PAGE_SIZE, DMA_FROM_DEVICE); + dma_unmap_single(iSeries_vio_dev, handle, HW_PAGE_SIZE, + DMA_FROM_DEVICE); kfree(buf); return 0; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index e384a5a91796..ab0c6dd6ec94 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -19,7 +19,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define DEBUG +#undef DEBUG_LOW #include #include @@ -41,10 +41,10 @@ #include "plpar_wrappers.h" -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) +#ifdef DEBUG_LOW +#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0) #else -#define DBG(fmt...) +#define DBG_LOW(fmt...) 
do { } while(0) #endif /* in pSeries_hvCall.S */ @@ -276,8 +276,9 @@ void vpa_init(int cpu) } long pSeries_lpar_hpte_insert(unsigned long hpte_group, - unsigned long va, unsigned long prpn, - unsigned long vflags, unsigned long rflags) + unsigned long va, unsigned long pa, + unsigned long rflags, unsigned long vflags, + int psize) { unsigned long lpar_rc; unsigned long flags; @@ -285,11 +286,28 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long hpte_v, hpte_r; unsigned long dummy0, dummy1; - hpte_v = ((va >> 23) << HPTE_V_AVPN_SHIFT) | vflags | HPTE_V_VALID; - if (vflags & HPTE_V_LARGE) - hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " + "rflags=%lx, vflags=%lx, psize=%d)\n", + hpte_group, va, pa, rflags, vflags, psize); - hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; + hpte_v = hpte_encode_v(va, psize) | vflags | HPTE_V_VALID; + hpte_r = hpte_encode_r(pa, psize) | rflags; + + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); + +#if 1 + { + int i; + for (i=0;i<8;i++) { + unsigned long w0, w1; + plpar_pte_read(0, hpte_group, &w0, &w1); + BUG_ON (HPTE_V_COMPARE(hpte_v, w0) + && (w0 & HPTE_V_VALID)); + } + } +#endif /* Now fill in the actual HPTE */ /* Set CEC cookie to 0 */ @@ -299,23 +317,30 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, /* Exact = 0 */ flags = 0; - /* XXX why is this here? - Anton */ + /* Make pHyp happy */ if (rflags & (_PAGE_GUARDED|_PAGE_NO_CACHE)) hpte_r &= ~_PAGE_COHERENT; lpar_rc = plpar_hcall(H_ENTER, flags, hpte_group, hpte_v, hpte_r, &slot, &dummy0, &dummy1); - - if (unlikely(lpar_rc == H_PTEG_Full)) + if (unlikely(lpar_rc == H_PTEG_Full)) { + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" full\n"); return -1; + } /* * Since we try and ioremap PHBs we don't own, the pte insert * will fail. 
However we must catch the failure in hash_page * or we will loop forever, so return -2 in this case. */ - if (unlikely(lpar_rc != H_Success)) + if (unlikely(lpar_rc != H_Success)) { + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" lpar err %d\n", lpar_rc); return -2; + } + if (!(vflags & HPTE_V_BOLTED)) + DBG_LOW(" -> slot: %d\n", slot & 7); /* Because of iSeries, we have to pass down the secondary * bucket bit here as well @@ -340,10 +365,8 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group) /* don't remove a bolted entry */ lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, (0x1UL << 4), &dummy1, &dummy2); - if (lpar_rc == H_Success) return i; - BUG_ON(lpar_rc != H_Not_Found); slot_offset++; @@ -371,20 +394,28 @@ static void pSeries_lpar_hptab_clear(void) * We can probably optimize here and assume the high bits of newpp are * already zero. For now I am paranoid. */ -static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, - unsigned long va, int large, int local) +static long pSeries_lpar_hpte_updatepp(unsigned long slot, + unsigned long newpp, + unsigned long va, + int psize, int local) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; - unsigned long avpn = va >> 23; + unsigned long want_v; - if (large) - avpn &= ~0x1UL; + want_v = hpte_encode_v(va, psize); - lpar_rc = plpar_pte_protect(flags, slot, (avpn << 7)); + DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... 
", + want_v & HPTE_V_AVPN, slot, flags, psize); - if (lpar_rc == H_Not_Found) + lpar_rc = plpar_pte_protect(flags, slot, want_v & HPTE_V_AVPN); + + if (lpar_rc == H_Not_Found) { + DBG_LOW("not found !\n"); return -1; + } + + DBG_LOW("ok\n"); BUG_ON(lpar_rc != H_Success); @@ -410,21 +441,22 @@ static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot) return dword0; } -static long pSeries_lpar_hpte_find(unsigned long vpn) +static long pSeries_lpar_hpte_find(unsigned long va, int psize) { unsigned long hash; unsigned long i, j; long slot; - unsigned long hpte_v; + unsigned long want_v, hpte_v; - hash = hpt_hash(vpn, 0); + hash = hpt_hash(va, mmu_psize_defs[psize].shift); + want_v = hpte_encode_v(va, psize); for (j = 0; j < 2; j++) { slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; for (i = 0; i < HPTES_PER_GROUP; i++) { hpte_v = pSeries_lpar_hpte_getword0(slot); - if ((HPTE_V_AVPN_VAL(hpte_v) == (vpn >> 11)) + if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID) && (!!(hpte_v & HPTE_V_SECONDARY) == j)) { /* HPTE matches */ @@ -441,17 +473,15 @@ static long pSeries_lpar_hpte_find(unsigned long vpn) } static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, - unsigned long ea) + unsigned long ea, + int psize) { - unsigned long lpar_rc; - unsigned long vsid, va, vpn, flags; - long slot; + unsigned long lpar_rc, slot, vsid, va, flags; vsid = get_kernel_vsid(ea); va = (vsid << 28) | (ea & 0x0fffffff); - vpn = va >> PAGE_SHIFT; - slot = pSeries_lpar_hpte_find(vpn); + slot = pSeries_lpar_hpte_find(va, psize); BUG_ON(slot == -1); flags = newpp & 7; @@ -461,18 +491,18 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, } static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, - int large, int local) + int psize, int local) { - unsigned long avpn = va >> 23; + unsigned long want_v; unsigned long lpar_rc; unsigned long dummy1, dummy2; - if (large) - avpn &= ~0x1UL; - - lpar_rc = plpar_pte_remove(H_AVPN, slot, 
(avpn << 7), &dummy1, - &dummy2); + DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d", + slot, va, psize, local); + want_v = hpte_encode_v(va, psize); + lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v & HPTE_V_AVPN, + &dummy1, &dummy2); if (lpar_rc == H_Not_Found) return; @@ -494,7 +524,8 @@ void pSeries_lpar_flush_hash_range(unsigned long number, int local) spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); for (i = 0; i < number; i++) - flush_hash_page(batch->vaddr[i], batch->pte[i], local); + flush_hash_page(batch->vaddr[i], batch->pte[i], + batch->psize, local); if (lock_tlbie) spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index b987164fca4c..2130cc315957 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -47,6 +47,10 @@ config ARCH_MAY_HAVE_PC_FDC bool default y +config PPC_STD_MMU + bool + default y + # We optimistically allocate largepages from the VM, so make the limit # large enough (16MB). This badly named config option is actually # max order + 1 @@ -294,6 +298,15 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES +config PPC_64K_PAGES + bool "64k page size" + help + This option changes the kernel logical page size to 64k. On machines + without processor support for 64k pages, the kernel will simulate + them by loading each individual 4k page on demand transparently, + while on hardware with such support, it will be used to map + normal application pages. 
+ config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" depends on SMP diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c index 504dee836d29..bce9065da6cb 100644 --- a/arch/ppc64/kernel/asm-offsets.c +++ b/arch/ppc64/kernel/asm-offsets.c @@ -93,6 +93,9 @@ int main(void) DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +#ifdef CONFIG_PPC_64K_PAGES + DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir)); +#endif #ifdef CONFIG_HUGETLB_PAGE DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index db1cf397be2d..9e8050ea1225 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -195,11 +195,11 @@ exception_marker: #define EX_R12 24 #define EX_R13 32 #define EX_SRR0 40 -#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */ #define EX_DAR 48 -#define EX_LR 48 /* SLB miss saves LR, but not DAR */ #define EX_DSISR 56 #define EX_CCR 60 +#define EX_R3 64 +#define EX_LR 72 #define EXCEPTION_PROLOG_PSERIES(area, label) \ mfspr r13,SPRN_SPRG3; /* get paca address into r13 */ \ @@ -419,17 +419,22 @@ data_access_slb_pSeries: mtspr SPRN_SPRG1,r13 RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_DAR std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + mfcr r9 +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r9,SPRN_SPRG1 - std r9,PACA_EXSLB+EX_R13(r13) - mfcr r9 + mfspr r10,SPRN_SPRG1 + std 
r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ - mfspr r3,SPRN_DAR - b .do_slb_miss /* Rel. branch works in real mode */ + b .slb_miss_realmode /* Rel. branch works in real mode */ STD_EXCEPTION_PSERIES(0x400, instruction_access) @@ -440,17 +445,22 @@ instruction_access_slb_pSeries: mtspr SPRN_SPRG1,r13 RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + mfcr r9 +#ifdef __DISABLED__ + /* Keep that around for when we re-implement dynamic VSIDs */ + cmpdi r3,0 + bge slb_miss_user_pseries +#endif /* __DISABLED__ */ std r10,PACA_EXSLB+EX_R10(r13) std r11,PACA_EXSLB+EX_R11(r13) std r12,PACA_EXSLB+EX_R12(r13) - std r3,PACA_EXSLB+EX_R3(r13) - mfspr r9,SPRN_SPRG1 - std r9,PACA_EXSLB+EX_R13(r13) - mfcr r9 + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) mfspr r12,SPRN_SRR1 /* and SRR1 */ - mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ - b .do_slb_miss /* Rel. branch works in real mode */ + b .slb_miss_realmode /* Rel. 
branch works in real mode */ STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) STD_EXCEPTION_PSERIES(0x600, alignment) @@ -508,6 +518,38 @@ _GLOBAL(do_stab_bolted_pSeries) mfspr r12,SPRN_SPRG2 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) +/* + * We have some room here we use that to put + * the peries slb miss user trampoline code so it's reasonably + * away from slb_miss_user_common to avoid problems with rfid + * + * This is used for when the SLB miss handler has to go virtual, + * which doesn't happen for now anymore but will once we re-implement + * dynamic VSIDs for shared page tables + */ +#ifdef __DISABLED__ +slb_miss_user_pseries: + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r10,SPRG1 + ld r11,PACA_EXSLB+EX_R9(r13) + ld r12,PACA_EXSLB+EX_R3(r13) + std r10,PACA_EXGEN+EX_R13(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R3(r13) + clrrdi r12,r13,32 + mfmsr r10 + mfspr r11,SRR0 /* save SRR0 */ + ori r12,r12,slb_miss_user_common@l /* virt addr of handler */ + ori r10,r10,MSR_IR|MSR_DR|MSR_RI + mtspr SRR0,r12 + mfspr r12,SRR1 /* and SRR1 */ + mtspr SRR1,r10 + rfid + b . /* prevent spec. execution */ +#endif /* __DISABLED__ */ + /* * Vectors for the FWNMI option. Share common code. 
*/ @@ -559,22 +601,59 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) .globl data_access_slb_iSeries data_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) mfspr r3,SPRN_DAR - b .do_slb_miss + std r9,PACA_EXSLB+EX_R9(r13) + mfcr r9 +#ifdef __DISABLED__ + cmpdi r3,0 + bge slb_miss_user_iseries +#endif + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) + ld r12,PACALPPACA+LPPACASRR1(r13); + b .slb_miss_realmode STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) .globl instruction_access_slb_iSeries instruction_access_slb_iSeries: mtspr SPRN_SPRG1,r13 /* save r13 */ - EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) - ld r12,PACALPPACA+LPPACASRR1(r13) - ld r3,PACALPPACA+LPPACASRR0(r13) - b .do_slb_miss + ld r3,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ + std r9,PACA_EXSLB+EX_R9(r13) + mfcr r9 +#ifdef __DISABLED__ + cmpdi r3,0 + bge .slb_miss_user_iseries +#endif + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + mfspr r10,SPRN_SPRG1 + std r10,PACA_EXSLB+EX_R13(r13) + ld r12,PACALPPACA+LPPACASRR1(r13); + b .slb_miss_realmode + +#ifdef __DISABLED__ +slb_miss_user_iseries: + std r10,PACA_EXGEN+EX_R10(r13) + std r11,PACA_EXGEN+EX_R11(r13) + std r12,PACA_EXGEN+EX_R12(r13) + mfspr r10,SPRG1 + ld r11,PACA_EXSLB+EX_R9(r13) + ld r12,PACA_EXSLB+EX_R3(r13) + std r10,PACA_EXGEN+EX_R13(r13) + std r11,PACA_EXGEN+EX_R9(r13) + std r12,PACA_EXGEN+EX_R3(r13) + EXCEPTION_PROLOG_ISERIES_2 + b slb_miss_user_common +#endif MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) @@ -809,6 +888,126 @@ instruction_access_common: li r5,0x400 b 
.do_hash_page /* Try to handle as hpte fault */ +/* + * Here is the common SLB miss user that is used when going to virtual + * mode for SLB misses, that is currently not used + */ +#ifdef __DISABLED__ + .align 7 + .globl slb_miss_user_common +slb_miss_user_common: + mflr r10 + std r3,PACA_EXGEN+EX_DAR(r13) + stw r9,PACA_EXGEN+EX_CCR(r13) + std r10,PACA_EXGEN+EX_LR(r13) + std r11,PACA_EXGEN+EX_SRR0(r13) + bl .slb_allocate_user + + ld r10,PACA_EXGEN+EX_LR(r13) + ld r3,PACA_EXGEN+EX_R3(r13) + lwz r9,PACA_EXGEN+EX_CCR(r13) + ld r11,PACA_EXGEN+EX_SRR0(r13) + mtlr r10 + beq- slb_miss_fault + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_user_slb + mfmsr r10 + +.machine push +.machine "power4" + mtcrf 0x80,r9 +.machine pop + + clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */ + mtmsrd r10,1 + + mtspr SRR0,r11 + mtspr SRR1,r12 + + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + rfid + b . + +slb_miss_fault: + EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN) + ld r4,PACA_EXGEN+EX_DAR(r13) + li r5,0 + std r4,_DAR(r1) + std r5,_DSISR(r1) + b .handle_page_fault + +unrecov_user_slb: + EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +#endif /* __DISABLED__ */ + + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r12 contain the saved SRR1, SRR0 is still ready for return + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(slb_miss_realmode) + mflr r10 + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate_realmode + + /* All done -- return from exception. 
*/ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ +#ifdef CONFIG_PPC_ISERIES + ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ +#endif /* CONFIG_PPC_ISERIES */ + + mtlr r10 + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_slb + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + +#ifdef CONFIG_PPC_ISERIES + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r12 +#endif /* CONFIG_PPC_ISERIES */ + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + .align 7 .globl hardware_interrupt_common .globl hardware_interrupt_entry @@ -1138,62 +1337,6 @@ _GLOBAL(do_stab_bolted) rfid b . /* prevent speculative execution */ -/* - * r13 points to the PACA, r9 contains the saved CR, - * r11 and r12 contain the saved SRR0 and SRR1. - * r3 has the faulting address - * r9 - r13 are saved in paca->exslb. - * r3 is saved in paca->slb_r3 - * We assume we aren't going to take any exceptions during this procedure. - */ -_GLOBAL(do_slb_miss) - mflr r10 - - stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ - std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ - - bl .slb_allocate /* handle it */ - - /* All done -- return from exception. */ - - ld r10,PACA_EXSLB+EX_LR(r13) - ld r3,PACA_EXSLB+EX_R3(r13) - lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ -#ifdef CONFIG_PPC_ISERIES - ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ -#endif /* CONFIG_PPC_ISERIES */ - - mtlr r10 - - andi. 
r10,r12,MSR_RI /* check for unrecoverable exception */ - beq- unrecov_slb - -.machine push -.machine "power4" - mtcrf 0x80,r9 - mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ -.machine pop - -#ifdef CONFIG_PPC_ISERIES - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r12 -#endif /* CONFIG_PPC_ISERIES */ - ld r9,PACA_EXSLB+EX_R9(r13) - ld r10,PACA_EXSLB+EX_R10(r13) - ld r11,PACA_EXSLB+EX_R11(r13) - ld r12,PACA_EXSLB+EX_R12(r13) - ld r13,PACA_EXSLB+EX_R13(r13) - rfid - b . /* prevent speculative execution */ - -unrecov_slb: - EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) - DISABLE_INTS - bl .save_nvgprs -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl .unrecoverable_exception - b 1b - /* * Space for CPU0's segment table. * @@ -1569,7 +1712,10 @@ _GLOBAL(__secondary_start) #endif /* Initialize the first segment table (or SLB) entry */ ld r3,PACASTABVIRT(r13) /* get addr of segment table */ +BEGIN_FTR_SECTION bl .stab_initialize +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + bl .slb_initialize /* Initialize the kernel stack. Just a repeat for iSeries. 
*/ LOADADDR(r3,current_set) diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c index 5e27e5a6a35d..3133c72b28ec 100644 --- a/arch/ppc64/kernel/pacaData.c +++ b/arch/ppc64/kernel/pacaData.c @@ -23,7 +23,7 @@ static union { struct systemcfg data; u8 page[PAGE_SIZE]; -} systemcfg_store __page_aligned; +} systemcfg_store __attribute__((__section__(".data.page.aligned"))); struct systemcfg *systemcfg = &systemcfg_store.data; EXPORT_SYMBOL(systemcfg); diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 97bfceb5353b..dece31e58bc4 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -635,10 +635,10 @@ static inline char *find_flat_dt_string(u32 offset) * used to extract the memory informations at boot before we can * unflatten the tree */ -static int __init scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), - void *data) +int __init of_scan_flat_dt(int (*it)(unsigned long node, + const char *uname, int depth, + void *data), + void *data) { unsigned long p = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; @@ -695,8 +695,8 @@ static int __init scan_flat_dt(int (*it)(unsigned long node, * This function can be used within scan_flattened_dt callback to get * access to properties */ -static void* __init get_flat_dt_prop(unsigned long node, const char *name, - unsigned long *size) +void* __init of_get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size) { unsigned long p = node; @@ -996,7 +996,7 @@ void __init unflatten_device_tree(void) static int __init early_init_dt_scan_cpus(unsigned long node, const char *uname, int depth, void *data) { - char *type = get_flat_dt_prop(node, "device_type", NULL); + char *type = of_get_flat_dt_prop(node, "device_type", NULL); u32 *prop; unsigned long size; @@ -1004,17 +1004,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; - /* 
On LPAR, look for the first ibm,pft-size property for the hash table size - */ - if (systemcfg->platform == PLATFORM_PSERIES_LPAR && ppc64_pft_size == 0) { - u32 *pft_size; - pft_size = (u32 *)get_flat_dt_prop(node, "ibm,pft-size", NULL); - if (pft_size != NULL) { - /* pft_size[0] is the NUMA CEC cookie */ - ppc64_pft_size = pft_size[1]; - } - } - if (initial_boot_params && initial_boot_params->version >= 2) { /* version 2 of the kexec param format adds the phys cpuid * of booted proc. @@ -1023,8 +1012,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, boot_cpuid = 0; } else { /* Check if it's the boot-cpu, set it's hw index in paca now */ - if (get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { - u32 *prop = get_flat_dt_prop(node, "reg", NULL); + if (of_get_flat_dt_prop(node, "linux,boot-cpu", NULL) + != NULL) { + u32 *prop = of_get_flat_dt_prop(node, "reg", NULL); set_hard_smp_processor_id(0, prop == NULL ? 0 : *prop); boot_cpuid_phys = get_hard_smp_processor_id(0); } @@ -1032,14 +1022,14 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #ifdef CONFIG_ALTIVEC /* Check if we have a VMX and eventually update CPU features */ - prop = (u32 *)get_flat_dt_prop(node, "ibm,vmx", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "ibm,vmx", NULL); if (prop && (*prop) > 0) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; } /* Same goes for Apple's "altivec" property */ - prop = (u32 *)get_flat_dt_prop(node, "altivec", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "altivec", NULL); if (prop) { cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC; @@ -1051,7 +1041,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, * this by looking at the size of the ibm,ppc-interrupt-server#s * property */ - prop = (u32 *)get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", + prop = (u32 *)of_get_flat_dt_prop(node, 
"ibm,ppc-interrupt-server#s", &size); cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; if (prop && ((size / sizeof(u32)) > 1)) @@ -1072,26 +1062,26 @@ static int __init early_init_dt_scan_chosen(unsigned long node, return 0; /* get platform type */ - prop = (u32 *)get_flat_dt_prop(node, "linux,platform", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); if (prop == NULL) return 0; systemcfg->platform = *prop; /* check if iommu is forced on or off */ - if (get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) + if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) iommu_is_off = 1; - if (get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) + if (of_get_flat_dt_prop(node, "linux,iommu-force-on", NULL) != NULL) iommu_force_on = 1; - prop64 = (u64*)get_flat_dt_prop(node, "linux,memory-limit", NULL); + prop64 = (u64*)of_get_flat_dt_prop(node, "linux,memory-limit", NULL); if (prop64) memory_limit = *prop64; - prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-start", NULL); + prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-start",NULL); if (prop64) tce_alloc_start = *prop64; - prop64 = (u64*)get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); + prop64 = (u64*)of_get_flat_dt_prop(node, "linux,tce-alloc-end", NULL); if (prop64) tce_alloc_end = *prop64; @@ -1102,9 +1092,12 @@ static int __init early_init_dt_scan_chosen(unsigned long node, { u64 *basep, *entryp; - basep = (u64*)get_flat_dt_prop(node, "linux,rtas-base", NULL); - entryp = (u64*)get_flat_dt_prop(node, "linux,rtas-entry", NULL); - prop = (u32*)get_flat_dt_prop(node, "linux,rtas-size", NULL); + basep = (u64*)of_get_flat_dt_prop(node, + "linux,rtas-base", NULL); + entryp = (u64*)of_get_flat_dt_prop(node, + "linux,rtas-entry", NULL); + prop = (u32*)of_get_flat_dt_prop(node, + "linux,rtas-size", NULL); if (basep && entryp && prop) { rtas.base = *basep; rtas.entry = *entryp; @@ -1125,11 +1118,11 @@ static int __init early_init_dt_scan_root(unsigned long node, if 
(depth != 0) return 0; - prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "#size-cells", NULL); dt_root_size_cells = (prop == NULL) ? 1 : *prop; DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL); + prop = (u32 *)of_get_flat_dt_prop(node, "#address-cells", NULL); dt_root_addr_cells = (prop == NULL) ? 2 : *prop; DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); @@ -1161,7 +1154,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) static int __init early_init_dt_scan_memory(unsigned long node, const char *uname, int depth, void *data) { - char *type = get_flat_dt_prop(node, "device_type", NULL); + char *type = of_get_flat_dt_prop(node, "device_type", NULL); cell_t *reg, *endp; unsigned long l; @@ -1169,7 +1162,7 @@ static int __init early_init_dt_scan_memory(unsigned long node, if (type == NULL || strcmp(type, "memory") != 0) return 0; - reg = (cell_t *)get_flat_dt_prop(node, "reg", &l); + reg = (cell_t *)of_get_flat_dt_prop(node, "reg", &l); if (reg == NULL) return 0; @@ -1225,19 +1218,16 @@ void __init early_init_devtree(void *params) /* Setup flat device-tree pointer */ initial_boot_params = params; - /* By default, hash size is not set */ - ppc64_pft_size = 0; - /* Retreive various informations from the /chosen node of the * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... 
*/ - scan_flat_dt(early_init_dt_scan_chosen, NULL); + of_scan_flat_dt(early_init_dt_scan_chosen, NULL); /* Scan memory nodes and rebuild LMBs */ lmb_init(); - scan_flat_dt(early_init_dt_scan_root, NULL); - scan_flat_dt(early_init_dt_scan_memory, NULL); + of_scan_flat_dt(early_init_dt_scan_root, NULL); + of_scan_flat_dt(early_init_dt_scan_memory, NULL); lmb_enforce_memory_limit(memory_limit); lmb_analyze(); systemcfg->physicalMemorySize = lmb_phys_mem_size(); @@ -1253,26 +1243,8 @@ void __init early_init_devtree(void *params) /* Retreive hash table size from flattened tree plus other * CPU related informations (altivec support, boot CPU ID, ...) */ - scan_flat_dt(early_init_dt_scan_cpus, NULL); + of_scan_flat_dt(early_init_dt_scan_cpus, NULL); - /* If hash size wasn't obtained above, we calculate it now based on - * the total RAM size - */ - if (ppc64_pft_size == 0) { - unsigned long rnd_mem_size, pteg_count; - - /* round mem_size up to next power of 2 */ - rnd_mem_size = 1UL << __ilog2(systemcfg->physicalMemorySize); - if (rnd_mem_size < systemcfg->physicalMemorySize) - rnd_mem_size <<= 1; - - /* # pages / 2 */ - pteg_count = max(rnd_mem_size >> (12 + 1), 1UL << 11); - - ppc64_pft_size = __ilog2(pteg_count << 7); - } - - DBG("Hash pftSize: %x\n", (int)ppc64_pft_size); DBG(" <- early_init_devtree()\n"); } diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index c019501daceb..79a0556a0ab8 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -101,6 +101,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset); #define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0000020000000000) #define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0000040000000000) #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) +#define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0000100000000000) #else /* ensure on 32b processors the flags are available for compiling but * don't do anything */ @@ -116,6 +117,7 @@ extern void do_cpu_ftr_fixups(unsigned long 
offset); #define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0) #define CPU_FTR_LOCKLESS_TLBIE ASM_CONST(0x0) #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0) +#define CPU_FTR_CI_LARGE_PAGE ASM_CONST(0x0) #endif #ifndef __ASSEMBLY__ @@ -339,6 +341,7 @@ enum { #ifdef __powerpc64__ CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_CELL | + CPU_FTR_CI_LARGE_PAGE | #endif 0, diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h index 9d91bdd667ae..6a35e6570ccd 100644 --- a/include/asm-powerpc/iommu.h +++ b/include/asm-powerpc/iommu.h @@ -74,6 +74,11 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn); /* Creates table for an individual device node */ extern void iommu_devnode_init_iSeries(struct device_node *dn); +/* Get table parameters from HV */ +extern void iommu_table_getparms_iSeries(unsigned long busno, + unsigned char slotno, + unsigned char virtbus, + struct iommu_table* tbl); #endif /* CONFIG_PPC_ISERIES */ diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h index 629ca964b974..fa03864d06eb 100644 --- a/include/asm-powerpc/machdep.h +++ b/include/asm-powerpc/machdep.h @@ -47,20 +47,22 @@ struct machdep_calls { #ifdef CONFIG_PPC64 void (*hpte_invalidate)(unsigned long slot, unsigned long va, - int large, + int psize, int local); long (*hpte_updatepp)(unsigned long slot, unsigned long newpp, unsigned long va, - int large, + int pize, int local); void (*hpte_updateboltedpp)(unsigned long newpp, - unsigned long ea); + unsigned long ea, + int psize); long (*hpte_insert)(unsigned long hpte_group, unsigned long va, unsigned long prpn, + unsigned long rflags, unsigned long vflags, - unsigned long rflags); + int psize); long (*hpte_remove)(unsigned long hpte_group); void (*flush_hash_range)(unsigned long number, int local); diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h index 3a0104fa0462..7587bf5f38c6 100644 --- a/include/asm-powerpc/prom.h +++ 
b/include/asm-powerpc/prom.h @@ -178,6 +178,14 @@ extern struct device_node *of_get_next_child(const struct device_node *node, extern struct device_node *of_node_get(struct device_node *node); extern void of_node_put(struct device_node *node); +/* For scanning the flat device-tree at boot time */ +int __init of_scan_flat_dt(int (*it)(unsigned long node, + const char *uname, int depth, + void *data), + void *data); +void* __init of_get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size); + /* For updating the device tree at runtime */ extern void of_attach_node(struct device_node *); extern void of_detach_node(const struct device_node *); diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index b5da0b851e02..3536a5cd7a2d 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -289,7 +289,7 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) #ifdef CONFIG_PPC64 static __inline__ unsigned long -__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new) +__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) { unsigned long prev; diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h index ab17db79f69d..e525f49bd179 100644 --- a/include/asm-powerpc/thread_info.h +++ b/include/asm-powerpc/thread_info.h @@ -65,23 +65,27 @@ struct thread_info { /* thread information allocation */ -#ifdef CONFIG_DEBUG_STACK_USAGE -#define THREAD_INFO_GFP GFP_KERNEL | __GFP_ZERO -#else -#define THREAD_INFO_GFP GFP_KERNEL -#endif - #if THREAD_SHIFT >= PAGE_SHIFT #define THREAD_ORDER (THREAD_SHIFT - PAGE_SHIFT) +#ifdef CONFIG_DEBUG_STACK_USAGE #define alloc_thread_info(tsk) \ - ((struct thread_info *)__get_free_pages(THREAD_INFO_GFP, THREAD_ORDER)) + ((struct thread_info *)__get_free_pages(GFP_KERNEL | \ + __GFP_ZERO, THREAD_ORDER)) +#else +#define alloc_thread_info(tsk) \ + ((struct thread_info *)__get_free_pages(GFP_KERNEL, 
THREAD_ORDER)) +#endif #define free_thread_info(ti) free_pages((unsigned long)ti, THREAD_ORDER) #else /* THREAD_SHIFT < PAGE_SHIFT */ -#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, THREAD_INFO_GFP) +#ifdef CONFIG_DEBUG_STACK_USAGE +#define alloc_thread_info(tsk) kzalloc(THREAD_SIZE, GFP_KERNEL) +#else +#define alloc_thread_info(tsk) kmalloc(THREAD_SIZE, GFP_KERNEL) +#endif #define free_thread_info(ti) kfree(ti) #endif /* THREAD_SHIFT < PAGE_SHIFT */ diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h index ca3655672bbc..a2998eee37bb 100644 --- a/include/asm-powerpc/tlbflush.h +++ b/include/asm-powerpc/tlbflush.h @@ -31,9 +31,9 @@ struct mm_struct; struct ppc64_tlb_batch { unsigned long index; struct mm_struct *mm; - pte_t pte[PPC64_TLB_BATCH_NR]; + real_pte_t pte[PPC64_TLB_BATCH_NR]; unsigned long vaddr[PPC64_TLB_BATCH_NR]; - unsigned int large; + unsigned int psize; }; DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); @@ -48,8 +48,9 @@ static inline void flush_tlb_pending(void) put_cpu_var(ppc64_tlb_batch); } -extern void flush_hash_page(unsigned long va, pte_t pte, int local); -void flush_hash_range(unsigned long number, int local); +extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize, + int local); +extern void flush_hash_range(unsigned long number, int local); #else /* CONFIG_PPC64 */ diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index e0505acb77d9..4c18a5cb69f5 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -48,13 +48,21 @@ extern char initial_stab[]; /* Bits in the SLB VSID word */ #define SLB_VSID_SHIFT 12 +#define SLB_VSID_B ASM_CONST(0xc000000000000000) +#define SLB_VSID_B_256M ASM_CONST(0x0000000000000000) +#define SLB_VSID_B_1T ASM_CONST(0x4000000000000000) #define SLB_VSID_KS ASM_CONST(0x0000000000000800) #define SLB_VSID_KP ASM_CONST(0x0000000000000400) #define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */ -#define SLB_VSID_L 
ASM_CONST(0x0000000000000100) /* largepage */ +#define SLB_VSID_L ASM_CONST(0x0000000000000100) #define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */ -#define SLB_VSID_LS ASM_CONST(0x0000000000000070) /* size of largepage */ - +#define SLB_VSID_LP ASM_CONST(0x0000000000000030) +#define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000) +#define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010) +#define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020) +#define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030) +#define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP) + #define SLB_VSID_KERNEL (SLB_VSID_KP) #define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C) @@ -69,6 +77,7 @@ extern char initial_stab[]; #define HPTE_V_AVPN_SHIFT 7 #define HPTE_V_AVPN ASM_CONST(0xffffffffffffff80) #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) +#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & HPTE_V_AVPN)) #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) #define HPTE_V_LOCK ASM_CONST(0x0000000000000008) #define HPTE_V_LARGE ASM_CONST(0x0000000000000004) @@ -81,6 +90,7 @@ extern char initial_stab[]; #define HPTE_R_RPN ASM_CONST(0x3ffffffffffff000) #define HPTE_R_FLAGS ASM_CONST(0x00000000000003ff) #define HPTE_R_PP ASM_CONST(0x0000000000000003) +#define HPTE_R_N ASM_CONST(0x0000000000000004) /* Values for PP (assumes Ks=0, Kp=1) */ /* pp0 will always be 0 for linux */ @@ -99,100 +109,120 @@ typedef struct { extern hpte_t *htab_address; extern unsigned long htab_hash_mask; -static inline unsigned long hpt_hash(unsigned long vpn, int large) +/* + * Page size definition + * + * shift : is the "PAGE_SHIFT" value for that page size + * sllp : is a bit mask with the value of SLB L || LP to be or'ed + * directly to a slbmte "vsid" value + * penc : is the HPTE encoding mask for the "LP" field: + * + */ +struct mmu_psize_def { - unsigned long vsid; - unsigned long page; + unsigned int shift; /* number of bits */ + unsigned int penc; /* HPTE encoding */ + unsigned int tlbiel; /* tlbiel 
supported for that page size */ + unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */ + unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */ +}; - if (large) { - vsid = vpn >> 4; - page = vpn & 0xf; - } else { - vsid = vpn >> 16; - page = vpn & 0xffff; - } +#endif /* __ASSEMBLY__ */ - return (vsid & 0x7fffffffffUL) ^ page; -} +/* + * The kernel use the constants below to index in the page sizes array. + * The use of fixed constants for this purpose is better for performances + * of the low level hash refill handlers. + * + * A non supported page size has a "shift" field set to 0 + * + * Any new page size being implemented can get a new entry in here. Whether + * the kernel will use it or not is a different matter though. The actual page + * size used by hugetlbfs is not defined here and may be made variable + */ -static inline void __tlbie(unsigned long va, int large) +#define MMU_PAGE_4K 0 /* 4K */ +#define MMU_PAGE_64K 1 /* 64K */ +#define MMU_PAGE_64K_AP 2 /* 64K Admixed (in a 4K segment) */ +#define MMU_PAGE_1M 3 /* 1M */ +#define MMU_PAGE_16M 4 /* 16M */ +#define MMU_PAGE_16G 5 /* 16G */ +#define MMU_PAGE_COUNT 6 + +#ifndef __ASSEMBLY__ + +/* + * The current system page sizes + */ +extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +extern int mmu_linear_psize; +extern int mmu_virtual_psize; + +#ifdef CONFIG_HUGETLB_PAGE +/* + * The page size index of the huge pages for use by hugetlbfs + */ +extern int mmu_huge_psize; + +#endif /* CONFIG_HUGETLB_PAGE */ + +/* + * This function sets the AVPN and L fields of the HPTE appropriately + * for the page size + */ +static inline unsigned long hpte_encode_v(unsigned long va, int psize) { - /* clear top 16 bits, non SLS segment */ - va &= ~(0xffffULL << 48); - - if (large) { - va &= HPAGE_MASK; - asm volatile("tlbie %0,1" : : "r"(va) : "memory"); - } else { - va &= PAGE_MASK; - asm volatile("tlbie %0,0" : : "r"(va) : "memory"); - } -} - -static inline void tlbie(unsigned long va, int 
large) -{ - asm volatile("ptesync": : :"memory"); - __tlbie(va, large); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); -} - -static inline void __tlbiel(unsigned long va) -{ - /* clear top 16 bits, non SLS segment */ - va &= ~(0xffffULL << 48); - va &= PAGE_MASK; - - /* - * Thanks to Alan Modra we are now able to use machine specific - * assembly instructions (like tlbiel) by using the gas -many flag. - * However we have to support older toolchains so for the moment - * we hardwire it. - */ -#if 0 - asm volatile("tlbiel %0" : : "r"(va) : "memory"); -#else - asm volatile(".long 0x7c000224 | (%0 << 11)" : : "r"(va) : "memory"); -#endif -} - -static inline void tlbiel(unsigned long va) -{ - asm volatile("ptesync": : :"memory"); - __tlbiel(va); - asm volatile("ptesync": : :"memory"); -} - -static inline unsigned long slot2va(unsigned long hpte_v, unsigned long slot) -{ - unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v); - unsigned long va; - - va = avpn << 23; - - if (! (hpte_v & HPTE_V_LARGE)) { - unsigned long vpi, pteg; - - pteg = slot / HPTES_PER_GROUP; - if (hpte_v & HPTE_V_SECONDARY) - pteg = ~pteg; - - vpi = ((va >> 28) ^ pteg) & htab_hash_mask; - - va |= vpi << PAGE_SHIFT; - } - - return va; + unsigned long v = + v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm); + v <<= HPTE_V_AVPN_SHIFT; + if (psize != MMU_PAGE_4K) + v |= HPTE_V_LARGE; + return v; } /* - * Handle a fault by adding an HPTE. If the address can't be determined - * to be valid via Linux page tables, return 1. If handled return 0 + * This function sets the ARPN, and LP fields of the HPTE appropriately + * for the page size. 
We assume the pa is already "clean" that is properly + * aligned for the requested page size */ -extern int __hash_page(unsigned long ea, unsigned long access, - unsigned long vsid, pte_t *ptep, unsigned long trap, - int local); +static inline unsigned long hpte_encode_r(unsigned long pa, int psize) +{ + unsigned long r; + + /* A 4K page needs no special encoding */ + if (psize == MMU_PAGE_4K) + return pa & HPTE_R_RPN; + else { + unsigned int penc = mmu_psize_defs[psize].penc; + unsigned int shift = mmu_psize_defs[psize].shift; + return (pa & ~((1ul << shift) - 1)) | (penc << 12); + } + return r; +} + +/* + * This hashes a virtual address for a 256Mb segment only for now + */ + +static inline unsigned long hpt_hash(unsigned long va, unsigned int shift) +{ + return ((va >> 28) & 0x7fffffffffUL) ^ ((va & 0x0fffffffUL) >> shift); +} + +extern int __hash_page_4K(unsigned long ea, unsigned long access, + unsigned long vsid, pte_t *ptep, unsigned long trap, + unsigned int local); +extern int __hash_page_64K(unsigned long ea, unsigned long access, + unsigned long vsid, pte_t *ptep, unsigned long trap, + unsigned int local); +struct mm_struct; +extern int hash_huge_page(struct mm_struct *mm, unsigned long access, + unsigned long ea, unsigned long vsid, int local); extern void htab_finish_init(void); +extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, + unsigned long pstart, unsigned long mode, + int psize); extern void hpte_init_native(void); extern void hpte_init_lpar(void); @@ -200,17 +230,21 @@ extern void hpte_init_iSeries(void); extern long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, - unsigned long vflags, - unsigned long rflags); -extern long native_hpte_insert(unsigned long hpte_group, unsigned long va, - unsigned long prpn, - unsigned long vflags, unsigned long rflags); + unsigned long rflags, + unsigned long vflags, int psize); -extern long iSeries_hpte_bolt_or_insert(unsigned long hpte_group, - 
unsigned long va, unsigned long prpn, - unsigned long vflags, unsigned long rflags); +extern long native_hpte_insert(unsigned long hpte_group, + unsigned long va, unsigned long prpn, + unsigned long rflags, + unsigned long vflags, int psize); + +extern long iSeries_hpte_insert(unsigned long hpte_group, + unsigned long va, unsigned long prpn, + unsigned long rflags, + unsigned long vflags, int psize); extern void stabs_alloc(void); +extern void slb_initialize(void); #endif /* __ASSEMBLY__ */ diff --git a/include/asm-ppc64/mmu_context.h b/include/asm-ppc64/mmu_context.h index 820dd729b895..4f512e9fa6b8 100644 --- a/include/asm-ppc64/mmu_context.h +++ b/include/asm-ppc64/mmu_context.h @@ -16,8 +16,16 @@ * 2 of the License, or (at your option) any later version. */ -static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +/* + * Getting into a kernel thread, there is no valid user segment, mark + * paca->pgdir NULL so that SLB miss on user addresses will fault + */ +static inline void enter_lazy_tlb(struct mm_struct *mm, + struct task_struct *tsk) { +#ifdef CONFIG_PPC_64K_PAGES + get_paca()->pgdir = NULL; +#endif /* CONFIG_PPC_64K_PAGES */ } #define NO_CONTEXT 0 @@ -40,8 +48,13 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, cpu_set(smp_processor_id(), next->cpu_vm_mask); /* No need to flush userspace segments if the mm doesnt change */ +#ifdef CONFIG_PPC_64K_PAGES + if (prev == next && get_paca()->pgdir == next->pgd) + return; +#else if (prev == next) return; +#endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC)) diff --git a/include/asm-ppc64/paca.h b/include/asm-ppc64/paca.h index f68fe91debaf..bccacd6aa93a 100644 --- a/include/asm-ppc64/paca.h +++ b/include/asm-ppc64/paca.h @@ -72,10 +72,15 @@ struct paca_struct { /* * Now, starting in cacheline 2, the exception save areas */ - u64 exgen[8] __attribute__((aligned(0x80))); /* used for most interrupts/exceptions */ - u64 
exmc[8]; /* used for machine checks */ - u64 exslb[8]; /* used for SLB/segment table misses - * on the linear mapping */ + /* used for most interrupts/exceptions */ + u64 exgen[10] __attribute__((aligned(0x80))); + u64 exmc[10]; /* used for machine checks */ + u64 exslb[10]; /* used for SLB/segment table misses + * on the linear mapping */ +#ifdef CONFIG_PPC_64K_PAGES + pgd_t *pgdir; +#endif /* CONFIG_PPC_64K_PAGES */ + mm_context_t context; u16 slb_cache[SLB_CACHE_ENTRIES]; u16 slb_cache_ptr; diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h index d404431f0a9a..82ce187e5be8 100644 --- a/include/asm-ppc64/page.h +++ b/include/asm-ppc64/page.h @@ -13,32 +13,59 @@ #include #include /* for ASM_CONST */ -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) +/* + * We support either 4k or 64k software page size. When using 64k pages + * however, whether we are really supporting 64k pages in HW or not is + * irrelevant to those definitions. We always define HW_PAGE_SHIFT to 12 + * as use of 64k pages remains specific to the Linux kernel: every notion of + * page number shared with the firmware, TCEs, iommu, etc... still assumes + * a page size of 4096.
+ */ +#ifdef CONFIG_PPC_64K_PAGES +#define PAGE_SHIFT 16 +#else +#define PAGE_SHIFT 12 +#endif -#define SID_SHIFT 28 -#define SID_MASK 0xfffffffffUL -#define ESID_MASK 0xfffffffff0000000UL -#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK) +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) -#define HPAGE_SHIFT 24 -#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) -#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +/* HW_PAGE_SHIFT is always 4k pages */ +#define HW_PAGE_SHIFT 12 +#define HW_PAGE_SIZE (ASM_CONST(1) << HW_PAGE_SHIFT) +#define HW_PAGE_MASK (~(HW_PAGE_SIZE-1)) + +/* PAGE_FACTOR is the number of bits factor between PAGE_SHIFT and + * HW_PAGE_SHIFT, that is 4k pages + */ +#define PAGE_FACTOR (PAGE_SHIFT - HW_PAGE_SHIFT) + +/* Segment size */ +#define SID_SHIFT 28 +#define SID_MASK 0xfffffffffUL +#define ESID_MASK 0xfffffffff0000000UL +#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK) + +/* Large pages size */ + +#ifndef __ASSEMBLY__ +extern unsigned int HPAGE_SHIFT; +#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#endif /* __ASSEMBLY__ */ #ifdef CONFIG_HUGETLB_PAGE -#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HTLB_AREA_SHIFT 40 #define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) #define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) -#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ - - (1U << GET_ESID(addr))) & 0xffff) -#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ - - (1U << GET_HTLB_AREA(addr))) & 0xffff) +#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ + - (1U << GET_ESID(addr))) & 0xffff) +#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ + - (1U << GET_HTLB_AREA(addr))) & 0xffff) #define ARCH_HAS_HUGEPAGE_ONLY_RANGE #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE @@ -114,7 +141,25 @@ static __inline__ void clear_page(void *addr) : "ctr", 
"memory"); } -extern void copy_page(void *to, void *from); +extern void copy_4K_page(void *to, void *from); + +#ifdef CONFIG_PPC_64K_PAGES +static inline void copy_page(void *to, void *from) +{ + unsigned int i; + for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) { + copy_4K_page(to, from); + to += 4096; + from += 4096; + } +} +#else /* CONFIG_PPC_64K_PAGES */ +static inline void copy_page(void *to, void *from) +{ + copy_4K_page(to, from); +} +#endif /* CONFIG_PPC_64K_PAGES */ + struct page; extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *p); @@ -124,43 +169,75 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag * These are used to make use of C type-checking. * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. */ + +/* PTE level */ typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; -typedef struct { unsigned long pud; } pud_t; -typedef struct { unsigned long pgd; } pgd_t; -typedef struct { unsigned long pgprot; } pgprot_t; - #define pte_val(x) ((x).pte) -#define pmd_val(x) ((x).pmd) -#define pud_val(x) ((x).pud) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) - #define __pte(x) ((pte_t) { (x) }) + +/* 64k pages additionally define a bigger "real PTE" type that gathers + * the "second half" part of the PTE for pseudo 64k pages + */ +#ifdef CONFIG_PPC_64K_PAGES +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#else +typedef struct { pte_t pte; } real_pte_t; +#endif + +/* PMD level */ +typedef struct { unsigned long pmd; } pmd_t; +#define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) }) + +/* PUD level exists only on 4k pages */ +#ifndef CONFIG_PPC_64K_PAGES +typedef struct { unsigned long pud; } pud_t; +#define pud_val(x) ((x).pud) #define __pud(x) ((pud_t) { (x) }) +#endif + +/* PGD level */ +typedef struct { unsigned long
pgd; } pgd_t; +#define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) }) + +/* Page protection bits */ +typedef struct { unsigned long pgprot; } pgprot_t; +#define pgprot_val(x) ((x).pgprot) #define __pgprot(x) ((pgprot_t) { (x) }) #else + /* * .. while these make it easier on the compiler */ -typedef unsigned long pte_t; -typedef unsigned long pmd_t; -typedef unsigned long pud_t; -typedef unsigned long pgd_t; -typedef unsigned long pgprot_t; +typedef unsigned long pte_t; #define pte_val(x) (x) +#define __pte(x) (x) + +#ifdef CONFIG_PPC_64K_PAGES +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#else +typedef unsigned long real_pte_t; +#endif + + +typedef unsigned long pmd_t; #define pmd_val(x) (x) +#define __pmd(x) (x) + +#ifndef CONFIG_PPC_64K_PAGES +typedef unsigned long pud_t; #define pud_val(x) (x) +#define __pud(x) (x) +#endif + +typedef unsigned long pgd_t; #define pgd_val(x) (x) #define pgprot_val(x) (x) -#define __pte(x) (x) -#define __pmd(x) (x) -#define __pud(x) (x) +typedef unsigned long pgprot_t; #define __pgd(x) (x) #define __pgprot(x) (x) diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h index 26bc49c1108d..98da0e4262bd 100644 --- a/include/asm-ppc64/pgalloc.h +++ b/include/asm-ppc64/pgalloc.h @@ -8,10 +8,16 @@ extern kmem_cache_t *pgtable_cache[]; +#ifdef CONFIG_PPC_64K_PAGES +#define PTE_CACHE_NUM 0 +#define PMD_CACHE_NUM 0 +#define PGD_CACHE_NUM 1 +#else #define PTE_CACHE_NUM 0 #define PMD_CACHE_NUM 1 #define PUD_CACHE_NUM 1 #define PGD_CACHE_NUM 0 +#endif /* * This program is free software; you can redistribute it and/or @@ -30,6 +36,8 @@ static inline void pgd_free(pgd_t *pgd) kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd); } +#ifndef CONFIG_PPC_64K_PAGES + #define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) @@ -43,7 +51,30 @@ static inline void pud_free(pud_t *pud) kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud); } 
-#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) +static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) +{ + pud_set(pud, (unsigned long)pmd); +} + +#define pmd_populate(mm, pmd, pte_page) \ + pmd_populate_kernel(mm, pmd, page_address(pte_page)) +#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte)) + + +#else /* CONFIG_PPC_64K_PAGES */ + +#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd) + +static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, + pte_t *pte) +{ + pmd_set(pmd, (unsigned long)pte); +} + +#define pmd_populate(mm, pmd, pte_page) \ + pmd_populate_kernel(mm, pmd, page_address(pte_page)) + +#endif /* CONFIG_PPC_64K_PAGES */ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -56,17 +87,15 @@ static inline void pmd_free(pmd_t *pmd) kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); } -#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) -#define pmd_populate(mm, pmd, pte_page) \ - pmd_populate_kernel(mm, pmd, page_address(pte_page)) - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, + unsigned long address) { return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], GFP_KERNEL|__GFP_REPEAT); } -static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +static inline struct page *pte_alloc_one(struct mm_struct *mm, + unsigned long address) { return virt_to_page(pte_alloc_one_kernel(mm, address)); } @@ -103,7 +132,7 @@ static inline void pgtable_free(pgtable_free_t pgf) kmem_cache_free(pgtable_cache[cachenum], p); } -void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); +extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); #define __pte_free_tlb(tlb, ptepage) \ pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ @@ -111,9 +140,11 @@ void pgtable_free_tlb(struct 
mmu_gather *tlb, pgtable_free_t pgf); #define __pmd_free_tlb(tlb, pmd) \ pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) +#ifndef CONFIG_PPC_64K_PAGES #define __pud_free_tlb(tlb, pmd) \ pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) +#endif /* CONFIG_PPC_64K_PAGES */ #define check_pgt_cache() do { } while (0) diff --git a/include/asm-ppc64/pgtable-4k.h b/include/asm-ppc64/pgtable-4k.h new file mode 100644 index 000000000000..c883a2748558 --- /dev/null +++ b/include/asm-ppc64/pgtable-4k.h @@ -0,0 +1,88 @@ +/* + * Entries per page directory level. The PTE level must use a 64b record + * for each page table entry. The PMD and PGD level use a 32b record for + * each entry by assuming that each entry is page aligned. + */ +#define PTE_INDEX_SIZE 9 +#define PMD_INDEX_SIZE 7 +#define PUD_INDEX_SIZE 7 +#define PGD_INDEX_SIZE 9 + +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + +/* PMD_SHIFT determines what a second-level page table entry can map */ +#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* PTE bits */ +#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
+#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */ +#define _PAGE_F_SECOND _PAGE_SECONDARY +#define _PAGE_F_GIX _PAGE_GROUP_IX + +/* PTE flags to conserve for HPTE identification */ +#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \ + _PAGE_SECONDARY | _PAGE_GROUP_IX) + +/* PAGE_MASK gives the right answer below, but only by accident */ +/* It should be preserving the high 48 bits and then specifically */ +/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */ +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_HPTEFLAGS) + +/* Bits to mask out from a PMD to get to the PTE page */ +#define PMD_MASKED_BITS 0 +/* Bits to mask out from a PUD to get to the PMD page */ +#define PUD_MASKED_BITS 0 +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0 + +/* shift to put page number into pte */ +#define PTE_RPN_SHIFT (17) + +#define __real_pte(e,p) ((real_pte_t)(e)) +#define __rpte_to_pte(r) (r) +#define __rpte_to_hidx(r,index) (pte_val((r)) >> 12) + +#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ + do { \ + index = 0; \ + shift = mmu_psize_defs[psize].shift; \ + +#define pte_iterate_hashed_end() } while(0) + +/* + * 4-level page tables related bits + */ + +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) (pgd_val(pgd) == 0) +#define pgd_present(pgd) (pgd_val(pgd) != 0) +#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) +#define pgd_page(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS) + +#define pud_offset(pgdp, addr) \ + (((pud_t *) pgd_page(*(pgdp))) + \ + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) + +#define pud_ERROR(e) \ + printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) diff --git a/include/asm-ppc64/pgtable-64k.h b/include/asm-ppc64/pgtable-64k.h new file mode 100644 index 000000000000..c5f437c86b3c --- /dev/null +++ b/include/asm-ppc64/pgtable-64k.h @@ -0,0 +1,87 @@ +#include + + +#define PTE_INDEX_SIZE 12 +#define PMD_INDEX_SIZE 12 +#define
PUD_INDEX_SIZE 0 +#define PGD_INDEX_SIZE 4 + +#define PTE_TABLE_SIZE (sizeof(real_pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + +/* PMD_SHIFT determines what a second-level page table entry can map */ +#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PGDIR_SHIFT determines what a third-level page table entry can map */ +#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* Additional PTE bits (don't change without checking asm in hash_low.S) */ +#define _PAGE_HPTE_SUB 0x0ffff000 /* combo only: sub pages HPTE bits */ +#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */ +#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */ +#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */ +#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */ + +/* PTE flags to conserve for HPTE identification */ +#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_HPTE_SUB |\ + _PAGE_COMBO) + +/* Shift to put page number into pte. + * + * That gives us a max RPN of 32 bits, which means a max of 48 bits + * of addressable physical space. 
+ * We could get 3 more bits here by setting PTE_RPN_SHIFT to 29 but + * 32 makes PTEs more readable for debugging for now :) + */ +#define PTE_RPN_SHIFT (32) +#define PTE_RPN_MAX (1UL << (64 - PTE_RPN_SHIFT)) +#define PTE_RPN_MASK (~((1UL<> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) +#define __rpte_to_pte(r) ((r).pte) +#define __rpte_sub_valid(rpte, index) \ + (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index))) + + +/* Trick: we set __end to va + 64k, which happens to work for + * a 16M page as well, as we want only one iteration + */ +#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ + do { \ + unsigned long __end = va + PAGE_SIZE; \ + unsigned __split = (psize == MMU_PAGE_4K || \ + psize == MMU_PAGE_64K_AP); \ + shift = mmu_psize_defs[psize].shift; \ + for (index = 0; va < __end; index++, va += (1 << shift)) { \ + if (!__split || __rpte_sub_valid(rpte, index)) do { \ + +#define pte_iterate_hashed_end() } while(0); } } while(0) + + +#endif /* __ASSEMBLY__ */ diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h index 8c3f574046b6..fde93ec36abc 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h @@ -15,40 +15,11 @@ #include #endif /* __ASSEMBLY__ */ -/* - * Entries per page directory level. The PTE level must use a 64b record - * for each page table entry. The PMD and PGD level use a 32b record for - * each entry by assuming that each entry is page aligned.
- */ -#define PTE_INDEX_SIZE 9 -#define PMD_INDEX_SIZE 7 -#define PUD_INDEX_SIZE 7 -#define PGD_INDEX_SIZE 9 - -#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) -#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) -#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) - -#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) -#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) -#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE) -#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) - -/* PMD_SHIFT determines what a second-level page table entry can map */ -#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) - -/* PUD_SHIFT determines what a third-level page table entry can map */ -#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) -#define PUD_SIZE (1UL << PUD_SHIFT) -#define PUD_MASK (~(PUD_SIZE-1)) - -/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ -#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#ifdef CONFIG_PPC_64K_PAGES +#include +#else +#include +#endif #define FIRST_USER_ADDRESS 0 @@ -75,8 +46,9 @@ #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* - * Bits in a linux-style PTE. These match the bits in the - * (hardware-defined) PowerPC PTE as closely as possible. + * Common bits in a linux-style PTE. These match the bits in the + * (hardware-defined) PowerPC PTE as closely as possible. 
Additional + * bits may be defined in pgtable-*.h */ #define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */ #define _PAGE_USER 0x0002 /* matches one of the PP bits */ @@ -91,15 +63,6 @@ #define _PAGE_RW 0x0200 /* software: user write access allowed */ #define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */ #define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */ -#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */ -#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */ -#define _PAGE_HUGE 0x10000 /* 16MB page */ -/* Bits 0x7000 identify the index within an HPT Group */ -#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX) -/* PAGE_MASK gives the right answer below, but only by accident */ -/* It should be preserving the high 48 bits and then specifically */ -/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HPTEFLAGS) #define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT) @@ -122,10 +85,10 @@ #define PAGE_AGP __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_NO_CACHE) #define HAVE_PAGE_AGP -/* - * This bit in a hardware PTE indicates that the page is *not* executable. 
- */ -#define HW_NO_EXEC _PAGE_EXEC +/* PTEIDX nibble */ +#define _PTEIDX_SECONDARY 0x8 +#define _PTEIDX_GROUP_IX 0x7 + /* * POWER4 and newer have per page execute protection, older chips can only @@ -164,21 +127,10 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) #endif /* __ASSEMBLY__ */ -/* shift to put page number into pte */ -#define PTE_SHIFT (17) - #ifdef CONFIG_HUGETLB_PAGE -#ifndef __ASSEMBLY__ -int hash_huge_page(struct mm_struct *mm, unsigned long access, - unsigned long ea, unsigned long vsid, int local); -#endif /* __ASSEMBLY__ */ - #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#else - -#define hash_huge_page(mm,a,ea,vsid,local) -1 #endif @@ -197,7 +149,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) pte_t pte; - pte_val(pte) = (pfn << PTE_SHIFT) | pgprot_val(pgprot); + pte_val(pte) = (pfn << PTE_RPN_SHIFT) | pgprot_val(pgprot); return pte; } @@ -209,30 +161,25 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) /* pte_clear moved to later in this file */ -#define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) +#define pte_pfn(x) ((unsigned long)((pte_val(x)>>PTE_RPN_SHIFT))) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) +#define pmd_set(pmdp, pmdval) (pmd_val(*(pmdp)) = (pmdval)) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) == 0) #define pmd_present(pmd) (pmd_val(pmd) != 0) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) -#define pmd_page_kernel(pmd) (pmd_val(pmd)) +#define pmd_page_kernel(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) #define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) -#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp)) +#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval)) #define pud_none(pud) (!pud_val(pud)) #define 
pud_bad(pud) ((pud_val(pud)) == 0) #define pud_present(pud) (pud_val(pud) != 0) #define pud_clear(pudp) (pud_val(*(pudp)) = 0) -#define pud_page(pud) (pud_val(pud)) +#define pud_page(pud) (pud_val(pud) & ~PUD_MASKED_BITS) #define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) (pgd_val(pgd) == 0) -#define pgd_present(pgd) (pgd_val(pgd) != 0) -#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) -#define pgd_page(pgd) (pgd_val(pgd)) /* * Find an entry in a page-table-directory. We combine the address region @@ -243,9 +190,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) - #define pmd_offset(pudp,addr) \ (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) @@ -271,7 +215,6 @@ static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC;} static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;} static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;} static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;} -static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE;} static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } @@ -286,7 +229,6 @@ static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(_PAGE_DIRTY); return pte; } static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } - static inline pte_t pte_mkread(pte_t pte) { pte_val(pte) |= _PAGE_USER; return pte; } static inline pte_t pte_mkexec(pte_t pte) { @@ -298,7 +240,7 @@ static inline pte_t pte_mkdirty(pte_t pte) { static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; 
return pte; } static inline pte_t pte_mkhuge(pte_t pte) { - pte_val(pte) |= _PAGE_HUGE; return pte; } + return pte; } /* Atomic PTE updates */ static inline unsigned long pte_update(pte_t *p, unsigned long clr) @@ -321,11 +263,13 @@ static inline unsigned long pte_update(pte_t *p, unsigned long clr) /* PTE updating functions, this function puts the PTE in the * batch, doesn't actually triggers the hash flush immediately, * you need to call flush_tlb_pending() to do that. + * Pass -1 for "normal" size (4K or 64K) */ -extern void hpte_update(struct mm_struct *mm, unsigned long addr, unsigned long pte, - int wrprot); +extern void hpte_update(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long pte, int huge); -static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline int __ptep_test_and_clear_young(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { unsigned long old; @@ -333,7 +277,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned lon return 0; old = pte_update(ptep, _PAGE_ACCESSED); if (old & _PAGE_HASHPTE) { - hpte_update(mm, addr, old, 0); + hpte_update(mm, addr, ptep, old, 0); flush_tlb_pending(); } return (old & _PAGE_ACCESSED) != 0; @@ -351,7 +295,8 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned lon * moment we always flush but we need to fix hpte_update and test if the * optimisation is worth it. 
*/ -static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { unsigned long old; @@ -359,7 +304,7 @@ static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned lon return 0; old = pte_update(ptep, _PAGE_DIRTY); if (old & _PAGE_HASHPTE) - hpte_update(mm, addr, old, 0); + hpte_update(mm, addr, ptep, old, 0); return (old & _PAGE_DIRTY) != 0; } #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY @@ -371,7 +316,8 @@ static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned lon }) #define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { unsigned long old; @@ -379,7 +325,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, return; old = pte_update(ptep, _PAGE_RW); if (old & _PAGE_HASHPTE) - hpte_update(mm, addr, old, 0); + hpte_update(mm, addr, ptep, old, 0); } /* @@ -408,21 +354,23 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, }) #define __HAVE_ARCH_PTEP_GET_AND_CLEAR -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { unsigned long old = pte_update(ptep, ~0UL); if (old & _PAGE_HASHPTE) - hpte_update(mm, addr, old, 0); + hpte_update(mm, addr, ptep, old, 0); return __pte(old); } -static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t * ptep) +static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t * ptep) { unsigned long old = pte_update(ptep, ~0UL); if (old & _PAGE_HASHPTE) - hpte_update(mm, addr, old, 0); + hpte_update(mm, addr, ptep, old, 0); } /* @@ -435,7 +383,14 @@ 
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); flush_tlb_pending(); } - *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); + pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); + +#ifdef CONFIG_PPC_64K_PAGES + if (mmu_virtual_psize != MMU_PAGE_64K) + pte = __pte(pte_val(pte) | _PAGE_COMBO); +#endif /* CONFIG_PPC_64K_PAGES */ + + *ptep = pte; } /* Set the dirty and/or accessed bits atomically in a linux PTE, this @@ -482,8 +437,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) #define pmd_ERROR(e) \ printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) -#define pud_ERROR(e) \ - printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) @@ -509,12 +462,12 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); /* Encode and de-code a swap entry */ #define __swp_type(entry) (((entry).val >> 1) & 0x3f) #define __swp_offset(entry) ((entry).val >> 8) -#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) -#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> PTE_SHIFT }) -#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_SHIFT }) -#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_SHIFT) -#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_SHIFT)|_PAGE_FILE}) -#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_SHIFT) +#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)}) +#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT}) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT }) +#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_RPN_SHIFT) +#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE}) +#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT) /* * kern_addr_valid is intended to 
indicate whether an address is a valid @@ -532,29 +485,22 @@ void pgtable_cache_init(void); /* * find_linux_pte returns the address of a linux pte for a given * effective address and directory. If not found, it returns zero. - */ -static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea) + */static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea) { pgd_t *pg; pud_t *pu; pmd_t *pm; pte_t *pt = NULL; - pte_t pte; pg = pgdir + pgd_index(ea); if (!pgd_none(*pg)) { pu = pud_offset(pg, ea); if (!pud_none(*pu)) { pm = pmd_offset(pu, ea); - if (pmd_present(*pm)) { + if (pmd_present(*pm)) pt = pte_offset_kernel(pm, ea); - pte = *pt; - if (!pte_present(pte)) - pt = NULL; - } } } - return pt; } diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h index e8d0d2ab4c0f..bdb47174ff0e 100644 --- a/include/asm-ppc64/prom.h +++ b/include/asm-ppc64/prom.h @@ -188,6 +188,14 @@ extern struct device_node *of_get_next_child(const struct device_node *node, extern struct device_node *of_node_get(struct device_node *node); extern void of_node_put(struct device_node *node); +/* For scanning the flat device-tree at boot time */ +int __init of_scan_flat_dt(int (*it)(unsigned long node, + const char *uname, int depth, + void *data), + void *data); +void* __init of_get_flat_dt_prop(unsigned long node, const char *name, + unsigned long *size); + /* For updating the device tree at runtime */ extern void of_attach_node(struct device_node *); extern void of_detach_node(const struct device_node *); diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index 99b8ca52f101..0cdd66c9f4b7 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -248,7 +248,7 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) } static __inline__ unsigned long -__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new) +__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new) { unsigned long 
prev; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c9b43360fd33..9a565808da3f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -103,6 +103,9 @@ static int __init hugetlb_init(void) unsigned long i; struct page *page; + if (HPAGE_SHIFT == 0) + return 0; + for (i = 0; i < MAX_NUMNODES; ++i) INIT_LIST_HEAD(&hugepage_freelists[i]);