From 83f45fc360c8e16a330474860ebda872d1384c8c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 09:10:18 +0200 Subject: [PATCH 001/640] drm: Don't grab an fb reference for the idr The current refcounting scheme is that the fb lookup idr also holds a reference. This works out nicely bacause thus far we've always explicitly cleaned up idr entries for framebuffers: - Userspace fbs get removed in the rmfb ioctl or when the drm file gets closed. - Kernel fbs (for fbdev emulation) get cleaned up by the driver code at module unload time. But now i915 also reconstructs the bios fbs for a smooth transition. And that fb is purely transitional and should get removed immmediately once all crtcs stop using it. Of course if the i915 fbdev code decides to reuse it as the main fbdev fb then it shouldn't be cleaned up, but in that case the fbdev code will grab it's own reference. The problem is now that we also want to register that takeover fb in the idr, so that userspace can do a smooth transition (animated maybe even!) itself. But currently we have no one who will clean up the idr reference once that fb isn't useful any more, and so essentially leak it. Fix this by no longer holding a full fb reference for the idr, but instead just have a weak reference using kref_get_unless_zero. But that requires us to synchronize and clean up with the idr and fb_lock in drm_framebuffer_free, so add that. It's a bit ugly that we have to unconditionally grab the fb_lock, but without that someone might creep through a race. This leak was caught by the fb leak check in drm_mode_config_cleanup. Originally the leak was introduced in commit 46f297fb83d4f9a6f6891964beb184664341a28b Author: Jesse Barnes Date: Fri Mar 7 08:57:48 2014 -0800 drm/i915: add plane_config fetching infrastructure v2 Cc: Jesse Barnes Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=77511 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 46 +++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index fa2be249999c..33ff631c8d23 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -515,9 +515,6 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, if (ret) goto out; - /* Grab the idr reference. */ - drm_framebuffer_reference(fb); - dev->mode_config.num_fb++; list_add(&fb->head, &dev->mode_config.fb_list); out: @@ -527,10 +524,34 @@ out: } EXPORT_SYMBOL(drm_framebuffer_init); +/* dev->mode_config.fb_lock must be held! */ +static void __drm_framebuffer_unregister(struct drm_device *dev, + struct drm_framebuffer *fb) +{ + mutex_lock(&dev->mode_config.idr_mutex); + idr_remove(&dev->mode_config.crtc_idr, fb->base.id); + mutex_unlock(&dev->mode_config.idr_mutex); + + fb->base.id = 0; +} + static void drm_framebuffer_free(struct kref *kref) { struct drm_framebuffer *fb = container_of(kref, struct drm_framebuffer, refcount); + struct drm_device *dev = fb->dev; + + /* + * The lookup idr holds a weak reference, which has not necessarily been + * removed at this point. Check for that. + */ + mutex_lock(&dev->mode_config.fb_lock); + if (fb->base.id) { + /* Mark fb as reaped and drop idr ref. */ + __drm_framebuffer_unregister(dev, fb); + } + mutex_unlock(&dev->mode_config.fb_lock); + fb->funcs->destroy(fb); } @@ -567,8 +588,10 @@ struct drm_framebuffer *drm_framebuffer_lookup(struct drm_device *dev, mutex_lock(&dev->mode_config.fb_lock); fb = __drm_framebuffer_lookup(dev, id); - if (fb) - drm_framebuffer_reference(fb); + if (fb) { + if (!kref_get_unless_zero(&fb->refcount)) + fb = NULL; + } mutex_unlock(&dev->mode_config.fb_lock); return fb; @@ -612,19 +635,6 @@ static void __drm_framebuffer_unreference(struct drm_framebuffer *fb) kref_put(&fb->refcount, drm_framebuffer_free_bug); } -/* dev->mode_config.fb_lock must be held! */ -static void __drm_framebuffer_unregister(struct drm_device *dev, - struct drm_framebuffer *fb) -{ - mutex_lock(&dev->mode_config.idr_mutex); - idr_remove(&dev->mode_config.crtc_idr, fb->base.id); - mutex_unlock(&dev->mode_config.idr_mutex); - - fb->base.id = 0; - - __drm_framebuffer_unreference(fb); -} - /** * drm_framebuffer_unregister_private - unregister a private fb from the lookup idr * @fb: fb to unregister From ea6763c104c93acb6554659fe4a3c9e9328a4b51 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 11:36:38 +0200 Subject: [PATCH 002/640] video/fbdev: Always built-in video= cmdline parsing In drm/i915 we want to get at the video= cmdline modes even when we don't have fbdev support enabled, so that users can always override the kernel's initial mode selection. But that gives us a direct depency upon the parsing code in the fbdev subsystem. Since it's so little code just extract these 2 functions and always build them in. Whiel at it fix the checkpatch fail in this code. v2: Also move fb_mode_option. Spotted by the kbuild. v3: Review from Geert: - Keep the old copyright notice from fb_mem.c, although I have no idea what exactly applies. - Only compile this when needed. Cc: Geert Uytterhoeven Cc: Plagniol-Villard Cc: Tomi Valkeinen Cc: linux-fbdev@vger.kernel.org Signed-off-by: Daniel Vetter -- I prefer if we can merge this through drm-next since we'll use it there in follow-up patches. -Daniel --- drivers/video/fbdev/Kconfig | 4 + drivers/video/fbdev/core/Makefile | 1 + drivers/video/fbdev/core/fb_cmdline.c | 110 ++++++++++++++++++++++++++ drivers/video/fbdev/core/fbmem.c | 92 --------------------- drivers/video/fbdev/core/modedb.c | 3 - 5 files changed, 115 insertions(+), 95 deletions(-) create mode 100644 drivers/video/fbdev/core/fb_cmdline.c diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 59c98bfd5a8a..f1458c95a688 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -4,6 +4,7 @@ menuconfig FB tristate "Support for frame buffer devices" + select FB_CMDLINE ---help--- The frame buffer device provides an abstraction for the graphics hardware. It represents the frame buffer of some video hardware and @@ -52,6 +53,9 @@ config FIRMWARE_EDID combination with certain motherboards and monitors are known to suffer from this problem. +config FB_CMDLINE + bool + config FB_DDC tristate depends on FB diff --git a/drivers/video/fbdev/core/Makefile b/drivers/video/fbdev/core/Makefile index fa306538dac2..67f28e20a892 100644 --- a/drivers/video/fbdev/core/Makefile +++ b/drivers/video/fbdev/core/Makefile @@ -1,4 +1,5 @@ obj-y += fb_notify.o +obj-$(CONFIG_FB_CMDLINE) += fb_cmdline.o obj-$(CONFIG_FB) += fb.o fb-y := fbmem.o fbmon.o fbcmap.o fbsysfs.o \ modedb.o fbcvt.o diff --git a/drivers/video/fbdev/core/fb_cmdline.c b/drivers/video/fbdev/core/fb_cmdline.c new file mode 100644 index 000000000000..39509ccd92f1 --- /dev/null +++ b/drivers/video/fbdev/core/fb_cmdline.c @@ -0,0 +1,110 @@ +/* + * linux/drivers/video/fb_cmdline.c + * + * Copyright (C) 2014 Intel Corp + * Copyright (C) 1994 Martin Schaller + * + * 2001 - Documented with DocBook + * - Brad Douglas + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + * + * Authors: + * Vetter + */ +#include +#include + +static char *video_options[FB_MAX] __read_mostly; +static int ofonly __read_mostly; + +const char *fb_mode_option; +EXPORT_SYMBOL_GPL(fb_mode_option); + +/** + * fb_get_options - get kernel boot parameters + * @name: framebuffer name as it would appear in + * the boot parameter line + * (video=:) + * @option: the option will be stored here + * + * NOTE: Needed to maintain backwards compatibility + */ +int fb_get_options(const char *name, char **option) +{ + char *opt, *options = NULL; + int retval = 0; + int name_len = strlen(name), i; + + if (name_len && ofonly && strncmp(name, "offb", 4)) + retval = 1; + + if (name_len && !retval) { + for (i = 0; i < FB_MAX; i++) { + if (video_options[i] == NULL) + continue; + if (!video_options[i][0]) + continue; + opt = video_options[i]; + if (!strncmp(name, opt, name_len) && + opt[name_len] == ':') + options = opt + name_len + 1; + } + } + /* No match, pass global option */ + if (!options && option && fb_mode_option) + options = kstrdup(fb_mode_option, GFP_KERNEL); + if (options && !strncmp(options, "off", 3)) + retval = 1; + + if (option) + *option = options; + + return retval; +} +EXPORT_SYMBOL(fb_get_options); + +/** + * video_setup - process command line options + * @options: string of options + * + * Process command line options for frame buffer subsystem. + * + * NOTE: This function is a __setup and __init function. + * It only stores the options. Drivers have to call + * fb_get_options() as necessary. + * + * Returns zero. + * + */ +static int __init video_setup(char *options) +{ + int i, global = 0; + + if (!options || !*options) + global = 1; + + if (!global && !strncmp(options, "ofonly", 6)) { + ofonly = 1; + global = 1; + } + + if (!global && !strchr(options, ':')) { + fb_mode_option = options; + global = 1; + } + + if (!global) { + for (i = 0; i < FB_MAX; i++) { + if (video_options[i] == NULL) { + video_options[i] = options; + break; + } + } + } + + return 1; +} +__setup("video=", video_setup); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index b5e85f6c1c26..0705d8883ede 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1908,96 +1908,4 @@ int fb_new_modelist(struct fb_info *info) return err; } -static char *video_options[FB_MAX] __read_mostly; -static int ofonly __read_mostly; - -/** - * fb_get_options - get kernel boot parameters - * @name: framebuffer name as it would appear in - * the boot parameter line - * (video=:) - * @option: the option will be stored here - * - * NOTE: Needed to maintain backwards compatibility - */ -int fb_get_options(const char *name, char **option) -{ - char *opt, *options = NULL; - int retval = 0; - int name_len = strlen(name), i; - - if (name_len && ofonly && strncmp(name, "offb", 4)) - retval = 1; - - if (name_len && !retval) { - for (i = 0; i < FB_MAX; i++) { - if (video_options[i] == NULL) - continue; - if (!video_options[i][0]) - continue; - opt = video_options[i]; - if (!strncmp(name, opt, name_len) && - opt[name_len] == ':') - options = opt + name_len + 1; - } - } - /* No match, pass global option */ - if (!options && option && fb_mode_option) - options = kstrdup(fb_mode_option, GFP_KERNEL); - if (options && !strncmp(options, "off", 3)) - retval = 1; - - if (option) - *option = options; - - return retval; -} -EXPORT_SYMBOL(fb_get_options); - -#ifndef MODULE -/** - * video_setup - process command line options - * @options: string of options - * - * Process command line options for frame buffer subsystem. - * - * NOTE: This function is a __setup and __init function. - * It only stores the options. Drivers have to call - * fb_get_options() as necessary. - * - * Returns zero. - * - */ -static int __init video_setup(char *options) -{ - int i, global = 0; - - if (!options || !*options) - global = 1; - - if (!global && !strncmp(options, "ofonly", 6)) { - ofonly = 1; - global = 1; - } - - if (!global && !strchr(options, ':')) { - fb_mode_option = options; - global = 1; - } - - if (!global) { - for (i = 0; i < FB_MAX; i++) { - if (video_options[i] == NULL) { - video_options[i] = options; - break; - } - - } - } - - return 1; -} -__setup("video=", video_setup); -#endif - MODULE_LICENSE("GPL"); diff --git a/drivers/video/fbdev/core/modedb.c b/drivers/video/fbdev/core/modedb.c index a9a907c440d7..388f7971494b 100644 --- a/drivers/video/fbdev/core/modedb.c +++ b/drivers/video/fbdev/core/modedb.c @@ -29,9 +29,6 @@ #define DPRINTK(fmt, args...) #endif -const char *fb_mode_option; -EXPORT_SYMBOL_GPL(fb_mode_option); - /* * Standard video mode definitions (taken from XFree86) */ From eaf99c749d43ae74ac7ffece5512f3c73f01dfd2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 Aug 2014 10:08:32 +0200 Subject: [PATCH 003/640] drm: Perform cmdline mode parsing during connector initialisation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915.ko has a custom fbdev initialisation routine that aims to preserve the current mode set by the BIOS, unless overruled by the user. The user's wishes are determined by what, if any, mode is specified on the command line (via the video= parameter). However, that command line mode is first parsed by drm_fb_helper_initial_config() which is called after i915.ko's custom initial_config() as a fallback method. So in order for us to honour it, we need to move the cmdline parser earlier. If we perform the connector cmdline parsing as soon as we initialise the connector, that cmdline mode and forced status is then available even if the fbdev helper is not compiled in or never called. We also then expose the cmdline user mode in the connector mode lists. v2: Rebase after connector->name upheaval. v3: Adapt mga200 to look for the cmdline mode in the new place. Nicely simplifies things while at that. v4: Fix checkpatch. v5: Select FB_CMDLINE to adapt to the changed fbdev patch. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73154 Signed-off-by: Chris Wilson (v2) Cc: Jesse Barnes Cc: Ville Syrjälä Cc: Daniel Vetter Reviewed-by: Jesse Barnes (v2) Cc: dri-devel@lists.freedesktop.org Cc: Julia Lemire Cc: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/Kconfig | 1 + drivers/gpu/drm/drm_crtc.c | 55 ++++++++++++++++++++++ drivers/gpu/drm/drm_fb_helper.c | 64 ++------------------------ drivers/gpu/drm/drm_modes.c | 1 + drivers/gpu/drm/drm_probe_helper.c | 17 +++++++ drivers/gpu/drm/mgag200/mgag200_mode.c | 21 ++------- include/drm/drm_crtc.h | 1 + include/drm/drm_fb_helper.h | 1 - 8 files changed, 83 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 31894c8c1773..367f5dd23291 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -8,6 +8,7 @@ menuconfig DRM tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)" depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && MMU && HAS_DMA select HDMI + select FB_CMDLINE select I2C select I2C_ALGOBIT select DMA_SHARED_BUFFER diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 33ff631c8d23..66d3bfb8d264 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -862,6 +862,59 @@ static void drm_mode_remove(struct drm_connector *connector, drm_mode_destroy(connector->dev, mode); } +/** + * drm_connector_get_cmdline_mode - reads the user's cmdline mode + * @connector: connector to quwery + * @mode: returned mode + * + * The kernel supports per-connector configration of its consoles through + * use of the video= parameter. This function parses that option and + * extracts the user's specified mode (or enable/disable status) for a + * particular connector. This is typically only used during the early fbdev + * setup. + */ +static void drm_connector_get_cmdline_mode(struct drm_connector *connector) +{ + struct drm_cmdline_mode *mode = &connector->cmdline_mode; + char *option = NULL; + + if (fb_get_options(connector->name, &option)) + return; + + if (!drm_mode_parse_command_line_for_connector(option, + connector, + mode)) + return; + + if (mode->force) { + const char *s; + + switch (mode->force) { + case DRM_FORCE_OFF: + s = "OFF"; + break; + case DRM_FORCE_ON_DIGITAL: + s = "ON - dig"; + break; + default: + case DRM_FORCE_ON: + s = "ON"; + break; + } + + DRM_INFO("forcing %s connector %s\n", connector->name, s); + connector->force = mode->force; + } + + DRM_DEBUG_KMS("cmdline mode for connector %s %dx%d@%dHz%s%s%s\n", + connector->name, + mode->xres, mode->yres, + mode->refresh_specified ? mode->refresh : 60, + mode->rb ? " reduced blanking" : "", + mode->margins ? " with margins" : "", + mode->interlace ? " interlaced" : ""); +} + /** * drm_connector_init - Init a preallocated connector * @dev: DRM device @@ -914,6 +967,8 @@ int drm_connector_init(struct drm_device *dev, connector->edid_blob_ptr = NULL; connector->status = connector_status_unknown; + drm_connector_get_cmdline_mode(connector); + list_add_tail(&connector->head, &dev->mode_config.connector_list); dev->mode_config.num_connector++; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 3144db9dc0f1..3a6b6635e3f5 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -171,60 +171,6 @@ int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper, } EXPORT_SYMBOL(drm_fb_helper_remove_one_connector); -static int drm_fb_helper_parse_command_line(struct drm_fb_helper *fb_helper) -{ - struct drm_fb_helper_connector *fb_helper_conn; - int i; - - for (i = 0; i < fb_helper->connector_count; i++) { - struct drm_cmdline_mode *mode; - struct drm_connector *connector; - char *option = NULL; - - fb_helper_conn = fb_helper->connector_info[i]; - connector = fb_helper_conn->connector; - mode = &fb_helper_conn->cmdline_mode; - - /* do something on return - turn off connector maybe */ - if (fb_get_options(connector->name, &option)) - continue; - - if (drm_mode_parse_command_line_for_connector(option, - connector, - mode)) { - if (mode->force) { - const char *s; - switch (mode->force) { - case DRM_FORCE_OFF: - s = "OFF"; - break; - case DRM_FORCE_ON_DIGITAL: - s = "ON - dig"; - break; - default: - case DRM_FORCE_ON: - s = "ON"; - break; - } - - DRM_INFO("forcing %s connector %s\n", - connector->name, s); - connector->force = mode->force; - } - - DRM_DEBUG_KMS("cmdline mode for connector %s %dx%d@%dHz%s%s%s\n", - connector->name, - mode->xres, mode->yres, - mode->refresh_specified ? mode->refresh : 60, - mode->rb ? " reduced blanking" : "", - mode->margins ? " with margins" : "", - mode->interlace ? " interlaced" : ""); - } - - } - return 0; -} - static void drm_fb_helper_save_lut_atomic(struct drm_crtc *crtc, struct drm_fb_helper *helper) { uint16_t *r_base, *g_base, *b_base; @@ -1013,7 +959,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_connector *fb_helper_conn = fb_helper->connector_info[i]; struct drm_cmdline_mode *cmdline_mode; - cmdline_mode = &fb_helper_conn->cmdline_mode; + cmdline_mode = &fb_helper_conn->connector->cmdline_mode; if (cmdline_mode->bpp_specified) { switch (cmdline_mode->bpp) { @@ -1260,9 +1206,7 @@ EXPORT_SYMBOL(drm_has_preferred_mode); static bool drm_has_cmdline_mode(struct drm_fb_helper_connector *fb_connector) { - struct drm_cmdline_mode *cmdline_mode; - cmdline_mode = &fb_connector->cmdline_mode; - return cmdline_mode->specified; + return fb_connector->connector->cmdline_mode.specified; } struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *fb_helper_conn, @@ -1272,7 +1216,7 @@ struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *f struct drm_display_mode *mode = NULL; bool prefer_non_interlace; - cmdline_mode = &fb_helper_conn->cmdline_mode; + cmdline_mode = &fb_helper_conn->connector->cmdline_mode; if (cmdline_mode->specified == false) return mode; @@ -1657,8 +1601,6 @@ bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel) struct drm_device *dev = fb_helper->dev; int count = 0; - drm_fb_helper_parse_command_line(fb_helper); - mutex_lock(&dev->mode_config.mutex); count = drm_fb_helper_probe_connector_modes(fb_helper, dev->mode_config.max_width, diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index bedf1894e17e..d1b7d2006529 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1259,6 +1259,7 @@ drm_mode_create_from_cmdline_mode(struct drm_device *dev, if (!mode) return NULL; + mode->type |= DRM_MODE_TYPE_USERDEF; drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V); return mode; } diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index db7d250f7ac7..6857e9ad6339 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -82,6 +82,22 @@ static void drm_mode_validate_flag(struct drm_connector *connector, return; } +static int drm_helper_probe_add_cmdline_mode(struct drm_connector *connector) +{ + struct drm_display_mode *mode; + + if (!connector->cmdline_mode.specified) + return 0; + + mode = drm_mode_create_from_cmdline_mode(connector->dev, + &connector->cmdline_mode); + if (mode == NULL) + return 0; + + drm_mode_probed_add(connector, mode); + return 1; +} + static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connector *connector, uint32_t maxX, uint32_t maxY, bool merge_type_bits) { @@ -141,6 +157,7 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect if (count == 0 && connector->status == connector_status_connected) count = drm_add_modes_noedid(connector, 1024, 768); + count += drm_helper_probe_add_cmdline_mode(connector); if (count == 0) goto prune; diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index 45f04dea0ac2..83485ab81ce8 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1483,11 +1483,7 @@ static int mga_vga_mode_valid(struct drm_connector *connector, { struct drm_device *dev = connector->dev; struct mga_device *mdev = (struct mga_device*)dev->dev_private; - struct mga_fbdev *mfbdev = mdev->mfbdev; - struct drm_fb_helper *fb_helper = &mfbdev->helper; - struct drm_fb_helper_connector *fb_helper_conn = NULL; int bpp = 32; - int i = 0; if (IS_G200_SE(mdev)) { if (mdev->unique_rev_id == 0x01) { @@ -1537,21 +1533,14 @@ static int mga_vga_mode_valid(struct drm_connector *connector, } /* Validate the mode input by the user */ - for (i = 0; i < fb_helper->connector_count; i++) { - if (fb_helper->connector_info[i]->connector == connector) { - /* Found the helper for this connector */ - fb_helper_conn = fb_helper->connector_info[i]; - if (fb_helper_conn->cmdline_mode.specified) { - if (fb_helper_conn->cmdline_mode.bpp_specified) { - bpp = fb_helper_conn->cmdline_mode.bpp; - } - } - } + if (connector->cmdline_mode.specified) { + if (connector->cmdline_mode.bpp_specified) + bpp = connector->cmdline_mode.bpp; } if ((mode->hdisplay * mode->vdisplay * (bpp/8)) > mdev->mc.vram_size) { - if (fb_helper_conn) - fb_helper_conn->cmdline_mode.specified = false; + if (connector->cmdline_mode.specified) + connector->cmdline_mode.specified = false; return MODE_BAD; } diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index f1105d0da059..c530b4920a09 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -548,6 +548,7 @@ struct drm_connector { void *helper_private; /* forced on connector */ + struct drm_cmdline_mode cmdline_mode; enum drm_connector_force force; bool override_edid; uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER]; diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index bfd329d613c4..f4ad254e3488 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -77,7 +77,6 @@ struct drm_fb_helper_funcs { struct drm_fb_helper_connector { struct drm_connector *connector; - struct drm_cmdline_mode cmdline_mode; }; struct drm_fb_helper { From ddde43711fdde505ac413102faa2352704cd858a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:02:50 +0300 Subject: [PATCH 004/640] drm: Warn when leaking flip events on close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Warn when there are events on the file_priv->event_list just before file_priv gets freed. This can occur if the driver doesn't clean up pending page flip events in ->preclose(). Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/drm_fops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c index 8f91062db5b6..0fa4dadac4c6 100644 --- a/drivers/gpu/drm/drm_fops.c +++ b/drivers/gpu/drm/drm_fops.c @@ -529,6 +529,8 @@ int drm_release(struct inode *inode, struct file *filp) if (drm_core_check_feature(dev, DRIVER_PRIME)) drm_prime_destroy_file_private(&file_priv->prime); + WARN_ON(!list_empty(&file_priv->event_list)); + put_pid(file_priv->pid); kfree(file_priv); From e6ae8687a87b1fe5c25e824c8ad300f5587eb622 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 6 Aug 2014 13:16:59 -0400 Subject: [PATCH 005/640] drm: idiot-proof vblank After spending slightly more time than I'd care to admit debugging the various and presumably spectacular way things fail when you pass too low a value to drm_vblank_init() (thanks console-lock for not letting me see the carnage!), I decided it might be a good idea to add some sanity checking. Signed-off-by: Rob Clark Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 0de123afdb34..6f16a104d6d0 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -730,6 +730,8 @@ EXPORT_SYMBOL(drm_get_last_vbltimestamp); */ u32 drm_vblank_count(struct drm_device *dev, int crtc) { + if (WARN_ON(crtc >= dev->num_crtcs)) + return 0; return atomic_read(&dev->vblank[crtc].count); } EXPORT_SYMBOL(drm_vblank_count); @@ -752,6 +754,9 @@ u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc, { u32 cur_vblank; + if (WARN_ON(crtc >= dev->num_crtcs)) + return 0; + /* Read timestamp from slot of _vblank_time ringbuffer * that corresponds to current vblank count. Retry if * count has incremented during readout. This works like @@ -927,6 +932,9 @@ int drm_vblank_get(struct drm_device *dev, int crtc) unsigned long irqflags; int ret = 0; + if (WARN_ON(crtc >= dev->num_crtcs)) + return -EINVAL; + spin_lock_irqsave(&dev->vbl_lock, irqflags); /* Going from 0->1 means we have to enable interrupts again */ if (atomic_add_return(1, &dev->vblank[crtc].refcount) == 1) { @@ -975,6 +983,9 @@ void drm_vblank_put(struct drm_device *dev, int crtc) { BUG_ON(atomic_read(&dev->vblank[crtc].refcount) == 0); + if (WARN_ON(crtc >= dev->num_crtcs)) + return; + /* Last user schedules interrupt disable */ if (atomic_dec_and_test(&dev->vblank[crtc].refcount) && (drm_vblank_offdelay > 0)) @@ -1019,6 +1030,9 @@ void drm_vblank_off(struct drm_device *dev, int crtc) unsigned long irqflags; unsigned int seq; + if (WARN_ON(crtc >= dev->num_crtcs)) + return; + spin_lock_irqsave(&dev->vbl_lock, irqflags); vblank_disable_and_save(dev, crtc); wake_up(&dev->vblank[crtc].queue); @@ -1078,6 +1092,9 @@ void drm_vblank_on(struct drm_device *dev, int crtc) { unsigned long irqflags; + if (WARN_ON(crtc >= dev->num_crtcs)) + return; + spin_lock_irqsave(&dev->vbl_lock, irqflags); /* re-enable interrupts if there's are users left */ if (atomic_read(&dev->vblank[crtc].refcount) != 0) @@ -1131,6 +1148,10 @@ void drm_vblank_pre_modeset(struct drm_device *dev, int crtc) /* vblank is not initialized (IRQ not installed ?), or has been freed */ if (!dev->num_crtcs) return; + + if (WARN_ON(crtc >= dev->num_crtcs)) + return; + /* * To avoid all the problems that might happen if interrupts * were enabled/disabled around or between these calls, we just @@ -1439,6 +1460,9 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) if (!dev->num_crtcs) return false; + if (WARN_ON(crtc >= dev->num_crtcs)) + return false; + /* Need timestamp lock to prevent concurrent execution with * vblank enable/disable, as this would cause inconsistent * or corrupted timestamps and vblank counts. From 7ffd7a68511c710b84db3548a1997fd2625f580a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:44 +0300 Subject: [PATCH 006/640] drm: Always reject drm_vblank_get() after drm_vblank_off() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure drm_vblank_get() never succeeds when called between drm_vblank_off() and drm_vblank_on(). Borrow a trick from the old drm_vblank_{pre,post}_modeset() functions and just bump the refcount in drm_vblank_off() and drop it in drm_vblank_on(). When drm_vblank_get() encounters a >0 refcount and the vblank interrupt is already disabled it will simply return -EINVAL. Hopefully the use of inmodeset won't conflict badly with drm_vblank_{pre,post}_modeset(). For i915 there's a window between drm_vblank_off() and marking the crtc as inactive where the current code still allows drm_vblank_get(). v2: Describe what drm_vblank_get() does to explain how a simple refcount bump manages to fix things (Daniel) Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 0de123afdb34..b16a63622bad 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1039,6 +1039,15 @@ void drm_vblank_off(struct drm_device *dev, int crtc) } spin_unlock(&dev->event_lock); + /* + * Prevent subsequent drm_vblank_get() from re-enabling + * the vblank interrupt by bumping the refcount. + */ + if (!dev->vblank[crtc].inmodeset) { + atomic_inc(&dev->vblank[crtc].refcount); + dev->vblank[crtc].inmodeset = 1; + } + spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } EXPORT_SYMBOL(drm_vblank_off); @@ -1079,6 +1088,11 @@ void drm_vblank_on(struct drm_device *dev, int crtc) unsigned long irqflags; spin_lock_irqsave(&dev->vbl_lock, irqflags); + /* Drop our private "prevent drm_vblank_get" refcount */ + if (dev->vblank[crtc].inmodeset) { + atomic_dec(&dev->vblank[crtc].refcount); + dev->vblank[crtc].inmodeset = 0; + } /* re-enable interrupts if there's are users left */ if (atomic_read(&dev->vblank[crtc].refcount) != 0) WARN_ON(drm_vblank_enable(dev, crtc)); From 08c71e5e817a956389af5da5e99ab3e26d5c673d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:45 +0300 Subject: [PATCH 007/640] drm/i915: Warn if drm_vblank_get() still works after drm_vblank_off() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Drop the drm_vblank_off() (Daniel) Use drm_crtc_vblank_{get,put}() Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 99eb7cad62a8..8f6b932d8e79 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1341,6 +1341,12 @@ static void assert_sprites_disabled(struct drm_i915_private *dev_priv, } } +static void assert_vblank_disabled(struct drm_crtc *crtc) +{ + if (WARN_ON(drm_crtc_vblank_get(crtc) == 0)) + drm_crtc_vblank_put(crtc); +} + static void ibx_assert_pch_refclk_enabled(struct drm_i915_private *dev_priv) { u32 val; @@ -3905,6 +3911,8 @@ static void intel_crtc_enable_planes(struct drm_crtc *crtc) int pipe = intel_crtc->pipe; int plane = intel_crtc->plane; + assert_vblank_disabled(crtc); + drm_vblank_on(dev, pipe); intel_enable_primary_hw_plane(dev_priv, plane, pipe); @@ -3954,6 +3962,8 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc) intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe)); drm_vblank_off(dev, pipe); + + assert_vblank_disabled(crtc); } static void ironlake_crtc_enable(struct drm_crtc *crtc) From 844b03f27739135fe1fed2fef06da0ffc4c7a081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:46 +0300 Subject: [PATCH 008/640] drm: Don't clear vblank timestamps when vblank interrupt is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clearing the timestamps causes us to send zeroed timestamps to userspace if they get sent out in response to the drm_vblank_off(). It's better to send the very latest timestamp and count instead. Testcase: igt/kms_flip/modeset-vs-vblank-race Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index b16a63622bad..65d2da9b604b 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -55,14 +55,6 @@ */ #define DRM_REDUNDANT_VBLIRQ_THRESH_NS 1000000 -/* - * Clear vblank timestamp buffer for a crtc. - */ -static void clear_vblank_timestamps(struct drm_device *dev, int crtc) -{ - memset(dev->vblank[crtc].time, 0, sizeof(dev->vblank[crtc].time)); -} - /* * Disable vblank irq's on crtc, make sure that last vblank count * of hardware and corresponding consistent software vblank counter @@ -131,9 +123,6 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) smp_mb__after_atomic(); } - /* Invalidate all timestamps while vblank irq's are off. */ - clear_vblank_timestamps(dev, crtc); - spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); } From 13b030af54a5e307cbcccdf5479873fbc4b7f185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:47 +0300 Subject: [PATCH 009/640] drm: Move drm_update_vblank_count() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move drm_update_vblank_count() to avoid forward a declaration. No functional change. Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 128 +++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 65d2da9b604b..af965174b083 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -55,6 +55,70 @@ */ #define DRM_REDUNDANT_VBLIRQ_THRESH_NS 1000000 +/** + * drm_update_vblank_count - update the master vblank counter + * @dev: DRM device + * @crtc: counter to update + * + * Call back into the driver to update the appropriate vblank counter + * (specified by @crtc). Deal with wraparound, if it occurred, and + * update the last read value so we can deal with wraparound on the next + * call if necessary. + * + * Only necessary when going from off->on, to account for frames we + * didn't get an interrupt for. + * + * Note: caller must hold dev->vbl_lock since this reads & writes + * device vblank fields. + */ +static void drm_update_vblank_count(struct drm_device *dev, int crtc) +{ + u32 cur_vblank, diff, tslot, rc; + struct timeval t_vblank; + + /* + * Interrupts were disabled prior to this call, so deal with counter + * wrap if needed. + * NOTE! It's possible we lost a full dev->max_vblank_count events + * here if the register is small or we had vblank interrupts off for + * a long time. + * + * We repeat the hardware vblank counter & timestamp query until + * we get consistent results. This to prevent races between gpu + * updating its hardware counter while we are retrieving the + * corresponding vblank timestamp. + */ + do { + cur_vblank = dev->driver->get_vblank_counter(dev, crtc); + rc = drm_get_last_vbltimestamp(dev, crtc, &t_vblank, 0); + } while (cur_vblank != dev->driver->get_vblank_counter(dev, crtc)); + + /* Deal with counter wrap */ + diff = cur_vblank - dev->vblank[crtc].last; + if (cur_vblank < dev->vblank[crtc].last) { + diff += dev->max_vblank_count; + + DRM_DEBUG("last_vblank[%d]=0x%x, cur_vblank=0x%x => diff=0x%x\n", + crtc, dev->vblank[crtc].last, cur_vblank, diff); + } + + DRM_DEBUG("enabling vblank interrupts on crtc %d, missed %d\n", + crtc, diff); + + /* Reinitialize corresponding vblank timestamp if high-precision query + * available. Skip this step if query unsupported or failed. Will + * reinitialize delayed at next vblank interrupt in that case. + */ + if (rc) { + tslot = atomic_read(&dev->vblank[crtc].count) + diff; + vblanktimestamp(dev, crtc, tslot) = t_vblank; + } + + smp_mb__before_atomic(); + atomic_add(diff, &dev->vblank[crtc].count); + smp_mb__after_atomic(); +} + /* * Disable vblank irq's on crtc, make sure that last vblank count * of hardware and corresponding consistent software vblank counter @@ -798,70 +862,6 @@ void drm_send_vblank_event(struct drm_device *dev, int crtc, } EXPORT_SYMBOL(drm_send_vblank_event); -/** - * drm_update_vblank_count - update the master vblank counter - * @dev: DRM device - * @crtc: counter to update - * - * Call back into the driver to update the appropriate vblank counter - * (specified by @crtc). Deal with wraparound, if it occurred, and - * update the last read value so we can deal with wraparound on the next - * call if necessary. - * - * Only necessary when going from off->on, to account for frames we - * didn't get an interrupt for. - * - * Note: caller must hold dev->vbl_lock since this reads & writes - * device vblank fields. - */ -static void drm_update_vblank_count(struct drm_device *dev, int crtc) -{ - u32 cur_vblank, diff, tslot, rc; - struct timeval t_vblank; - - /* - * Interrupts were disabled prior to this call, so deal with counter - * wrap if needed. - * NOTE! It's possible we lost a full dev->max_vblank_count events - * here if the register is small or we had vblank interrupts off for - * a long time. - * - * We repeat the hardware vblank counter & timestamp query until - * we get consistent results. This to prevent races between gpu - * updating its hardware counter while we are retrieving the - * corresponding vblank timestamp. - */ - do { - cur_vblank = dev->driver->get_vblank_counter(dev, crtc); - rc = drm_get_last_vbltimestamp(dev, crtc, &t_vblank, 0); - } while (cur_vblank != dev->driver->get_vblank_counter(dev, crtc)); - - /* Deal with counter wrap */ - diff = cur_vblank - dev->vblank[crtc].last; - if (cur_vblank < dev->vblank[crtc].last) { - diff += dev->max_vblank_count; - - DRM_DEBUG("last_vblank[%d]=0x%x, cur_vblank=0x%x => diff=0x%x\n", - crtc, dev->vblank[crtc].last, cur_vblank, diff); - } - - DRM_DEBUG("enabling vblank interrupts on crtc %d, missed %d\n", - crtc, diff); - - /* Reinitialize corresponding vblank timestamp if high-precision query - * available. Skip this step if query unsupported or failed. Will - * reinitialize delayed at next vblank interrupt in that case. - */ - if (rc) { - tslot = atomic_read(&dev->vblank[crtc].count) + diff; - vblanktimestamp(dev, crtc, tslot) = t_vblank; - } - - smp_mb__before_atomic(); - atomic_add(diff, &dev->vblank[crtc].count); - smp_mb__after_atomic(); -} - /** * drm_vblank_enable - enable the vblank interrupt on a CRTC * @dev: DRM device From 812e7465a7decf3cca0b5f71977a25eecd9626a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:48 +0300 Subject: [PATCH 010/640] drm: Have the vblank counter account for the time between vblank irq disable and drm_vblank_off() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the vblank irq has already been disabled (via the disable timer) when we call drm_vblank_off() sample the counter and timestamp one last time. This will make the sure that the user space visible counter will account for time between vblank irq disable and drm_vblank_off(). Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index af965174b083..1f86f6c6ecc6 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -140,6 +140,19 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) */ spin_lock_irqsave(&dev->vblank_time_lock, irqflags); + /* + * If the vblank interrupt was already disbled update the count + * and timestamp to maintain the appearance that the counter + * has been ticking all along until this time. This makes the + * count account for the entire time between drm_vblank_on() and + * drm_vblank_off(). + */ + if (!dev->vblank[crtc].enabled) { + drm_update_vblank_count(dev, crtc); + spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); + return; + } + dev->driver->disable_vblank(dev, crtc); dev->vblank[crtc].enabled = false; From f8ad028cc033f75fc479ca1c30e2ea4ba56e5269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:49 +0300 Subject: [PATCH 011/640] drm: Avoid random vblank counter jumps if the hardware counter has been reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When drm_vblank_on() is called the hardware vblank counter may have been reset, so we can't trust that the old values sampled prior to drm_vblank_off() have anything to do with the new values. So update the .last count in drm_vblank_on() to make the first drm_vblank_enable() consider that as the reference point. This will correct the user space visible counter to account for the time between drm_vblank_on() and the first drm_vblank_enable() calls. For extra safety subtract one from the .last count in drm_vblank_on() to make sure that user space will never see the same counter value before and after modeset. Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 1f86f6c6ecc6..fc1525a499d9 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1095,6 +1095,18 @@ void drm_vblank_on(struct drm_device *dev, int crtc) atomic_dec(&dev->vblank[crtc].refcount); dev->vblank[crtc].inmodeset = 0; } + + /* + * sample the current counter to avoid random jumps + * when drm_vblank_enable() applies the diff + * + * -1 to make sure user will never see the same + * vblank counter value before and after a modeset + */ + dev->vblank[crtc].last = + (dev->driver->get_vblank_counter(dev, crtc) - 1) & + dev->max_vblank_count; + /* re-enable interrupts if there's are users left */ if (atomic_read(&dev->vblank[crtc].refcount) != 0) WARN_ON(drm_vblank_enable(dev, crtc)); From 8a51d5bef07f1c8c59de20089fb27ea39d395f1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:50 +0300 Subject: [PATCH 012/640] drm: Reduce the amount of dev->vblank[crtc] in the code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Declare a local struct drm_vblank_crtc * and use that instead of having to do dig it out via 'dev->vblank[crtc]' everywhere. Performed with the following coccinelle incantation, and a few manual whitespace cleanups: @@ identifier func,member; expression num_crtcs; struct drm_device *dev; unsigned int crtc; @@ func (...) { + struct drm_vblank_crtc *vblank; ... if (crtc >= num_crtcs) return ...; + vblank = &dev->vblank[crtc]; <+... ( - dev->vblank[crtc].member + vblank->member | - &(dev->vblank[crtc]) + vblank ) ...+> } @@ struct drm_device *dev; int crtc; identifier member; expression num_crtcs; @@ for (crtc = 0; crtc < num_crtcs; crtc++) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; + <+... ( - dev->vblank[crtc].member + vblank->member | - &(dev->vblank[crtc]) + vblank ) ...+> } @@ identifier func,member; @@ func (struct drm_device *dev, int crtc, ...) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; <+... ( - dev->vblank[crtc].member + vblank->member | - &(dev->vblank[crtc]) + vblank ) ...+> } v2: Rebased Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 134 ++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index fc1525a499d9..b4460bf0e0e4 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -73,6 +73,7 @@ */ static void drm_update_vblank_count(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; u32 cur_vblank, diff, tslot, rc; struct timeval t_vblank; @@ -94,12 +95,12 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc) } while (cur_vblank != dev->driver->get_vblank_counter(dev, crtc)); /* Deal with counter wrap */ - diff = cur_vblank - dev->vblank[crtc].last; - if (cur_vblank < dev->vblank[crtc].last) { + diff = cur_vblank - vblank->last; + if (cur_vblank < vblank->last) { diff += dev->max_vblank_count; DRM_DEBUG("last_vblank[%d]=0x%x, cur_vblank=0x%x => diff=0x%x\n", - crtc, dev->vblank[crtc].last, cur_vblank, diff); + crtc, vblank->last, cur_vblank, diff); } DRM_DEBUG("enabling vblank interrupts on crtc %d, missed %d\n", @@ -110,12 +111,12 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc) * reinitialize delayed at next vblank interrupt in that case. */ if (rc) { - tslot = atomic_read(&dev->vblank[crtc].count) + diff; + tslot = atomic_read(&vblank->count) + diff; vblanktimestamp(dev, crtc, tslot) = t_vblank; } smp_mb__before_atomic(); - atomic_add(diff, &dev->vblank[crtc].count); + atomic_add(diff, &vblank->count); smp_mb__after_atomic(); } @@ -127,6 +128,7 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc) */ static void vblank_disable_and_save(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; unsigned long irqflags; u32 vblcount; s64 diff_ns; @@ -147,14 +149,14 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) * count account for the entire time between drm_vblank_on() and * drm_vblank_off(). */ - if (!dev->vblank[crtc].enabled) { + if (!vblank->enabled) { drm_update_vblank_count(dev, crtc); spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); return; } dev->driver->disable_vblank(dev, crtc); - dev->vblank[crtc].enabled = false; + vblank->enabled = false; /* No further vblank irq's will be processed after * this point. Get current hardware vblank count and @@ -169,9 +171,9 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) * delayed gpu counter increment. */ do { - dev->vblank[crtc].last = dev->driver->get_vblank_counter(dev, crtc); + vblank->last = dev->driver->get_vblank_counter(dev, crtc); vblrc = drm_get_last_vbltimestamp(dev, crtc, &tvblank, 0); - } while (dev->vblank[crtc].last != dev->driver->get_vblank_counter(dev, crtc) && (--count) && vblrc); + } while (vblank->last != dev->driver->get_vblank_counter(dev, crtc) && (--count) && vblrc); if (!count) vblrc = 0; @@ -179,7 +181,7 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) /* Compute time difference to stored timestamp of last vblank * as updated by last invocation of drm_handle_vblank() in vblank irq. */ - vblcount = atomic_read(&dev->vblank[crtc].count); + vblcount = atomic_read(&vblank->count); diff_ns = timeval_to_ns(&tvblank) - timeval_to_ns(&vblanktimestamp(dev, crtc, vblcount)); @@ -196,7 +198,7 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) * hope for the best. */ if ((vblrc > 0) && (abs64(diff_ns) > 1000000)) { - atomic_inc(&dev->vblank[crtc].count); + atomic_inc(&vblank->count); smp_mb__after_atomic(); } @@ -236,8 +238,10 @@ void drm_vblank_cleanup(struct drm_device *dev) return; for (crtc = 0; crtc < dev->num_crtcs; crtc++) { - del_timer_sync(&dev->vblank[crtc].disable_timer); - vblank_disable_fn((unsigned long)&dev->vblank[crtc]); + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; + + del_timer_sync(&vblank->disable_timer); + vblank_disable_fn((unsigned long)vblank); } kfree(dev->vblank); @@ -270,11 +274,13 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs) goto err; for (i = 0; i < num_crtcs; i++) { - dev->vblank[i].dev = dev; - dev->vblank[i].crtc = i; - init_waitqueue_head(&dev->vblank[i].queue); - setup_timer(&dev->vblank[i].disable_timer, vblank_disable_fn, - (unsigned long)&dev->vblank[i]); + struct drm_vblank_crtc *vblank = &dev->vblank[i]; + + vblank->dev = dev; + vblank->crtc = i; + init_waitqueue_head(&vblank->queue); + setup_timer(&vblank->disable_timer, vblank_disable_fn, + (unsigned long)vblank); } DRM_INFO("Supports vblank timestamp caching Rev 2 (21.10.2013).\n"); @@ -426,9 +432,11 @@ int drm_irq_uninstall(struct drm_device *dev) if (dev->num_crtcs) { spin_lock_irqsave(&dev->vbl_lock, irqflags); for (i = 0; i < dev->num_crtcs; i++) { - wake_up(&dev->vblank[i].queue); - dev->vblank[i].enabled = false; - dev->vblank[i].last = + struct drm_vblank_crtc *vblank = &dev->vblank[i]; + + wake_up(&vblank->queue); + vblank->enabled = false; + vblank->last = dev->driver->get_vblank_counter(dev, i); } spin_unlock_irqrestore(&dev->vbl_lock, irqflags); @@ -796,7 +804,9 @@ EXPORT_SYMBOL(drm_get_last_vbltimestamp); */ u32 drm_vblank_count(struct drm_device *dev, int crtc) { - return atomic_read(&dev->vblank[crtc].count); + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; + + return atomic_read(&vblank->count); } EXPORT_SYMBOL(drm_vblank_count); @@ -816,6 +826,7 @@ EXPORT_SYMBOL(drm_vblank_count); u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc, struct timeval *vblanktime) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; u32 cur_vblank; /* Read timestamp from slot of _vblank_time ringbuffer @@ -824,10 +835,10 @@ u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc, * a seqlock. */ do { - cur_vblank = atomic_read(&dev->vblank[crtc].count); + cur_vblank = atomic_read(&vblank->count); *vblanktime = vblanktimestamp(dev, crtc, cur_vblank); smp_rmb(); - } while (cur_vblank != atomic_read(&dev->vblank[crtc].count)); + } while (cur_vblank != atomic_read(&vblank->count)); return cur_vblank; } @@ -882,13 +893,14 @@ EXPORT_SYMBOL(drm_send_vblank_event); */ static int drm_vblank_enable(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; int ret = 0; assert_spin_locked(&dev->vbl_lock); spin_lock(&dev->vblank_time_lock); - if (!dev->vblank[crtc].enabled) { + if (!vblank->enabled) { /* * Enable vblank irqs under vblank_time_lock protection. * All vblank count & timestamp updates are held off @@ -899,9 +911,9 @@ static int drm_vblank_enable(struct drm_device *dev, int crtc) ret = dev->driver->enable_vblank(dev, crtc); DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n", crtc, ret); if (ret) - atomic_dec(&dev->vblank[crtc].refcount); + atomic_dec(&vblank->refcount); else { - dev->vblank[crtc].enabled = true; + vblank->enabled = true; drm_update_vblank_count(dev, crtc); } } @@ -926,16 +938,17 @@ static int drm_vblank_enable(struct drm_device *dev, int crtc) */ int drm_vblank_get(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; unsigned long irqflags; int ret = 0; spin_lock_irqsave(&dev->vbl_lock, irqflags); /* Going from 0->1 means we have to enable interrupts again */ - if (atomic_add_return(1, &dev->vblank[crtc].refcount) == 1) { + if (atomic_add_return(1, &vblank->refcount) == 1) { ret = drm_vblank_enable(dev, crtc); } else { - if (!dev->vblank[crtc].enabled) { - atomic_dec(&dev->vblank[crtc].refcount); + if (!vblank->enabled) { + atomic_dec(&vblank->refcount); ret = -EINVAL; } } @@ -975,12 +988,14 @@ EXPORT_SYMBOL(drm_crtc_vblank_get); */ void drm_vblank_put(struct drm_device *dev, int crtc) { - BUG_ON(atomic_read(&dev->vblank[crtc].refcount) == 0); + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; + + BUG_ON(atomic_read(&vblank->refcount) == 0); /* Last user schedules interrupt disable */ - if (atomic_dec_and_test(&dev->vblank[crtc].refcount) && + if (atomic_dec_and_test(&vblank->refcount) && (drm_vblank_offdelay > 0)) - mod_timer(&dev->vblank[crtc].disable_timer, + mod_timer(&vblank->disable_timer, jiffies + ((drm_vblank_offdelay * HZ)/1000)); } EXPORT_SYMBOL(drm_vblank_put); @@ -1016,6 +1031,7 @@ EXPORT_SYMBOL(drm_crtc_vblank_put); */ void drm_vblank_off(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; struct drm_pending_vblank_event *e, *t; struct timeval now; unsigned long irqflags; @@ -1023,7 +1039,7 @@ void drm_vblank_off(struct drm_device *dev, int crtc) spin_lock_irqsave(&dev->vbl_lock, irqflags); vblank_disable_and_save(dev, crtc); - wake_up(&dev->vblank[crtc].queue); + wake_up(&vblank->queue); /* Send any queued vblank events, lest the natives grow disquiet */ seq = drm_vblank_count_and_time(dev, crtc, &now); @@ -1045,9 +1061,9 @@ void drm_vblank_off(struct drm_device *dev, int crtc) * Prevent subsequent drm_vblank_get() from re-enabling * the vblank interrupt by bumping the refcount. */ - if (!dev->vblank[crtc].inmodeset) { - atomic_inc(&dev->vblank[crtc].refcount); - dev->vblank[crtc].inmodeset = 1; + if (!vblank->inmodeset) { + atomic_inc(&vblank->refcount); + vblank->inmodeset = 1; } spin_unlock_irqrestore(&dev->vbl_lock, irqflags); @@ -1087,13 +1103,14 @@ EXPORT_SYMBOL(drm_crtc_vblank_off); */ void drm_vblank_on(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; unsigned long irqflags; spin_lock_irqsave(&dev->vbl_lock, irqflags); /* Drop our private "prevent drm_vblank_get" refcount */ - if (dev->vblank[crtc].inmodeset) { - atomic_dec(&dev->vblank[crtc].refcount); - dev->vblank[crtc].inmodeset = 0; + if (vblank->inmodeset) { + atomic_dec(&vblank->refcount); + vblank->inmodeset = 0; } /* @@ -1103,12 +1120,12 @@ void drm_vblank_on(struct drm_device *dev, int crtc) * -1 to make sure user will never see the same * vblank counter value before and after a modeset */ - dev->vblank[crtc].last = + vblank->last = (dev->driver->get_vblank_counter(dev, crtc) - 1) & dev->max_vblank_count; /* re-enable interrupts if there's are users left */ - if (atomic_read(&dev->vblank[crtc].refcount) != 0) + if (atomic_read(&vblank->refcount) != 0) WARN_ON(drm_vblank_enable(dev, crtc)); spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } @@ -1156,6 +1173,8 @@ EXPORT_SYMBOL(drm_crtc_vblank_on); */ void drm_vblank_pre_modeset(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; + /* vblank is not initialized (IRQ not installed ?), or has been freed */ if (!dev->num_crtcs) return; @@ -1166,10 +1185,10 @@ void drm_vblank_pre_modeset(struct drm_device *dev, int crtc) * to avoid corrupting the count if multiple, mismatch calls occur), * so that interrupts remain enabled in the interim. */ - if (!dev->vblank[crtc].inmodeset) { - dev->vblank[crtc].inmodeset = 0x1; + if (!vblank->inmodeset) { + vblank->inmodeset = 0x1; if (drm_vblank_get(dev, crtc) == 0) - dev->vblank[crtc].inmodeset |= 0x2; + vblank->inmodeset |= 0x2; } } EXPORT_SYMBOL(drm_vblank_pre_modeset); @@ -1184,21 +1203,22 @@ EXPORT_SYMBOL(drm_vblank_pre_modeset); */ void drm_vblank_post_modeset(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; unsigned long irqflags; /* vblank is not initialized (IRQ not installed ?), or has been freed */ if (!dev->num_crtcs) return; - if (dev->vblank[crtc].inmodeset) { + if (vblank->inmodeset) { spin_lock_irqsave(&dev->vbl_lock, irqflags); dev->vblank_disable_allowed = true; spin_unlock_irqrestore(&dev->vbl_lock, irqflags); - if (dev->vblank[crtc].inmodeset & 0x2) + if (vblank->inmodeset & 0x2) drm_vblank_put(dev, crtc); - dev->vblank[crtc].inmodeset = 0; + vblank->inmodeset = 0; } } EXPORT_SYMBOL(drm_vblank_post_modeset); @@ -1333,6 +1353,7 @@ err_put: int drm_wait_vblank(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct drm_vblank_crtc *vblank; union drm_wait_vblank *vblwait = data; int ret; unsigned int flags, seq, crtc, high_crtc; @@ -1362,6 +1383,8 @@ int drm_wait_vblank(struct drm_device *dev, void *data, if (crtc >= dev->num_crtcs) return -EINVAL; + vblank = &dev->vblank[crtc]; + ret = drm_vblank_get(dev, crtc); if (ret) { DRM_DEBUG("failed to acquire vblank counter, %d\n", ret); @@ -1394,11 +1417,11 @@ int drm_wait_vblank(struct drm_device *dev, void *data, DRM_DEBUG("waiting on vblank count %d, crtc %d\n", vblwait->request.sequence, crtc); - dev->vblank[crtc].last_wait = vblwait->request.sequence; - DRM_WAIT_ON(ret, dev->vblank[crtc].queue, 3 * HZ, + vblank->last_wait = vblwait->request.sequence; + DRM_WAIT_ON(ret, vblank->queue, 3 * HZ, (((drm_vblank_count(dev, crtc) - vblwait->request.sequence) <= (1 << 23)) || - !dev->vblank[crtc].enabled || + !vblank->enabled || !dev->irq_enabled)); if (ret != -EINTR) { @@ -1459,6 +1482,7 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc) */ bool drm_handle_vblank(struct drm_device *dev, int crtc) { + struct drm_vblank_crtc *vblank = &dev->vblank[crtc]; u32 vblcount; s64 diff_ns; struct timeval tvblank; @@ -1474,7 +1498,7 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) spin_lock_irqsave(&dev->vblank_time_lock, irqflags); /* Vblank irq handling disabled. Nothing to do. */ - if (!dev->vblank[crtc].enabled) { + if (!vblank->enabled) { spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); return false; } @@ -1484,7 +1508,7 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) */ /* Get current timestamp and count. */ - vblcount = atomic_read(&dev->vblank[crtc].count); + vblcount = atomic_read(&vblank->count); drm_get_last_vbltimestamp(dev, crtc, &tvblank, DRM_CALLED_FROM_VBLIRQ); /* Compute time difference to timestamp of last vblank */ @@ -1508,14 +1532,14 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) * the timestamp computed above. */ smp_mb__before_atomic(); - atomic_inc(&dev->vblank[crtc].count); + atomic_inc(&vblank->count); smp_mb__after_atomic(); } else { DRM_DEBUG("crtc %d: Redundant vblirq ignored. diff_ns = %d\n", crtc, (int) diff_ns); } - wake_up(&dev->vblank[crtc].queue); + wake_up(&vblank->queue); drm_handle_vblank_events(dev, crtc); spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); From 56cc279b29c7b204fe7d0943509ae209b8b128db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:51 +0300 Subject: [PATCH 013/640] drm: Fix deadlock between event_lock and vbl_lock/vblank_time_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently both drm_irq.c and several drivers call drm_vblank_put() while holding event_lock. Now that drm_vblank_put() can disable the vblank interrupt directly it may need to grab vbl_lock and vblank_time_lock. That causes deadlocks since we take the locks in the opposite order in two places in drm_irq.c. So let's make sure the locking order is always event_lock->vbl_lock->vblank_time_lock. In drm_vblank_off() pull up event_lock from underneath vbl_lock. Hold the event_lock across the whole operation to make sure we only send out the events that were on the queue when we disabled the interrupt, and not ones that got added just after (assuming drm_vblank_on() already managed to get called somewhere between). To sort the other deadlock pull the event_lock out from drm_handle_vblank_events() into drm_handle_vblank() to be taken outside vblank_time_lock. Add the appropriate assert_spin_locked() to drm_handle_vblank_events(). Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 47 +++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index b4460bf0e0e4..9353609c6770 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1037,14 +1037,25 @@ void drm_vblank_off(struct drm_device *dev, int crtc) unsigned long irqflags; unsigned int seq; - spin_lock_irqsave(&dev->vbl_lock, irqflags); + spin_lock_irqsave(&dev->event_lock, irqflags); + + spin_lock(&dev->vbl_lock); vblank_disable_and_save(dev, crtc); wake_up(&vblank->queue); + /* + * Prevent subsequent drm_vblank_get() from re-enabling + * the vblank interrupt by bumping the refcount. + */ + if (!vblank->inmodeset) { + atomic_inc(&vblank->refcount); + vblank->inmodeset = 1; + } + spin_unlock(&dev->vbl_lock); + /* Send any queued vblank events, lest the natives grow disquiet */ seq = drm_vblank_count_and_time(dev, crtc, &now); - spin_lock(&dev->event_lock); list_for_each_entry_safe(e, t, &dev->vblank_event_list, base.link) { if (e->pipe != crtc) continue; @@ -1055,18 +1066,7 @@ void drm_vblank_off(struct drm_device *dev, int crtc) drm_vblank_put(dev, e->pipe); send_vblank_event(dev, e, seq, &now); } - spin_unlock(&dev->event_lock); - - /* - * Prevent subsequent drm_vblank_get() from re-enabling - * the vblank interrupt by bumping the refcount. - */ - if (!vblank->inmodeset) { - atomic_inc(&vblank->refcount); - vblank->inmodeset = 1; - } - - spin_unlock_irqrestore(&dev->vbl_lock, irqflags); + spin_unlock_irqrestore(&dev->event_lock, irqflags); } EXPORT_SYMBOL(drm_vblank_off); @@ -1446,12 +1446,11 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc) { struct drm_pending_vblank_event *e, *t; struct timeval now; - unsigned long flags; unsigned int seq; - seq = drm_vblank_count_and_time(dev, crtc, &now); + assert_spin_locked(&dev->event_lock); - spin_lock_irqsave(&dev->event_lock, flags); + seq = drm_vblank_count_and_time(dev, crtc, &now); list_for_each_entry_safe(e, t, &dev->vblank_event_list, base.link) { if (e->pipe != crtc) @@ -1467,8 +1466,6 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc) send_vblank_event(dev, e, seq, &now); } - spin_unlock_irqrestore(&dev->event_lock, flags); - trace_drm_vblank_event(crtc, seq); } @@ -1491,15 +1488,18 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) if (!dev->num_crtcs) return false; + spin_lock_irqsave(&dev->event_lock, irqflags); + /* Need timestamp lock to prevent concurrent execution with * vblank enable/disable, as this would cause inconsistent * or corrupted timestamps and vblank counts. */ - spin_lock_irqsave(&dev->vblank_time_lock, irqflags); + spin_lock(&dev->vblank_time_lock); /* Vblank irq handling disabled. Nothing to do. */ if (!vblank->enabled) { - spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); + spin_unlock(&dev->vblank_time_lock); + spin_unlock_irqrestore(&dev->event_lock, irqflags); return false; } @@ -1539,10 +1539,13 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc) crtc, (int) diff_ns); } + spin_unlock(&dev->vblank_time_lock); + wake_up(&vblank->queue); drm_handle_vblank_events(dev, crtc); - spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); + spin_unlock_irqrestore(&dev->event_lock, irqflags); + return true; } EXPORT_SYMBOL(drm_handle_vblank); From ffe7c73a8d4f0caeebd5d220ddbf7126a4daca1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:52 +0300 Subject: [PATCH 014/640] drm: Fix race between drm_vblank_off() and drm_queue_vblank_event() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently it's possible that the following will happen: 1. drm_wait_vblank() calls drm_vblank_get() 2. drm_vblank_off() gets called 3. drm_wait_vblank() calls drm_queue_vblank_event() which adds the event to the queue event though vblank interrupts are currently disabled (and may not be re-enabled ever again). To fix the problem, add another vblank->enabled check into drm_queue_vblank_event(). drm_vblank_off() holds event_lock around the vblank disable, so no further locking needs to be added to drm_queue_vblank_event(). vblank disable from another source is not possible since drm_wait_vblank() already holds a vblank reference. Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 9353609c6770..b2428cb0c64d 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1270,6 +1270,7 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe, union drm_wait_vblank *vblwait, struct drm_file *file_priv) { + struct drm_vblank_crtc *vblank = &dev->vblank[pipe]; struct drm_pending_vblank_event *e; struct timeval now; unsigned long flags; @@ -1293,6 +1294,18 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe, spin_lock_irqsave(&dev->event_lock, flags); + /* + * drm_vblank_off() might have been called after we called + * drm_vblank_get(). drm_vblank_off() holds event_lock + * around the vblank disable, so no need for further locking. + * The reference from drm_vblank_get() protects against + * vblank disable from another source. + */ + if (!vblank->enabled) { + ret = -EINVAL; + goto err_unlock; + } + if (file_priv->event_space < sizeof e->event) { ret = -EBUSY; goto err_unlock; From 4ed0ce3d0bccd74416ba6beb33a8a79d1617e97b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:53 +0300 Subject: [PATCH 015/640] drm: Disable vblank interrupt immediately when drm_vblank_offdelay<0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make drm_vblank_put() disable the vblank interrupt immediately when the refcount drops to zero and drm_vblank_offdelay<0. v2: Preserve the current drm_vblank_offdelay==0 'never disable' behaviur Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 1 + drivers/gpu/drm/drm_drv.c | 4 ++-- drivers/gpu/drm/drm_irq.c | 11 +++++++---- include/drm/drmP.h | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 1d3756d3176c..55923d00bd52 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3386,6 +3386,7 @@ void (*disable_vblank) (struct drm_device *dev, int crtc); by scheduling a timer. The delay is accessible through the vblankoffdelay module parameter or the drm_vblank_offdelay global variable and expressed in milliseconds. Its default value is 5000 ms. + Zero means never disable, and a negative value means disable immediately. When a vertical blanking interrupt occurs drivers only need to call the diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 92bc6b1d9646..db03e16ca817 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -39,7 +39,7 @@ unsigned int drm_debug = 0; /* 1 to enable debug output */ EXPORT_SYMBOL(drm_debug); -unsigned int drm_vblank_offdelay = 5000; /* Default to 5000 msecs. */ +int drm_vblank_offdelay = 5000; /* Default to 5000 msecs. */ unsigned int drm_timestamp_precision = 20; /* Default to 20 usecs. */ @@ -53,7 +53,7 @@ MODULE_AUTHOR(CORE_AUTHOR); MODULE_DESCRIPTION(CORE_DESC); MODULE_LICENSE("GPL and additional rights"); MODULE_PARM_DESC(debug, "Enable debug output"); -MODULE_PARM_DESC(vblankoffdelay, "Delay until vblank irq auto-disable [msecs]"); +MODULE_PARM_DESC(vblankoffdelay, "Delay until vblank irq auto-disable [msecs] (0: never disable, <0: disable immediately)"); MODULE_PARM_DESC(timestamp_precision_usec, "Max. error on timestamps [usecs]"); MODULE_PARM_DESC(timestamp_monotonic, "Use monotonic timestamps"); diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index b2428cb0c64d..99145c4d536b 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -993,10 +993,13 @@ void drm_vblank_put(struct drm_device *dev, int crtc) BUG_ON(atomic_read(&vblank->refcount) == 0); /* Last user schedules interrupt disable */ - if (atomic_dec_and_test(&vblank->refcount) && - (drm_vblank_offdelay > 0)) - mod_timer(&vblank->disable_timer, - jiffies + ((drm_vblank_offdelay * HZ)/1000)); + if (atomic_dec_and_test(&vblank->refcount)) { + if (drm_vblank_offdelay < 0) + vblank_disable_fn((unsigned long)vblank); + else if (drm_vblank_offdelay > 0) + mod_timer(&vblank->disable_timer, + jiffies + ((drm_vblank_offdelay * HZ)/1000)); + } } EXPORT_SYMBOL(drm_vblank_put); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index a57646382086..24b32d453c60 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1345,7 +1345,7 @@ extern void drm_put_dev(struct drm_device *dev); extern void drm_unplug_dev(struct drm_device *dev); extern unsigned int drm_debug; -extern unsigned int drm_vblank_offdelay; +extern int drm_vblank_offdelay; extern unsigned int drm_timestamp_precision; extern unsigned int drm_timestamp_monotonic; From 00185e667009dda907887a4f84fbd02c6e651a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:54 +0300 Subject: [PATCH 016/640] drm: Add dev->vblank_disable_immediate flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a flag to drm_device which will cause the vblank code to bypass the disable timer and always disable the vblank interrupt immediately when the last reference is dropped. v2: Add some notes about the flag to the kernel doc Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 6 ++++++ drivers/gpu/drm/drm_irq.c | 2 +- include/drm/drmP.h | 10 ++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 55923d00bd52..583edbffff1a 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3387,6 +3387,12 @@ void (*disable_vblank) (struct drm_device *dev, int crtc); module parameter or the drm_vblank_offdelay global variable and expressed in milliseconds. Its default value is 5000 ms. Zero means never disable, and a negative value means disable immediately. + Drivers may override the behaviour by setting the + drm_device + vblank_disable_immediate flag, which when set + causes vblank interrupts to be disabled immediately regardless of the + drm_vblank_offdelay value. The flag should only be set if there's a + properly working hardware vblank counter present. When a vertical blanking interrupt occurs drivers only need to call the diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 99145c4d536b..8dbcc3f892d5 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -994,7 +994,7 @@ void drm_vblank_put(struct drm_device *dev, int crtc) /* Last user schedules interrupt disable */ if (atomic_dec_and_test(&vblank->refcount)) { - if (drm_vblank_offdelay < 0) + if (dev->vblank_disable_immediate || drm_vblank_offdelay < 0) vblank_disable_fn((unsigned long)vblank); else if (drm_vblank_offdelay > 0) mod_timer(&vblank->disable_timer, diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 24b32d453c60..17a5c10474bd 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1074,6 +1074,16 @@ struct drm_device { */ bool vblank_disable_allowed; + /* + * If true, vblank interrupt will be disabled immediately when the + * refcount drops to zero, as opposed to via the vblank disable + * timer. + * This can be set to true it the hardware has a working vblank + * counter and the driver uses drm_vblank_on() and drm_vblank_off() + * appropriately. + */ + bool vblank_disable_immediate; + /* array of size num_crtcs */ struct drm_vblank_crtc *vblank; From 21da27005f79d72499bb809616b15fd2c5c15319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:55 +0300 Subject: [PATCH 017/640] drm/i915: Opt out of vblank disable timer on >gen2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the vblank races are plugged, we can opt out of using the vblank disable timer and just let vblank interrupts get disabled immediately when the last reference is dropped. Gen2 is the exception since it has no hardware frame counter. Reviewed-by: Matt Roper Reviewed-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6ef9d6fabf80..845f0f6c1eeb 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4680,6 +4680,14 @@ void intel_irq_init(struct drm_device *dev) dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ } + /* + * Opt out of the vblank disable timer on everything except gen2. + * Gen2 doesn't have a hardware frame counter and so depends on + * vblank interrupts to produce sane vblank seuquence numbers. + */ + if (!IS_GEN2(dev)) + dev->vblank_disable_immediate = true; + if (drm_core_check_feature(dev, DRIVER_MODESET)) { dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; From cd19e52aee922ffe5c50b6ed67acd58cc1b2738b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:56 +0300 Subject: [PATCH 018/640] drm: Kick start vblank interrupts at drm_vblank_on() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the user is interested in getting accurate vblank sequence numbers all the time they may disable the vblank disable timer entirely. In that case it seems appropriate to kick start the vblank interrupts already from drm_vblank_on(). v2: Adapt to the drm_vblank_offdelay ==0 vs <0 changes Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 8dbcc3f892d5..af33df1adc6d 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1126,9 +1126,12 @@ void drm_vblank_on(struct drm_device *dev, int crtc) vblank->last = (dev->driver->get_vblank_counter(dev, crtc) - 1) & dev->max_vblank_count; - - /* re-enable interrupts if there's are users left */ - if (atomic_read(&vblank->refcount) != 0) + /* + * re-enable interrupts if there are users left, or the + * user wishes vblank interrupts to be enabled all the time. + */ + if (atomic_read(&vblank->refcount) != 0 || + (!dev->vblank_disable_immediate && drm_vblank_offdelay == 0)) WARN_ON(drm_vblank_enable(dev, crtc)); spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } From d297e1037327884fe9545f434d720fd3e8f18c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:50:01 +0300 Subject: [PATCH 019/640] drm/i915: Update scanline_offset only for active crtcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit update_scanline_offset() in intel_sanitize_crtc() was supposed to be called only for active crtcs. But due to some underrun patches it now gets updated for all crtcs on gmch platforms. Move the update_scanline_offset() to the very beginning of intel_sanitize_crtc() where we update the vblank state. This seems like a better place anyway since the scanline offset ought to be up to date before we might need to consult it. So before any vblanky stuff happens. Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8f6b932d8e79..de40a44e0ca0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12719,9 +12719,10 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) I915_WRITE(reg, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK); /* restore vblank interrupts to correct state */ - if (crtc->active) + if (crtc->active) { + update_scanline_offset(crtc); drm_vblank_on(dev, crtc->pipe); - else + } else drm_vblank_off(dev, crtc->pipe); /* We need to sanitize the plane -> pipe mapping first because this will @@ -12820,8 +12821,6 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) */ crtc->cpu_fifo_underrun_disabled = true; crtc->pch_fifo_underrun_disabled = true; - - update_scanline_offset(crtc); } } From 96a9fdd778037799f63c9ae272ec915dd3ad83dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:50:02 +0300 Subject: [PATCH 020/640] drm: Fix confusing debug message in drm_update_vblank_count() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that drm_update_vblank_count() can be called even when we're not about to enable the vblank interrupts we shouldn't print debug messages stating otherwise. Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index af33df1adc6d..62dee812d28a 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -103,7 +103,7 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc) crtc, vblank->last, cur_vblank, diff); } - DRM_DEBUG("enabling vblank interrupts on crtc %d, missed %d\n", + DRM_DEBUG("updating vblank count on crtc %d, missed %d\n", crtc, diff); /* Reinitialize corresponding vblank timestamp if high-precision query From c50d7521617d823d769b280bc499e19e364434ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:49:59 +0300 Subject: [PATCH 021/640] drm: Store the vblank timestamp when adjusting the counter during disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During vblank disable the code tries to guess based on the timestamps whether we just missed one vblank or not. And if so it increments the counter. However it forgets to store the new timestamp to the approriate slot in our timestamp ring buffer. So anyone querying the timestamp for the resulting sequence number would get a stale timestamp. Fix it up by storing the new timestamp. Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 62dee812d28a..aa9b06495067 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -198,6 +198,13 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc) * hope for the best. */ if ((vblrc > 0) && (abs64(diff_ns) > 1000000)) { + /* Store new timestamp in ringbuffer. */ + vblanktimestamp(dev, crtc, vblcount + 1) = tvblank; + + /* Increment cooked vblank count. This also atomically commits + * the timestamp computed above. + */ + smp_mb__before_atomic(); atomic_inc(&vblank->count); smp_mb__after_atomic(); } From 79a093aea44f11fda0a5b4dbe4c1e29b2f586f4e Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 6 Aug 2014 03:22:44 +0200 Subject: [PATCH 022/640] drm: Remove drm_vblank_cleanup from drm_vblank_init error path. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_vblank_cleanup() would operate on non-existent dev->vblank data structure, as failure to allocate that data structure is what triggers the error path in the first place. Signed-off-by: Mario Kleiner Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index aa9b06495067..6473089e5fd3 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -303,7 +303,7 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs) return 0; err: - drm_vblank_cleanup(dev); + dev->num_crtcs = 0; return ret; } EXPORT_SYMBOL(drm_vblank_init); From f769cd247d2be5af377adf82882eddd1dce183c4 Mon Sep 17 00:00:00 2001 From: Vandana Kannan Date: Tue, 5 Aug 2014 07:51:22 -0700 Subject: [PATCH 023/640] drm/i915: Set M2_N2 registers during mode set For Gen < 8, set M2_N2 registers on every mode set. This is required to make sure M2_N2 registers are set during boot, resume from sleep for cross- checking the state. The register is set only if DRRS is supported. v2: Patch rebased v3: Daniel's review comments - Removed HAS_DRRS(dev) and added bool has_drrs to pipe_config to track drrs support v4: Jesse's review comments - Made changes to set m2_n2 in intel_dp_set_m_n() Signed-off-by: Vandana Kannan Cc: Daniel Vetter Cc: Jesse Barnes Signed-off-by: Rodrigo Vivi Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 28 +++++++++++++++++++++------- drivers/gpu/drm/i915/intel_dp.c | 18 +++--------------- drivers/gpu/drm/i915/intel_drv.h | 3 ++- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 018fb7222f60..acee1416eb93 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -91,11 +91,11 @@ static int intel_framebuffer_init(struct drm_device *dev, struct intel_framebuffer *ifb, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_i915_gem_object *obj); -static void intel_dp_set_m_n(struct intel_crtc *crtc); static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc); static void intel_set_pipe_timings(struct intel_crtc *intel_crtc); static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc, - struct intel_link_m_n *m_n); + struct intel_link_m_n *m_n, + struct intel_link_m_n *m2_n2); static void ironlake_set_pipeconf(struct drm_crtc *crtc); static void haswell_set_pipeconf(struct drm_crtc *crtc); static void intel_set_pipe_csc(struct drm_crtc *crtc); @@ -3980,7 +3980,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) if (intel_crtc->config.has_pch_encoder) { intel_cpu_transcoder_set_m_n(intel_crtc, - &intel_crtc->config.fdi_m_n); + &intel_crtc->config.fdi_m_n, NULL); } ironlake_set_pipeconf(crtc); @@ -4093,7 +4093,7 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) if (intel_crtc->config.has_pch_encoder) { intel_cpu_transcoder_set_m_n(intel_crtc, - &intel_crtc->config.fdi_m_n); + &intel_crtc->config.fdi_m_n, NULL); } haswell_set_pipeconf(crtc); @@ -5509,7 +5509,8 @@ static void intel_pch_transcoder_set_m_n(struct intel_crtc *crtc, } static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc, - struct intel_link_m_n *m_n) + struct intel_link_m_n *m_n, + struct intel_link_m_n *m2_n2) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -5521,6 +5522,18 @@ static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc, I915_WRITE(PIPE_DATA_N1(transcoder), m_n->gmch_n); I915_WRITE(PIPE_LINK_M1(transcoder), m_n->link_m); I915_WRITE(PIPE_LINK_N1(transcoder), m_n->link_n); + /* M2_N2 registers to be set only for gen < 8 (M2_N2 available + * for gen < 8) and if DRRS is supported (to make sure the + * registers are not unnecessarily accessed). + */ + if (m2_n2 && INTEL_INFO(dev)->gen < 8 && + crtc->config.has_drrs) { + I915_WRITE(PIPE_DATA_M2(transcoder), + TU_SIZE(m2_n2->tu) | m2_n2->gmch_m); + I915_WRITE(PIPE_DATA_N2(transcoder), m2_n2->gmch_n); + I915_WRITE(PIPE_LINK_M2(transcoder), m2_n2->link_m); + I915_WRITE(PIPE_LINK_N2(transcoder), m2_n2->link_n); + } } else { I915_WRITE(PIPE_DATA_M_G4X(pipe), TU_SIZE(m_n->tu) | m_n->gmch_m); I915_WRITE(PIPE_DATA_N_G4X(pipe), m_n->gmch_n); @@ -5529,12 +5542,13 @@ static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc, } } -static void intel_dp_set_m_n(struct intel_crtc *crtc) +void intel_dp_set_m_n(struct intel_crtc *crtc) { if (crtc->config.has_pch_encoder) intel_pch_transcoder_set_m_n(crtc, &crtc->config.dp_m_n); else - intel_cpu_transcoder_set_m_n(crtc, &crtc->config.dp_m_n); + intel_cpu_transcoder_set_m_n(crtc, &crtc->config.dp_m_n, + &crtc->config.dp_m2_n2); } static void vlv_update_pll(struct intel_crtc *crtc) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 805b6f63df0f..3ea5cef9bbe6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -828,20 +828,6 @@ intel_dp_set_clock(struct intel_encoder *encoder, } } -static void -intel_dp_set_m2_n2(struct intel_crtc *crtc, struct intel_link_m_n *m_n) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - enum transcoder transcoder = crtc->config.cpu_transcoder; - - I915_WRITE(PIPE_DATA_M2(transcoder), - TU_SIZE(m_n->tu) | m_n->gmch_m); - I915_WRITE(PIPE_DATA_N2(transcoder), m_n->gmch_n); - I915_WRITE(PIPE_LINK_M2(transcoder), m_n->link_m); - I915_WRITE(PIPE_LINK_N2(transcoder), m_n->link_n); -} - bool intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_config *pipe_config) @@ -867,6 +853,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = true; pipe_config->has_dp_encoder = true; + pipe_config->has_drrs = false; pipe_config->has_audio = intel_dp->has_audio; if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) { @@ -970,6 +957,7 @@ found: if (intel_connector->panel.downclock_mode != NULL && intel_dp->drrs_state.type == SEAMLESS_DRRS_SUPPORT) { + pipe_config->has_drrs = true; intel_link_compute_m_n(bpp, lane_count, intel_connector->panel.downclock_mode->clock, pipe_config->port_clock, @@ -4389,7 +4377,7 @@ void intel_dp_set_drrs_state(struct drm_device *dev, int refresh_rate) val = I915_READ(reg); if (index > DRRS_HIGH_RR) { val |= PIPECONF_EDP_RR_MODE_SWITCH; - intel_dp_set_m2_n2(intel_crtc, &config->dp_m2_n2); + intel_dp_set_m_n(intel_crtc); } else { val &= ~PIPECONF_EDP_RR_MODE_SWITCH; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4b2664bd5b81..7a3cac095afe 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -324,6 +324,7 @@ struct intel_crtc_config { /* m2_n2 for eDP downclock */ struct intel_link_m_n dp_m2_n2; + bool has_drrs; /* * Frequence the dpll for the port should run at. Differs from the @@ -877,6 +878,7 @@ void hsw_enable_pc8(struct drm_i915_private *dev_priv); void hsw_disable_pc8(struct drm_i915_private *dev_priv); void intel_dp_get_m_n(struct intel_crtc *crtc, struct intel_crtc_config *pipe_config); +void intel_dp_set_m_n(struct intel_crtc *crtc); int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n); void ironlake_check_encoder_dotclock(const struct intel_crtc_config *pipe_config, @@ -892,7 +894,6 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, int intel_format_to_fourcc(int format); void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc); - /* intel_dp.c */ void intel_dp_init(struct drm_device *dev, int output_reg, enum port port); bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, From b95af8bee524974768681b2b92235e1e1e21bf1a Mon Sep 17 00:00:00 2001 From: Vandana Kannan Date: Tue, 5 Aug 2014 07:51:23 -0700 Subject: [PATCH 024/640] drm/i915: State readout and cross-checking for dp_m2_n2 Adding relevant read out comparison code, in check_crtc_state, for the new member of crtc_config, dp_m2_n2, which was introduced to store link_m_n values for a DP downclock mode (if available). Suggested by Daniel. v2: Changed patch title. Daniel's review comments incorporated. Added relevant state readout code for M2_N2. dp_m2_n2 comparison to be done only when high RR is not in use (This is because alternate m_n register programming will be done only when low RR is being used). v3: Modified call to get_m2_n2 which had dp_m_n as param by mistake. Compare dp_m_n and dp_m2_n2 for gen 7 and below. compare the structures based on DRRS state for gen 8 and above. Save and restore M2 N2 registers for gen 7 and below v4: For Gen>=8, check M_N registers against dp_m_n and dp_m2_n2 as there is only one set of M_N registers v5: Removed the chunk which saves and restores M2_N2 registers. Modified get_m_n() to get M2_N2 registers as well. Modified the macro which compares hw.dp_m_n against sw.dp_m2_n2/sw.dp_m_n for gen > 8. v6: Added check to compare dp_m2_n2 only when DRRS is enabled v7: Modified drrs check to use has_drrs v8: Add has_drrs check before reading M2_N2 registers Signed-off-by: Vandana Kannan Cc: Daniel Vetter Cc: Jani Nikula Cc: Jesse Barnes Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 75 +++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index acee1416eb93..620a89961d36 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7149,7 +7149,8 @@ static void intel_pch_transcoder_get_m_n(struct intel_crtc *crtc, static void intel_cpu_transcoder_get_m_n(struct intel_crtc *crtc, enum transcoder transcoder, - struct intel_link_m_n *m_n) + struct intel_link_m_n *m_n, + struct intel_link_m_n *m2_n2) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -7163,6 +7164,20 @@ static void intel_cpu_transcoder_get_m_n(struct intel_crtc *crtc, m_n->gmch_n = I915_READ(PIPE_DATA_N1(transcoder)); m_n->tu = ((I915_READ(PIPE_DATA_M1(transcoder)) & TU_SIZE_MASK) >> TU_SIZE_SHIFT) + 1; + /* Read M2_N2 registers only for gen < 8 (M2_N2 available for + * gen < 8) and if DRRS is supported (to make sure the + * registers are not unnecessarily read). + */ + if (m2_n2 && INTEL_INFO(dev)->gen < 8 && + crtc->config.has_drrs) { + m2_n2->link_m = I915_READ(PIPE_LINK_M2(transcoder)); + m2_n2->link_n = I915_READ(PIPE_LINK_N2(transcoder)); + m2_n2->gmch_m = I915_READ(PIPE_DATA_M2(transcoder)) + & ~TU_SIZE_MASK; + m2_n2->gmch_n = I915_READ(PIPE_DATA_N2(transcoder)); + m2_n2->tu = ((I915_READ(PIPE_DATA_M2(transcoder)) + & TU_SIZE_MASK) >> TU_SIZE_SHIFT) + 1; + } } else { m_n->link_m = I915_READ(PIPE_LINK_M_G4X(pipe)); m_n->link_n = I915_READ(PIPE_LINK_N_G4X(pipe)); @@ -7181,14 +7196,15 @@ void intel_dp_get_m_n(struct intel_crtc *crtc, intel_pch_transcoder_get_m_n(crtc, &pipe_config->dp_m_n); else intel_cpu_transcoder_get_m_n(crtc, pipe_config->cpu_transcoder, - &pipe_config->dp_m_n); + &pipe_config->dp_m_n, + &pipe_config->dp_m2_n2); } static void ironlake_get_fdi_m_n_config(struct intel_crtc *crtc, struct intel_crtc_config *pipe_config) { intel_cpu_transcoder_get_m_n(crtc, pipe_config->cpu_transcoder, - &pipe_config->fdi_m_n); + &pipe_config->fdi_m_n, NULL); } static void ironlake_get_pfit_config(struct intel_crtc *crtc, @@ -10005,6 +10021,15 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, pipe_config->dp_m_n.gmch_m, pipe_config->dp_m_n.gmch_n, pipe_config->dp_m_n.link_m, pipe_config->dp_m_n.link_n, pipe_config->dp_m_n.tu); + + DRM_DEBUG_KMS("dp: %i, gmch_m2: %u, gmch_n2: %u, link_m2: %u, link_n2: %u, tu2: %u\n", + pipe_config->has_dp_encoder, + pipe_config->dp_m2_n2.gmch_m, + pipe_config->dp_m2_n2.gmch_n, + pipe_config->dp_m2_n2.link_m, + pipe_config->dp_m2_n2.link_n, + pipe_config->dp_m2_n2.tu); + DRM_DEBUG_KMS("requested mode:\n"); drm_mode_debug_printmodeline(&pipe_config->requested_mode); DRM_DEBUG_KMS("adjusted mode:\n"); @@ -10385,6 +10410,22 @@ intel_pipe_config_compare(struct drm_device *dev, return false; \ } +/* This is required for BDW+ where there is only one set of registers for + * switching between high and low RR. + * This macro can be used whenever a comparison has to be made between one + * hw state and multiple sw state variables. + */ +#define PIPE_CONF_CHECK_I_ALT(name, alt_name) \ + if ((current_config->name != pipe_config->name) && \ + (current_config->alt_name != pipe_config->name)) { \ + DRM_ERROR("mismatch in " #name " " \ + "(expected %i or %i, found %i)\n", \ + current_config->name, \ + current_config->alt_name, \ + pipe_config->name); \ + return false; \ + } + #define PIPE_CONF_CHECK_FLAGS(name, mask) \ if ((current_config->name ^ pipe_config->name) & (mask)) { \ DRM_ERROR("mismatch in " #name "(" #mask ") " \ @@ -10417,11 +10458,28 @@ intel_pipe_config_compare(struct drm_device *dev, PIPE_CONF_CHECK_I(fdi_m_n.tu); PIPE_CONF_CHECK_I(has_dp_encoder); - PIPE_CONF_CHECK_I(dp_m_n.gmch_m); - PIPE_CONF_CHECK_I(dp_m_n.gmch_n); - PIPE_CONF_CHECK_I(dp_m_n.link_m); - PIPE_CONF_CHECK_I(dp_m_n.link_n); - PIPE_CONF_CHECK_I(dp_m_n.tu); + + if (INTEL_INFO(dev)->gen < 8) { + PIPE_CONF_CHECK_I(dp_m_n.gmch_m); + PIPE_CONF_CHECK_I(dp_m_n.gmch_n); + PIPE_CONF_CHECK_I(dp_m_n.link_m); + PIPE_CONF_CHECK_I(dp_m_n.link_n); + PIPE_CONF_CHECK_I(dp_m_n.tu); + + if (current_config->has_drrs) { + PIPE_CONF_CHECK_I(dp_m2_n2.gmch_m); + PIPE_CONF_CHECK_I(dp_m2_n2.gmch_n); + PIPE_CONF_CHECK_I(dp_m2_n2.link_m); + PIPE_CONF_CHECK_I(dp_m2_n2.link_n); + PIPE_CONF_CHECK_I(dp_m2_n2.tu); + } + } else { + PIPE_CONF_CHECK_I_ALT(dp_m_n.gmch_m, dp_m2_n2.gmch_m); + PIPE_CONF_CHECK_I_ALT(dp_m_n.gmch_n, dp_m2_n2.gmch_n); + PIPE_CONF_CHECK_I_ALT(dp_m_n.link_m, dp_m2_n2.link_m); + PIPE_CONF_CHECK_I_ALT(dp_m_n.link_n, dp_m2_n2.link_n); + PIPE_CONF_CHECK_I_ALT(dp_m_n.tu, dp_m2_n2.tu); + } PIPE_CONF_CHECK_I(adjusted_mode.crtc_hdisplay); PIPE_CONF_CHECK_I(adjusted_mode.crtc_htotal); @@ -10507,6 +10565,7 @@ intel_pipe_config_compare(struct drm_device *dev, #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I +#undef PIPE_CONF_CHECK_I_ALT #undef PIPE_CONF_CHECK_FLAGS #undef PIPE_CONF_CHECK_CLOCK_FUZZY #undef PIPE_CONF_QUIRK From 020178a1bcadf20b9d057988984f374c905d542e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 22 May 2014 19:36:03 +0300 Subject: [PATCH 025/640] drm: Add drm_crtc_vblank_waitqueue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a small static inline helper to grab the vblank wait queue based on the drm_crtc. This is useful for drivers to do internal vblank waits using wait_event() & co. v2: Pimp commit message (Daniel) Add kernel doc (Daniel) Suggested-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 1 + include/drm/drmP.h | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 1d3756d3176c..972759489376 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3400,6 +3400,7 @@ void (*disable_vblank) (struct drm_device *dev, int crtc); Vertical Blanking and Interrupt Handling Functions Reference !Edrivers/gpu/drm/drm_irq.c +!Iinclude/drm/drmP.h drm_crtc_vblank_waitqueue diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d3d9be6b83ef..bb44c1ee557d 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1344,6 +1344,17 @@ extern int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, extern void drm_calc_timestamping_constants(struct drm_crtc *crtc, const struct drm_display_mode *mode); +/** + * drm_crtc_vblank_waitqueue - get vblank waitqueue for the CRTC + * @crtc: which CRTC's vblank waitqueue to retrieve + * + * This function returns a pointer to the vblank waitqueue for the CRTC. + * Drivers can use this to implement vblank waits using wait_event() & co. + */ +static inline wait_queue_head_t *drm_crtc_vblank_waitqueue(struct drm_crtc *crtc) +{ + return &crtc->dev->vblank[drm_crtc_index(crtc)].queue; +} /* Modesetting support */ extern void drm_vblank_pre_modeset(struct drm_device *dev, int crtc); From 210871b67cd201c198b61ca80e1c51cd4b58c051 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 22 May 2014 19:00:50 +0300 Subject: [PATCH 026/640] drm/i915: Kill intel_crtc->vbl_wait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Share the waitqueue that drm_irq uses when performing the vblank evade trick for atomic pipe updates. v2: Keep intel_pipe_handle_vblank() (Chris) Suggested-by: Daniel Vetter Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 5 ----- drivers/gpu/drm/i915/intel_display.c | 2 -- drivers/gpu/drm/i915/intel_drv.h | 2 -- drivers/gpu/drm/i915/intel_sprite.c | 5 +++-- 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 390ccc2a3096..0e44c433cfc3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1989,14 +1989,9 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) static bool intel_pipe_handle_vblank(struct drm_device *dev, enum pipe pipe) { - struct intel_crtc *crtc; - if (!drm_handle_vblank(dev, pipe)) return false; - crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe)); - wake_up(&crtc->vbl_wait); - return true; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 620a89961d36..a9b351d1ff88 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11870,8 +11870,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->cursor_base = ~0; intel_crtc->cursor_cntl = ~0; - init_waitqueue_head(&intel_crtc->vbl_wait); - BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7a3cac095afe..3198de3007be 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -425,8 +425,6 @@ struct intel_crtc { struct intel_pipe_wm active; } wm; - wait_queue_head_t vbl_wait; - int scanline_offset; struct intel_mmio_flip mmio_flip; }; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 168c6652cda1..d34a5696ffb6 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -53,6 +53,7 @@ static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl enum pipe pipe = crtc->pipe; long timeout = msecs_to_jiffies_timeout(1); int scanline, min, max, vblank_start; + wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base); DEFINE_WAIT(wait); WARN_ON(!drm_modeset_is_locked(&crtc->base.mutex)); @@ -81,7 +82,7 @@ static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl * other CPUs can see the task state update by the time we * read the scanline. */ - prepare_to_wait(&crtc->vbl_wait, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); scanline = intel_get_crtc_scanline(crtc); if (scanline < min || scanline > max) @@ -100,7 +101,7 @@ static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl local_irq_disable(); } - finish_wait(&crtc->vbl_wait, &wait); + finish_wait(wq, &wait); drm_vblank_put(dev, pipe); From 4811ff4f2388727a161ea49c2b0ddca95e44c7f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:07 +0300 Subject: [PATCH 027/640] drm/i915: Add chv_power_wells[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add chv_power_wells[] so we can start to build up the power well support for chv. Just the "always on" well there initialy. Signed-off-by: Ville Syrjälä Tested-by: Rafael Barbalho Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 615e341682c3..7dbd7892b968 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6574,6 +6574,15 @@ static struct i915_power_well vlv_power_wells[] = { }, }; +static struct i915_power_well chv_power_wells[] = { + { + .name = "always-on", + .always_on = 1, + .domains = VLV_ALWAYS_ON_POWER_DOMAINS, + .ops = &i9xx_always_on_power_well_ops, + }, +}; + static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, enum punit_power_well power_well_id) { @@ -6610,6 +6619,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) } else if (IS_BROADWELL(dev_priv->dev)) { set_power_wells(power_domains, bdw_power_wells); hsw_pwr = power_domains; + } else if (IS_CHERRYVIEW(dev_priv->dev)) { + set_power_wells(power_domains, chv_power_wells); } else if (IS_VALLEYVIEW(dev_priv->dev)) { set_power_wells(power_domains, vlv_power_wells); } else { From 5d6f7ea752228788eddce0b9e268fa1f0eabdd7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:08 +0300 Subject: [PATCH 028/640] drm/i915: Add chv cmnlane power wells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV has two display PHYs so there are also two cmnlane power wells. Add the approriate code to power the wells up/down. Like on VLV we do the cmnreset assert/deassert and the DPLL refclock enabling at approriate times. This code actually works on my bsw. Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 89 +++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e4d7607da2c4..c3338ca4ab17 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -514,6 +514,7 @@ enum punit_power_well { PUNIT_POWER_WELL_DPIO_TX_C_LANES_23 = 9, PUNIT_POWER_WELL_DPIO_RX0 = 10, PUNIT_POWER_WELL_DPIO_RX1 = 11, + PUNIT_POWER_WELL_DPIO_CMN_D = 12, PUNIT_POWER_WELL_NUM, }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7dbd7892b968..8a78015dd51e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6254,6 +6254,64 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } +static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + enum dpio_phy phy; + + WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && + power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); + + /* + * Enable the CRI clock source so we can get at the + * display and the reference clock for VGA + * hotplug / manual detection. + */ + if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { + phy = DPIO_PHY0; + I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | + DPLL_REFA_CLK_ENABLE_VLV); + I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) | + DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); + } else { + phy = DPIO_PHY1; + I915_WRITE(DPLL(PIPE_C), I915_READ(DPLL(PIPE_C)) | + DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV); + } + udelay(1); /* >10ns for cmnreset, >0ns for sidereset */ + vlv_set_power_well(dev_priv, power_well, true); + + /* Poll for phypwrgood signal */ + if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & PHY_POWERGOOD(phy), 1)) + DRM_ERROR("Display PHY %d is not power up\n", phy); + + I915_WRITE(DISPLAY_PHY_CONTROL, + PHY_COM_LANE_RESET_DEASSERT(phy, I915_READ(DISPLAY_PHY_CONTROL))); +} + +static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + enum dpio_phy phy; + + WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && + power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); + + if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { + phy = DPIO_PHY0; + assert_pll_disabled(dev_priv, PIPE_A); + assert_pll_disabled(dev_priv, PIPE_B); + } else { + phy = DPIO_PHY1; + assert_pll_disabled(dev_priv, PIPE_C); + } + + I915_WRITE(DISPLAY_PHY_CONTROL, + PHY_COM_LANE_RESET_ASSERT(phy, I915_READ(DISPLAY_PHY_CONTROL))); + + vlv_set_power_well(dev_priv, power_well, false); +} + static void check_power_well_state(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -6445,6 +6503,18 @@ EXPORT_SYMBOL_GPL(i915_get_cdclk_freq); BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ BIT(POWER_DOMAIN_INIT)) +#define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + +#define CHV_DPIO_CMN_D_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_always_on_power_well_noop, .enable = i9xx_always_on_power_well_noop, @@ -6452,6 +6522,13 @@ static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .is_enabled = i9xx_always_on_power_well_enabled, }; +static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { + .sync_hw = vlv_power_well_sync_hw, + .enable = chv_dpio_cmn_power_well_enable, + .disable = chv_dpio_cmn_power_well_disable, + .is_enabled = vlv_power_well_enabled, +}; + static struct i915_power_well i9xx_always_on_power_well[] = { { .name = "always-on", @@ -6581,6 +6658,18 @@ static struct i915_power_well chv_power_wells[] = { .domains = VLV_ALWAYS_ON_POWER_DOMAINS, .ops = &i9xx_always_on_power_well_ops, }, + { + .name = "dpio-common-bc", + .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS, + .data = PUNIT_POWER_WELL_DPIO_CMN_BC, + .ops = &chv_dpio_cmn_power_well_ops, + }, + { + .name = "dpio-common-d", + .domains = CHV_DPIO_CMN_D_POWER_DOMAINS, + .data = PUNIT_POWER_WELL_DPIO_CMN_D, + .ops = &chv_dpio_cmn_power_well_ops, + }, }; static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, From a74d782c74644b2e50b3db61e115831cdc3e9010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:09 +0300 Subject: [PATCH 029/640] drm/i915: Kill intel_reset_dpio() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both VLV and CHV handle the cmnreset stuff in the power well code now, so intel_reset_dpio() is no longer needed. Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 31 ---------------------------- 1 file changed, 31 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a9b351d1ff88..c0575ea1e196 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1513,34 +1513,6 @@ static void intel_init_dpio(struct drm_device *dev) } } -static void intel_reset_dpio(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - if (IS_CHERRYVIEW(dev)) { - enum dpio_phy phy; - u32 val; - - for (phy = DPIO_PHY0; phy < I915_NUM_PHYS_VLV; phy++) { - /* Poll for phypwrgood signal */ - if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & - PHY_POWERGOOD(phy), 1)) - DRM_ERROR("Display PHY %d is not power up\n", phy); - - /* - * Deassert common lane reset for PHY. - * - * This should only be done on init and resume from S3 - * with both PLLs disabled, or we risk losing DPIO and - * PLL synchronization. - */ - val = I915_READ(DISPLAY_PHY_CONTROL); - I915_WRITE(DISPLAY_PHY_CONTROL, - PHY_COM_LANE_RESET_DEASSERT(phy, val)); - } - } -} - static void vlv_enable_pll(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; @@ -12615,8 +12587,6 @@ void intel_modeset_init_hw(struct drm_device *dev) intel_init_clock_gating(dev); - intel_reset_dpio(dev); - intel_enable_gt_powersave(dev); } @@ -12687,7 +12657,6 @@ void intel_modeset_init(struct drm_device *dev) } intel_init_dpio(dev); - intel_reset_dpio(dev); intel_shared_dpll_init(dev); From f07057d13c62c5b925725c6e03a0c4d1c0244bef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:10 +0300 Subject: [PATCH 030/640] drm/i915: Add disp2d power well for chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not sure if it's still there since chv has per-pipe power wells. At least with current Punit this doesn't work. Also the display irq handling would need to be adjusted for pipe C. So leave the code iffed out for now. Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8a78015dd51e..dc8719f29d03 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6658,6 +6658,14 @@ static struct i915_power_well chv_power_wells[] = { .domains = VLV_ALWAYS_ON_POWER_DOMAINS, .ops = &i9xx_always_on_power_well_ops, }, +#if 0 + { + .name = "display", + .domains = VLV_DISPLAY_POWER_DOMAINS, + .data = PUNIT_POWER_WELL_DISP2D, + .ops = &vlv_display_power_well_ops, + }, +#endif { .name = "dpio-common-bc", .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS, From 26972b0a80091ccece1cbd9422772ae625a612f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:11 +0300 Subject: [PATCH 031/640] drm/i915: Add per-pipe power wells for chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV has a power well for each pipe. Add the code to deal with them. The Punit in current hardware doesn't seem ready for this yet, so leave it iffed out. Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 12 +++ drivers/gpu/drm/i915/intel_pm.c | 126 ++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c3338ca4ab17..9d54aee6f8c8 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -501,6 +501,18 @@ #define DSPFREQSTAT_MASK (0x3 << DSPFREQSTAT_SHIFT) #define DSPFREQGUAR_SHIFT 14 #define DSPFREQGUAR_MASK (0x3 << DSPFREQGUAR_SHIFT) +#define _DP_SSC(val, pipe) ((val) << (2 * (pipe))) +#define DP_SSC_MASK(pipe) _DP_SSC(0x3, (pipe)) +#define DP_SSC_PWR_ON(pipe) _DP_SSC(0x0, (pipe)) +#define DP_SSC_CLK_GATE(pipe) _DP_SSC(0x1, (pipe)) +#define DP_SSC_RESET(pipe) _DP_SSC(0x2, (pipe)) +#define DP_SSC_PWR_GATE(pipe) _DP_SSC(0x3, (pipe)) +#define _DP_SSS(val, pipe) ((val) << (2 * (pipe) + 16)) +#define DP_SSS_MASK(pipe) _DP_SSS(0x3, (pipe)) +#define DP_SSS_PWR_ON(pipe) _DP_SSS(0x0, (pipe)) +#define DP_SSS_CLK_GATE(pipe) _DP_SSS(0x1, (pipe)) +#define DP_SSS_RESET(pipe) _DP_SSS(0x2, (pipe)) +#define DP_SSS_PWR_GATE(pipe) _DP_SSS(0x3, (pipe)) /* See the PUNIT HAS v0.8 for the below bits */ enum punit_power_well { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index dc8719f29d03..95b3ca5964e9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6312,6 +6312,95 @@ static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } +static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + enum pipe pipe = power_well->data; + bool enabled; + u32 state, ctrl; + + mutex_lock(&dev_priv->rps.hw_lock); + + state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe); + /* + * We only ever set the power-on and power-gate states, anything + * else is unexpected. + */ + WARN_ON(state != DP_SSS_PWR_ON(pipe) && state != DP_SSS_PWR_GATE(pipe)); + enabled = state == DP_SSS_PWR_ON(pipe); + + /* + * A transient state at this point would mean some unexpected party + * is poking at the power controls too. + */ + ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe); + WARN_ON(ctrl << 16 != state); + + mutex_unlock(&dev_priv->rps.hw_lock); + + return enabled; +} + +static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well, + bool enable) +{ + enum pipe pipe = power_well->data; + u32 state; + u32 ctrl; + + state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe); + + mutex_lock(&dev_priv->rps.hw_lock); + +#define COND \ + ((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state) + + if (COND) + goto out; + + ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + ctrl &= ~DP_SSC_MASK(pipe); + ctrl |= enable ? DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl); + + if (wait_for(COND, 100)) + DRM_ERROR("timout setting power well state %08x (%08x)\n", + state, + vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ)); + +#undef COND + +out: + mutex_unlock(&dev_priv->rps.hw_lock); +} + +static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0); +} + +static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + WARN_ON_ONCE(power_well->data != PIPE_A && + power_well->data != PIPE_B && + power_well->data != PIPE_C); + + chv_set_pipe_power_well(dev_priv, power_well, true); +} + +static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) +{ + WARN_ON_ONCE(power_well->data != PIPE_A && + power_well->data != PIPE_B && + power_well->data != PIPE_C); + + chv_set_pipe_power_well(dev_priv, power_well, false); +} + static void check_power_well_state(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -6503,6 +6592,18 @@ EXPORT_SYMBOL_GPL(i915_get_cdclk_freq); BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) | \ BIT(POWER_DOMAIN_INIT)) +#define CHV_PIPE_A_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PIPE_A) | \ + BIT(POWER_DOMAIN_INIT)) + +#define CHV_PIPE_B_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PIPE_B) | \ + BIT(POWER_DOMAIN_INIT)) + +#define CHV_PIPE_C_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PIPE_C) | \ + BIT(POWER_DOMAIN_INIT)) + #define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ @@ -6522,6 +6623,13 @@ static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .is_enabled = i9xx_always_on_power_well_enabled, }; +static const struct i915_power_well_ops chv_pipe_power_well_ops = { + .sync_hw = chv_pipe_power_well_sync_hw, + .enable = chv_pipe_power_well_enable, + .disable = chv_pipe_power_well_disable, + .is_enabled = chv_pipe_power_well_enabled, +}; + static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { .sync_hw = vlv_power_well_sync_hw, .enable = chv_dpio_cmn_power_well_enable, @@ -6665,6 +6773,24 @@ static struct i915_power_well chv_power_wells[] = { .data = PUNIT_POWER_WELL_DISP2D, .ops = &vlv_display_power_well_ops, }, + { + .name = "pipe-a", + .domains = CHV_PIPE_A_POWER_DOMAINS, + .data = PIPE_A, + .ops = &chv_pipe_power_well_ops, + }, + { + .name = "pipe-b", + .domains = CHV_PIPE_B_POWER_DOMAINS, + .data = PIPE_B, + .ops = &chv_pipe_power_well_ops, + }, + { + .name = "pipe-c", + .domains = CHV_PIPE_C_POWER_DOMAINS, + .data = PIPE_C, + .ops = &chv_pipe_power_well_ops, + }, #endif { .name = "dpio-common-bc", From 8258356537871cf579868002552a0ed1762b0487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:12 +0300 Subject: [PATCH 032/640] drm/i915: Add chv port B and C TX wells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the TX wells for ports B and C just like on VLV. Again Punit doesn't seem ready (or the wells don't even exist anymore) so leave it iffed out. Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 95b3ca5964e9..59157635d418 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6804,6 +6804,36 @@ static struct i915_power_well chv_power_wells[] = { .data = PUNIT_POWER_WELL_DPIO_CMN_D, .ops = &chv_dpio_cmn_power_well_ops, }, +#if 0 + { + .name = "dpio-tx-b-01", + .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01, + }, + { + .name = "dpio-tx-b-23", + .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23, + }, + { + .name = "dpio-tx-c-01", + .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01, + }, + { + .name = "dpio-tx-c-23", + .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | + VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, + }, +#endif }; static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, From 2ce147f36dc5a1f3b49abd8ce3164ad0f04ec863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:13 +0300 Subject: [PATCH 033/640] drm/i915: Add chv port D TX wells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the TX wells for port D. The Punit subsystem numbers are a total guess at this time. Also I'm not sure these even exist. Certainly the Punit in current hardware doesn't deal with these. Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_pm.c | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9d54aee6f8c8..e01a1a0b9613 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -527,6 +527,10 @@ enum punit_power_well { PUNIT_POWER_WELL_DPIO_RX0 = 10, PUNIT_POWER_WELL_DPIO_RX1 = 11, PUNIT_POWER_WELL_DPIO_CMN_D = 12, + /* FIXME: guesswork below */ + PUNIT_POWER_WELL_DPIO_TX_D_LANES_01 = 13, + PUNIT_POWER_WELL_DPIO_TX_D_LANES_23 = 14, + PUNIT_POWER_WELL_DPIO_RX2 = 15, PUNIT_POWER_WELL_NUM, }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 59157635d418..f4a1837c231c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6616,6 +6616,15 @@ EXPORT_SYMBOL_GPL(i915_get_cdclk_freq); BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ BIT(POWER_DOMAIN_INIT)) +#define CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ + BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + +#define CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ + BIT(POWER_DOMAIN_INIT)) + static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_always_on_power_well_noop, .enable = i9xx_always_on_power_well_noop, @@ -6833,6 +6842,20 @@ static struct i915_power_well chv_power_wells[] = { .ops = &vlv_dpio_power_well_ops, .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, }, + { + .name = "dpio-tx-d-01", + .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS | + CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_01, + }, + { + .name = "dpio-tx-d-23", + .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS | + CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS, + .ops = &vlv_dpio_power_well_ops, + .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_23, + }, #endif }; From 026b96e293fbe48153ae868308e341f226d76c46 Mon Sep 17 00:00:00 2001 From: Rafael Barbalho Date: Mon, 28 Jul 2014 19:56:27 +0100 Subject: [PATCH 034/640] drm/i915: Fix read back of plane stride register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the specifications bit 6 is actually valid in the stride register. Cc: Jesse Barnes Cc: Ville Syrjälä Signed-off-by: Rafael Barbalho Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c0575ea1e196..71957e7184ef 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6211,7 +6211,7 @@ static void i9xx_get_plane_config(struct intel_crtc *crtc, crtc->base.primary->fb->height = ((val >> 0) & 0xfff) + 1; val = I915_READ(DSPSTRIDE(pipe)); - crtc->base.primary->fb->pitches[0] = val & 0xffffff80; + crtc->base.primary->fb->pitches[0] = val & 0xffffffc0; aligned_height = intel_align_height(dev, crtc->base.primary->fb->height, plane_config->tiled); @@ -7247,7 +7247,7 @@ static void ironlake_get_plane_config(struct intel_crtc *crtc, crtc->base.primary->fb->height = ((val >> 0) & 0xfff) + 1; val = I915_READ(DSPSTRIDE(pipe)); - crtc->base.primary->fb->pitches[0] = val & 0xffffff80; + crtc->base.primary->fb->pitches[0] = val & 0xffffffc0; aligned_height = intel_align_height(dev, crtc->base.primary->fb->height, plane_config->tiled); From a5043453aa2412ece984373294529d177324c901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:18 +0300 Subject: [PATCH 035/640] drm/i915: Split a few long debug prints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split some WM debug prints to multiple lines. This shouldn't hurt grappability since the important part is at the start and the rest is just repeated stuff for each pipe. Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f4a1837c231c..a318cd5ad8ed 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1387,7 +1387,8 @@ static void valleyview_update_wm(struct drm_crtc *crtc) plane_sr = cursor_sr = 0; } - DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", + DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " + "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, planeb_wm, cursorb_wm, plane_sr, cursor_sr); @@ -1443,7 +1444,8 @@ static void g4x_update_wm(struct drm_crtc *crtc) plane_sr = cursor_sr = 0; } - DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", + DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " + "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, planeb_wm, cursorb_wm, plane_sr, cursor_sr); From aad3d14d25c33c8e510c41aaaf2668e8d32811ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:25 +0300 Subject: [PATCH 036/640] drm/i915: Add DP training pattern 3 for CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV supports DP training pattern 3. Add the required stuff. Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_dp.c | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e01a1a0b9613..e355fc8d4d59 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3489,6 +3489,8 @@ enum punit_power_well { #define DP_LINK_TRAIN_OFF (3 << 28) #define DP_LINK_TRAIN_MASK (3 << 28) #define DP_LINK_TRAIN_SHIFT 28 +#define DP_LINK_TRAIN_PAT_3_CHV (1 << 14) +#define DP_LINK_TRAIN_MASK_CHV ((3 << 28)|(1<<14)) /* CPT Link training mode */ #define DP_LINK_TRAIN_PAT_1_CPT (0 << 8) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 3ea5cef9bbe6..0aa219dff19c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2959,7 +2959,10 @@ intel_dp_set_link_train(struct intel_dp *intel_dp, } } else { - *DP &= ~DP_LINK_TRAIN_MASK; + if (IS_CHERRYVIEW(dev)) + *DP &= ~DP_LINK_TRAIN_MASK_CHV; + else + *DP &= ~DP_LINK_TRAIN_MASK; switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) { case DP_TRAINING_PATTERN_DISABLE: @@ -2972,8 +2975,12 @@ intel_dp_set_link_train(struct intel_dp *intel_dp, *DP |= DP_LINK_TRAIN_PAT_2; break; case DP_TRAINING_PATTERN_3: - DRM_ERROR("DP training pattern 3 not supported\n"); - *DP |= DP_LINK_TRAIN_PAT_2; + if (IS_CHERRYVIEW(dev)) { + *DP |= DP_LINK_TRAIN_PAT_3_CHV; + } else { + DRM_ERROR("DP training pattern 3 not supported\n"); + *DP |= DP_LINK_TRAIN_PAT_2; + } break; } } @@ -3260,7 +3267,10 @@ intel_dp_link_down(struct intel_dp *intel_dp) DP &= ~DP_LINK_TRAIN_MASK_CPT; I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE_CPT); } else { - DP &= ~DP_LINK_TRAIN_MASK; + if (IS_CHERRYVIEW(dev)) + DP &= ~DP_LINK_TRAIN_MASK_CHV; + else + DP &= ~DP_LINK_TRAIN_MASK; I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE); } POSTING_READ(intel_dp->output_reg); From dcfc3552136fb6996e19b9f6980dc5a6721defd5 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:16 +0100 Subject: [PATCH 037/640] drm/i915: Specify when the PLL hw state fields are valid Not all those fields are valid on a given platform. Make it explicit. Unions could also be used, but were cluttering some code paths with if/else ladders. v2: Don't use anonymous unions (Daniel) Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c70b2b67282e..c3beb08813c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -197,10 +197,13 @@ enum intel_dpll_id { #define I915_NUM_PLLS 2 struct intel_dpll_hw_state { + /* i9xx, pch plls */ uint32_t dpll; uint32_t dpll_md; uint32_t fp0; uint32_t fp1; + + /* hsw, bdw */ uint32_t wrpll; }; From 74dd69280bc3f3e84d46b2a0f78901a0d9b4562c Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:17 +0100 Subject: [PATCH 038/640] drm/i915: Add a space to the shared DPLL debug message Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 71957e7184ef..0b8769b9422f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1778,7 +1778,7 @@ static void intel_enable_shared_dpll(struct intel_crtc *crtc) if (WARN_ON(pll->refcount == 0)) return; - DRM_DEBUG_KMS("enable %s (active %d, on? %d)for crtc %d\n", + DRM_DEBUG_KMS("enable %s (active %d, on? %d) for crtc %d\n", pll->name, pll->active, pll->on, crtc->base.base.id); From 7d2c81751c858442387fa5158d4cd80c2190d739 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:18 +0100 Subject: [PATCH 039/640] drm/i915: Extract the HSW DDI selection code into its own function Future platform will slightly change that. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0b8769b9422f..c3bb5f7fd21b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7607,6 +7607,22 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc, return 0; } +static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, + enum port port, + struct intel_crtc_config *pipe_config) +{ + pipe_config->ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); + + switch (pipe_config->ddi_pll_sel) { + case PORT_CLK_SEL_WRPLL1: + pipe_config->shared_dpll = DPLL_ID_WRPLL1; + break; + case PORT_CLK_SEL_WRPLL2: + pipe_config->shared_dpll = DPLL_ID_WRPLL2; + break; + } +} + static void haswell_get_ddi_port_state(struct intel_crtc *crtc, struct intel_crtc_config *pipe_config) { @@ -7620,16 +7636,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT; - pipe_config->ddi_pll_sel = I915_READ(PORT_CLK_SEL(port)); - - switch (pipe_config->ddi_pll_sel) { - case PORT_CLK_SEL_WRPLL1: - pipe_config->shared_dpll = DPLL_ID_WRPLL1; - break; - case PORT_CLK_SEL_WRPLL2: - pipe_config->shared_dpll = DPLL_ID_WRPLL2; - break; - } + haswell_get_ddi_pll(dev_priv, port, pipe_config); if (pipe_config->shared_dpll >= 0) { pll = &dev_priv->shared_dplls[pipe_config->shared_dpll]; From 143b307c43dcfeae41a3b6c24a29fae4c70884f4 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:19 +0100 Subject: [PATCH 040/640] drm/i915: Extract the HSW/BDW shared dpll init code So we can easily provide an alternate implementation in the future. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 5db0b5552e39..ee7a74c6e93a 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1248,10 +1248,8 @@ static const char * const hsw_ddi_pll_names[] = { "WRPLL 2", }; -void intel_ddi_pll_init(struct drm_device *dev) +static void hsw_shared_dplls_init(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t val = I915_READ(LCPLL_CTL); int i; dev_priv->num_shared_dpll = 2; @@ -1264,6 +1262,14 @@ void intel_ddi_pll_init(struct drm_device *dev) dev_priv->shared_dplls[i].get_hw_state = hsw_ddi_pll_get_hw_state; } +} + +void intel_ddi_pll_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t val = I915_READ(LCPLL_CTL); + + hsw_shared_dplls_init(dev_priv); /* The LCPLL register should be turned on by the BIOS. For now let's * just check its state and print errors in case something is wrong. From ea155f32cea99f17371bec00ee9c8e3713a15d4f Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:20 +0100 Subject: [PATCH 041/640] drm/i915: Restrict hsw_dp_set_ddi_pll_sel() to HSW/BDW Future platform will use config->ddi_pll_sel in a different way. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0aa219dff19c..16fcfc67f014 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -964,7 +964,7 @@ found: &pipe_config->dp_m2_n2); } - if (HAS_DDI(dev)) + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) hsw_dp_set_ddi_pll_sel(pipe_config, intel_dp->link_bw); else intel_dp_set_clock(encoder, pipe_config, intel_dp->link_bw); From bf9584bd0e99bd284e115ea8eba9b02a5a2d7b4d Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:21 +0100 Subject: [PATCH 042/640] drm/i915: Fix stale comment for intel_ddi_pll_select() Since the run-time PM on DPMS series, this function has an outdated comment. Refresh it a bit. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ee7a74c6e93a..3b6375d3e422 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -709,10 +709,11 @@ intel_ddi_calculate_wrpll(int clock /* in Hz */, } /* - * Tries to find a PLL for the CRTC. If it finds, it increases the refcount and - * stores it in intel_crtc->ddi_pll_sel, so other mode sets won't be able to - * steal the selected PLL. You need to call intel_ddi_pll_enable to actually - * enable the PLL. + * Tries to find a *shared* PLL for the CRTC and store it in + * intel_crtc->ddi_pll_sel. + * + * For private DPLLs, compute_config() should do the selection for us. This + * function should be folded into compute_config() eventually. */ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc) { From 0220ab6e00785da008bb3736737b877d45858608 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:22 +0100 Subject: [PATCH 043/640] drm/i915: Split the BDW/HSW specific shared pll selection We'll need a different algorithm to select the shared DPLL. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 39 +++++++++++++++++++------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 3b6375d3e422..09599851f1a8 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -708,23 +708,10 @@ intel_ddi_calculate_wrpll(int clock /* in Hz */, *r2_out = best.r2; } -/* - * Tries to find a *shared* PLL for the CRTC and store it in - * intel_crtc->ddi_pll_sel. - * - * For private DPLLs, compute_config() should do the selection for us. This - * function should be folded into compute_config() eventually. - */ -bool intel_ddi_pll_select(struct intel_crtc *intel_crtc) +static bool +hsw_ddi_pll_select(struct intel_crtc *intel_crtc, int output, int clock) { - struct drm_crtc *crtc = &intel_crtc->base; - struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - int type = intel_encoder->type; - int clock = intel_crtc->config.port_clock; - - intel_put_shared_dpll(intel_crtc); - - if (type == INTEL_OUTPUT_HDMI) { + if (output == INTEL_OUTPUT_HDMI) { struct intel_shared_dpll *pll; uint32_t val; unsigned p, n2, r2; @@ -750,6 +737,26 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc) return true; } + +/* + * Tries to find a *shared* PLL for the CRTC and store it in + * intel_crtc->ddi_pll_sel. + * + * For private DPLLs, compute_config() should do the selection for us. This + * function should be folded into compute_config() eventually. + */ +bool intel_ddi_pll_select(struct intel_crtc *intel_crtc) +{ + struct drm_crtc *crtc = &intel_crtc->base; + struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); + int type = intel_encoder->type; + int clock = intel_crtc->config.port_clock; + + intel_put_shared_dpll(intel_crtc); + + return hsw_ddi_pll_select(intel_crtc, type, clock); +} + void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) { struct drm_i915_private *dev_priv = crtc->dev->dev_private; From d664c0cece2dd410d8134aa820112e471e3592dd Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:23 +0100 Subject: [PATCH 044/640] drm/i915: Make intel_ddi_calculate_wrpll() HSW/BDW specific Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 09599851f1a8..eb8e494ce569 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -644,8 +644,8 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, } static void -intel_ddi_calculate_wrpll(int clock /* in Hz */, - unsigned *r2_out, unsigned *n2_out, unsigned *p_out) +hsw_ddi_calculate_wrpll(int clock /* in Hz */, + unsigned *r2_out, unsigned *n2_out, unsigned *p_out) { uint64_t freq2k; unsigned p, n2, r2; @@ -709,14 +709,16 @@ intel_ddi_calculate_wrpll(int clock /* in Hz */, } static bool -hsw_ddi_pll_select(struct intel_crtc *intel_crtc, int output, int clock) +hsw_ddi_pll_select(struct intel_crtc *intel_crtc, + struct intel_encoder *intel_encoder, + int clock) { - if (output == INTEL_OUTPUT_HDMI) { + if (intel_encoder->type == INTEL_OUTPUT_HDMI) { struct intel_shared_dpll *pll; uint32_t val; unsigned p, n2, r2; - intel_ddi_calculate_wrpll(clock * 1000, &r2, &n2, &p); + hsw_ddi_calculate_wrpll(clock * 1000, &r2, &n2, &p); val = WRPLL_PLL_ENABLE | WRPLL_PLL_LCPLL | WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) | @@ -749,12 +751,11 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc) { struct drm_crtc *crtc = &intel_crtc->base; struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - int type = intel_encoder->type; int clock = intel_crtc->config.port_clock; intel_put_shared_dpll(intel_crtc); - return hsw_ddi_pll_select(intel_crtc, type, clock); + return hsw_ddi_pll_select(intel_crtc, intel_encoder, clock); } void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) From ad13d6048f5002f1c5ab21c71a5ee136a2d8e889 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 29 Jul 2014 18:06:24 +0100 Subject: [PATCH 045/640] drm/i915: Split the CDCLK retrieval per-platform This is only going to get worse, so split it now to avoid adding more cases to the if/else ladder. Suggested-by: Daniel Vetter Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 55 ++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index eb8e494ce569..b5870fd920ff 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1192,31 +1192,52 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder) } } -int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv) +static int bdw_get_cdclk_freq(struct drm_i915_private *dev_priv) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + return 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + return 450000; + else if (freq == LCPLL_CLK_FREQ_450) + return 450000; + else if (freq == LCPLL_CLK_FREQ_54O_BDW) + return 540000; + else if (freq == LCPLL_CLK_FREQ_337_5_BDW) + return 337500; + else + return 675000; +} + +static int hsw_get_cdclk_freq(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - if (lcpll & LCPLL_CD_SOURCE_FCLK) { + if (lcpll & LCPLL_CD_SOURCE_FCLK) return 800000; - } else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) { + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) return 450000; - } else if (freq == LCPLL_CLK_FREQ_450) { + else if (freq == LCPLL_CLK_FREQ_450) return 450000; - } else if (IS_HASWELL(dev)) { - if (IS_ULT(dev)) - return 337500; - else - return 540000; - } else { - if (freq == LCPLL_CLK_FREQ_54O_BDW) - return 540000; - else if (freq == LCPLL_CLK_FREQ_337_5_BDW) - return 337500; - else - return 675000; - } + else if (IS_ULT(dev)) + return 337500; + else + return 540000; +} + +int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + + if (IS_BROADWELL(dev)) + return bdw_get_cdclk_freq(dev_priv); + + /* Haswell */ + return hsw_get_cdclk_freq(dev_priv); } static void hsw_ddi_pll_enable(struct drm_i915_private *dev_priv, From 3d51278af91f8e96077dad3a4c1cc0b19fa8ca25 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 29 Jul 2014 20:57:08 +0200 Subject: [PATCH 046/640] drm/i915: Make ddi_clock_gate() HSW/BDW specific Turns out we were again way too naive and optimistic, of course things will change. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b5870fd920ff..3634575534b4 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -587,8 +587,8 @@ static int intel_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv, return (refclk * n * 100) / (p * r); } -void intel_ddi_clock_get(struct intel_encoder *encoder, - struct intel_crtc_config *pipe_config) +static void hsw_ddi_clock_get(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) { struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; int link_clock = 0; @@ -643,6 +643,12 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, pipe_config->adjusted_mode.crtc_clock = pipe_config->port_clock; } +void intel_ddi_clock_get(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) +{ + hsw_ddi_clock_get(encoder, pipe_config); +} + static void hsw_ddi_calculate_wrpll(int clock /* in Hz */, unsigned *r2_out, unsigned *n2_out, unsigned *p_out) @@ -1480,7 +1486,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder, dev_priv->vbt.edp_bpp = pipe_config->pipe_bpp; } - intel_ddi_clock_get(encoder, pipe_config); + hsw_ddi_clock_get(encoder, pipe_config); } static void intel_ddi_destroy(struct drm_encoder *encoder) From 06ffc7789e76a095e85814dbcf7b660344f6b679 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 17 Jul 2014 17:43:46 -0300 Subject: [PATCH 047/640] d rm/i915: freeze display before the interrupts and GT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since we started using intel_runtime_pm_disable_interrupts() at normal (non-runtime) suspend/resume, we had to remove a WARN from ironlake_disable_display_irq to avoid a case where we were doing the correct thing and the WARN was not really needed. The problem is that the WARN was useful in other cases, and its removal can hide some bugs that we would catch automatically. To be able to add back the WARN, we have to call intel_crtc_control() before interrupts are disabled, which is what this patch currently does. Also notice that Ville's patch from the Watermarks series "drm/i915: Leave interrupts enabled while disabling crtcs during suspend" also did a change that's equivalent to the one we're doing on this patch, with the exception that its original patch, when applied to the current tree, procduces a WARN. Related commits: commit daa390e5ee45cc051d6bf37b296901f2f92b002d Author: Jesse Barnes drm/i915: don't warn if IRQs are disabled when shutting down display IRQs commit e11aa362308f5de467ce355a2a2471321b15a35c Author: Jesse Barnes drm/i915: use runtime irq suspend/resume in freeze/thaw Note that the function part of this patch has already been done in commit 0e32b39ceed665bfa4a77a4bc307b6652b991632 Author: Dave Airlie Date: Fri May 2 14:02:48 2014 +1000 drm/i915: add DP 1.2 MST support (v0.7) with the fixup commit 09b64267c1f72f2670fcde9f11e5453ce365ca23 Author: Dave Airlie Date: Wed Jul 23 14:25:24 2014 +1000 drm/i915: don't suspend gt until after we disable irqs and display (v2) so all that's left from Paulo's patch is reinstating the WARNING. Cc: Ville Syrjälä Cc: Jesse Barnes Signed-off-by: Paulo Zanoni Reviewed-by: Jesse Barnes [danvet: Explain conflict resolution with Dave's DP MST patches with a note in the commit message.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0e44c433cfc3..379cfb5dc731 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -151,7 +151,7 @@ ironlake_disable_display_irq(struct drm_i915_private *dev_priv, u32 mask) { assert_spin_locked(&dev_priv->irq_lock); - if (!intel_irqs_enabled(dev_priv)) + if (WARN_ON(!intel_irqs_enabled(dev_priv))) return; if ((dev_priv->irq_mask & mask) != mask) { From 383c5a6a4682f6816fb5a07aebd89c5813c3d1c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:03:57 +0300 Subject: [PATCH 048/640] drm/i915: Add cdclk change support for chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like the Punit is supposed to support the 400MHz cdclk directly on chv, so we don't need the vlv tricks. FIXME: Punit doesn't seem ready for this yet on current hw Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 4 +++ drivers/gpu/drm/i915/intel_display.c | 50 ++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e355fc8d4d59..697c04976cd3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -497,6 +497,10 @@ #define BUNIT_REG_BISOC 0x11 #define PUNIT_REG_DSPFREQ 0x36 +#define DSPFREQSTAT_SHIFT_CHV 24 +#define DSPFREQSTAT_MASK_CHV (0x1f << DSPFREQSTAT_SHIFT_CHV) +#define DSPFREQGUAR_SHIFT_CHV 8 +#define DSPFREQGUAR_MASK_CHV (0x1f << DSPFREQGUAR_SHIFT_CHV) #define DSPFREQSTAT_SHIFT 30 #define DSPFREQSTAT_MASK (0x3 << DSPFREQSTAT_SHIFT) #define DSPFREQGUAR_SHIFT 14 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c3bb5f7fd21b..fdf5ec88866a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4501,6 +4501,47 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) vlv_update_cdclk(dev); } +static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 val, cmd; + + WARN_ON(dev_priv->display.get_display_clock_speed(dev) != dev_priv->vlv_cdclk_freq); + + switch (cdclk) { + case 400000: + cmd = 3; + break; + case 333333: + case 320000: + cmd = 2; + break; + case 266667: + cmd = 1; + break; + case 200000: + cmd = 0; + break; + default: + WARN_ON(1); + return; + } + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK_CHV; + val |= (cmd << DSPFREQGUAR_SHIFT_CHV); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + vlv_update_cdclk(dev); +} + static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv, int max_pixclk) { @@ -4569,8 +4610,13 @@ static void valleyview_modeset_global_resources(struct drm_device *dev) int max_pixclk = intel_mode_max_pixclk(dev_priv); int req_cdclk = valleyview_calc_cdclk(dev_priv, max_pixclk); - if (req_cdclk != dev_priv->vlv_cdclk_freq) - valleyview_set_cdclk(dev, req_cdclk); + if (req_cdclk != dev_priv->vlv_cdclk_freq) { + if (IS_CHERRYVIEW(dev)) + cherryview_set_cdclk(dev, req_cdclk); + else + valleyview_set_cdclk(dev, req_cdclk); + } + modeset_update_crtc_power_domains(dev); } From d49a340d6eb6de45c1a886b71469d110f2dbb57b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:03:58 +0300 Subject: [PATCH 049/640] drm/i915: Disable cdclk changes for chv until Punit is ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Punit seems a bit WIP still. Disable cdclk changes until we have hardware where it works. Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fdf5ec88866a..a7d0c88a620d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4548,6 +4548,10 @@ static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv, int vco = valleyview_get_vco(dev_priv); int freq_320 = (vco << 1) % 320000 != 0 ? 333333 : 320000; + /* FIXME: Punit isn't quite ready yet */ + if (IS_CHERRYVIEW(dev_priv->dev)) + return 400000; + /* * Really only a few cases to deal with, as only 4 CDclks are supported: * 200MHz @@ -5283,6 +5287,10 @@ static int valleyview_get_display_clock_speed(struct drm_device *dev) u32 val; int divider; + /* FIXME: Punit isn't quite ready yet */ + if (IS_CHERRYVIEW(dev)) + return 400000; + mutex_lock(&dev_priv->dpio_lock); val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); mutex_unlock(&dev_priv->dpio_lock); From d17ec4ced6c0907f80f51677a44236da94ecd92d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:03:59 +0300 Subject: [PATCH 050/640] drm/i915: Leave DPLL ref clocks on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We enable the DPLL refclock already when bringing up the cmnlane power well, so also leave it on when otherwise disabling the DPLL. Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a7d0c88a620d..6ca53b372a4c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1684,7 +1684,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) assert_pipe_disabled(dev_priv, pipe); /* Set PLL en = 0 */ - val = DPLL_SSC_REF_CLOCK_CHV; + val = DPLL_SSC_REF_CLOCK_CHV | DPLL_REFA_CLK_ENABLE_VLV; if (pipe != PIPE_A) val |= DPLL_INTEGRATED_CRI_CLK_VLV; I915_WRITE(DPLL(pipe), val); From 1ae0d1377fda91367b27596001c82e877ec2057e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:00 +0300 Subject: [PATCH 051/640] drm/i915: Split chv_update_pll() apart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split chv_update_pll() into two parts ala: commit bdd4b6a655749970cc632aafc5fd596c07b60b1c Author: Daniel Vetter Date: Thu Apr 24 23:55:11 2014 +0200 drm/i915: Extract vlv_prepare_pll Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 30 ++++++++++++++++++---------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6ca53b372a4c..60ba6962026b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -100,6 +100,7 @@ static void ironlake_set_pipeconf(struct drm_crtc *crtc); static void haswell_set_pipeconf(struct drm_crtc *crtc); static void intel_set_pipe_csc(struct drm_crtc *crtc); static void vlv_prepare_pll(struct intel_crtc *crtc); +static void chv_prepare_pll(struct intel_crtc *crtc); static struct intel_encoder *intel_find_encoder(struct intel_connector *connector, int pipe) { @@ -4642,8 +4643,12 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) is_dsi = intel_pipe_has_type(crtc, INTEL_OUTPUT_DSI); - if (!is_dsi && !IS_CHERRYVIEW(dev)) - vlv_prepare_pll(intel_crtc); + if (!is_dsi) { + if (IS_CHERRYVIEW(dev)) + chv_prepare_pll(intel_crtc); + else + vlv_prepare_pll(intel_crtc); + } /* Set up the display plane register */ dspcntr = DISPPLANE_GAMMA_ENABLE; @@ -5691,6 +5696,18 @@ static void vlv_prepare_pll(struct intel_crtc *crtc) } static void chv_update_pll(struct intel_crtc *crtc) +{ + crtc->config.dpll_hw_state.dpll = DPLL_SSC_REF_CLOCK_CHV | + DPLL_REFA_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS | + DPLL_VCO_ENABLE; + if (crtc->pipe != PIPE_A) + crtc->config.dpll_hw_state.dpll |= DPLL_INTEGRATED_CRI_CLK_VLV; + + crtc->config.dpll_hw_state.dpll_md = + (crtc->config.pixel_multiplier - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT; +} + +static void chv_prepare_pll(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -5701,15 +5718,6 @@ static void chv_update_pll(struct intel_crtc *crtc) u32 bestn, bestm1, bestm2, bestp1, bestp2, bestm2_frac; int refclk; - crtc->config.dpll_hw_state.dpll = DPLL_SSC_REF_CLOCK_CHV | - DPLL_REFA_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS | - DPLL_VCO_ENABLE; - if (pipe != PIPE_A) - crtc->config.dpll_hw_state.dpll |= DPLL_INTEGRATED_CRI_CLK_VLV; - - crtc->config.dpll_hw_state.dpll_md = - (crtc->config.pixel_multiplier - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT; - bestn = crtc->config.dpll.n; bestm2_frac = crtc->config.dpll.m2 & 0x3fffff; bestm1 = crtc->config.dpll.m1; From 625695f8c3383765fd8974616aa57ffdbc644f83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:02 +0300 Subject: [PATCH 052/640] drm/i915: Call intel_{dp, hdmi}_prepare for chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV was forgotten the intel_{dp,hdmi}_prepare() were introduced (or the chv patches were still in flight?). Call these when enabling the ports. Things tend to work much better when we actually write something to the port registers :) Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 2 ++ drivers/gpu/drm/i915/intel_hdmi.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 16fcfc67f014..33a45a819525 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2281,6 +2281,8 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder) enum pipe pipe = intel_crtc->pipe; u32 val; + intel_dp_prepare(encoder); + mutex_lock(&dev_priv->dpio_lock); /* program left/right clock distribution */ diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index f9151f6641d9..df25b740b348 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1260,6 +1260,8 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder) enum pipe pipe = intel_crtc->pipe; u32 val; + intel_hdmi_prepare(encoder); + mutex_lock(&dev_priv->dpio_lock); /* program left/right clock distribution */ From 1fb44505f6c547742fcbcba4d3999fb324b5f587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 28 Jun 2014 02:04:03 +0300 Subject: [PATCH 053/640] drm/i915: Clarify CHV swing margin/deemph bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV display PHY registes have two swing margin/deemph settings. Make it clear which ones we're using. Signed-off-by: Ville Syrjälä Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++-- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- drivers/gpu/drm/i915/intel_hdmi.c | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 697c04976cd3..35553aa0a82f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -855,8 +855,8 @@ enum punit_power_well { #define _VLV_TX_DW2_CH0 0x8288 #define _VLV_TX_DW2_CH1 0x8488 -#define DPIO_SWING_MARGIN_SHIFT 16 -#define DPIO_SWING_MARGIN_MASK (0xff << DPIO_SWING_MARGIN_SHIFT) +#define DPIO_SWING_MARGIN000_SHIFT 16 +#define DPIO_SWING_MARGIN000_MASK (0xff << DPIO_SWING_MARGIN000_SHIFT) #define DPIO_UNIQ_TRANS_SCALE_SHIFT 8 #define VLV_TX_DW2(ch) _PORT(ch, _VLV_TX_DW2_CH0, _VLV_TX_DW2_CH1) @@ -864,12 +864,16 @@ enum punit_power_well { #define _VLV_TX_DW3_CH1 0x848c /* The following bit for CHV phy */ #define DPIO_TX_UNIQ_TRANS_SCALE_EN (1<<27) +#define DPIO_SWING_MARGIN101_SHIFT 16 +#define DPIO_SWING_MARGIN101_MASK (0xff << DPIO_SWING_MARGIN101_SHIFT) #define VLV_TX_DW3(ch) _PORT(ch, _VLV_TX_DW3_CH0, _VLV_TX_DW3_CH1) #define _VLV_TX_DW4_CH0 0x8290 #define _VLV_TX_DW4_CH1 0x8490 #define DPIO_SWING_DEEMPH9P5_SHIFT 24 #define DPIO_SWING_DEEMPH9P5_MASK (0xff << DPIO_SWING_DEEMPH9P5_SHIFT) +#define DPIO_SWING_DEEMPH6P0_SHIFT 16 +#define DPIO_SWING_DEEMPH6P0_MASK (0xff << DPIO_SWING_DEEMPH6P0_SHIFT) #define VLV_TX_DW4(ch) _PORT(ch, _VLV_TX_DW4_CH0, _VLV_TX_DW4_CH1) #define _VLV_TX3_DW4_CH0 0x690 diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 33a45a819525..95b972736733 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2649,8 +2649,8 @@ static uint32_t intel_chv_signal_levels(struct intel_dp *intel_dp) /* Program swing margin */ for (i = 0; i < 4; i++) { val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i)); - val &= ~DPIO_SWING_MARGIN_MASK; - val |= margin_reg_value << DPIO_SWING_MARGIN_SHIFT; + val &= ~DPIO_SWING_MARGIN000_MASK; + val |= margin_reg_value << DPIO_SWING_MARGIN000_SHIFT; vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val); } diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index df25b740b348..5f47d359a991 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1431,8 +1431,8 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) for (i = 0; i < 4; i++) { val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i)); - val &= ~DPIO_SWING_MARGIN_MASK; - val |= 102 << DPIO_SWING_MARGIN_SHIFT; + val &= ~DPIO_SWING_MARGIN000_MASK; + val |= 102 << DPIO_SWING_MARGIN000_SHIFT; vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val); } From 7f0c860533ff2de4b3bb84f71d5ce238fffe4d63 Mon Sep 17 00:00:00 2001 From: Shobhit Kumar Date: Wed, 30 Jul 2014 20:34:57 +0530 Subject: [PATCH 054/640] drm/i915: Add support for Video Burst Mode for MIPI DSI v2: Updated the error log as suggested by Imre Signed-off-by: Shobhit Kumar Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_bios.h | 3 +- drivers/gpu/drm/i915/intel_dsi.c | 22 ++++++++----- drivers/gpu/drm/i915/intel_dsi.h | 2 ++ drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 38 ++++++++++++++++++++-- drivers/gpu/drm/i915/intel_dsi_pll.c | 9 ++--- 5 files changed, 57 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h index b98667796337..905999bee2ac 100644 --- a/drivers/gpu/drm/i915/intel_bios.h +++ b/drivers/gpu/drm/i915/intel_bios.h @@ -802,7 +802,8 @@ struct mipi_config { u16 rsvd4; - u8 rsvd5[5]; + u8 rsvd5; + u32 target_burst_mode_freq; u32 dsi_ddr_clk; u32 bridge_ref_clk; diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 670c29a7b5dd..aea8f3383c26 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -423,9 +423,11 @@ static u16 txclkesc(u32 divider, unsigned int us) } /* return pixels in terms of txbyteclkhs */ -static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count) +static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count, + u16 burst_mode_ratio) { - return DIV_ROUND_UP(DIV_ROUND_UP(pixels * bpp, 8), lane_count); + return DIV_ROUND_UP(DIV_ROUND_UP(pixels * bpp * burst_mode_ratio, + 8 * 100), lane_count); } static void set_dsi_timings(struct drm_encoder *encoder, @@ -451,10 +453,12 @@ static void set_dsi_timings(struct drm_encoder *encoder, vbp = mode->vtotal - mode->vsync_end; /* horizontal values are in terms of high speed byte clock */ - hactive = txbyteclkhs(hactive, bpp, lane_count); - hfp = txbyteclkhs(hfp, bpp, lane_count); - hsync = txbyteclkhs(hsync, bpp, lane_count); - hbp = txbyteclkhs(hbp, bpp, lane_count); + hactive = txbyteclkhs(hactive, bpp, lane_count, + intel_dsi->burst_mode_ratio); + hfp = txbyteclkhs(hfp, bpp, lane_count, intel_dsi->burst_mode_ratio); + hsync = txbyteclkhs(hsync, bpp, lane_count, + intel_dsi->burst_mode_ratio); + hbp = txbyteclkhs(hbp, bpp, lane_count, intel_dsi->burst_mode_ratio); I915_WRITE(MIPI_HACTIVE_AREA_COUNT(pipe), hactive); I915_WRITE(MIPI_HFP_COUNT(pipe), hfp); @@ -541,12 +545,14 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder) intel_dsi->video_mode_format == VIDEO_MODE_BURST) { I915_WRITE(MIPI_HS_TX_TIMEOUT(pipe), txbyteclkhs(adjusted_mode->htotal, bpp, - intel_dsi->lane_count) + 1); + intel_dsi->lane_count, + intel_dsi->burst_mode_ratio) + 1); } else { I915_WRITE(MIPI_HS_TX_TIMEOUT(pipe), txbyteclkhs(adjusted_mode->vtotal * adjusted_mode->htotal, - bpp, intel_dsi->lane_count) + 1); + bpp, intel_dsi->lane_count, + intel_dsi->burst_mode_ratio) + 1); } I915_WRITE(MIPI_LP_RX_TIMEOUT(pipe), intel_dsi->lp_rx_timeout); I915_WRITE(MIPI_TURN_AROUND_TIMEOUT(pipe), intel_dsi->turn_arnd_val); diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index fd51867fd0d3..657eb5c1b9d8 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -116,6 +116,8 @@ struct intel_dsi { u16 clk_hs_to_lp_count; u16 init_count; + u32 pclk; + u16 burst_mode_ratio; /* all delays in ms */ u16 backlight_off_delay; diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 47c7584a4aa0..f6bdd44069ce 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -271,6 +271,8 @@ static bool generic_init(struct intel_dsi_device *dsi) u32 ths_prepare_ns, tclk_trail_ns; u32 tclk_prepare_clkzero, ths_prepare_hszero; u32 lp_to_hs_switch, hs_to_lp_switch; + u32 pclk, computed_ddr; + u16 burst_mode_ratio; DRM_DEBUG_KMS("\n"); @@ -284,8 +286,6 @@ static bool generic_init(struct intel_dsi_device *dsi) else if (intel_dsi->pixel_format == VID_MODE_FORMAT_RGB565) bits_per_pixel = 16; - bitrate = (mode->clock * bits_per_pixel) / intel_dsi->lane_count; - intel_dsi->operation_mode = mipi_config->is_cmd_mode; intel_dsi->video_mode_format = mipi_config->video_transfer_mode; intel_dsi->escape_clk_div = mipi_config->byte_clk_sel; @@ -297,6 +297,40 @@ static bool generic_init(struct intel_dsi_device *dsi) intel_dsi->video_frmt_cfg_bits = mipi_config->bta_enabled ? DISABLE_VIDEO_BTA : 0; + pclk = mode->clock; + + /* Burst Mode Ratio + * Target ddr frequency from VBT / non burst ddr freq + * multiply by 100 to preserve remainder + */ + if (intel_dsi->video_mode_format == VIDEO_MODE_BURST) { + if (mipi_config->target_burst_mode_freq) { + computed_ddr = + (pclk * bits_per_pixel) / intel_dsi->lane_count; + + if (mipi_config->target_burst_mode_freq < + computed_ddr) { + DRM_ERROR("Burst mode freq is less than computed\n"); + return false; + } + + burst_mode_ratio = DIV_ROUND_UP( + mipi_config->target_burst_mode_freq * 100, + computed_ddr); + + pclk = DIV_ROUND_UP(pclk * burst_mode_ratio, 100); + } else { + DRM_ERROR("Burst mode target is not set\n"); + return false; + } + } else + burst_mode_ratio = 100; + + intel_dsi->burst_mode_ratio = burst_mode_ratio; + intel_dsi->pclk = pclk; + + bitrate = (pclk * bits_per_pixel) / intel_dsi->lane_count; + switch (intel_dsi->escape_clk_div) { case 0: tlpx_ns = 50; diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index d8bb1ea2f0da..06fad93a68c8 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -134,8 +134,7 @@ static u32 dsi_rr_formula(const struct drm_display_mode *mode, #else /* Get DSI clock from pixel clock */ -static u32 dsi_clk_from_pclk(const struct drm_display_mode *mode, - int pixel_format, int lane_count) +static u32 dsi_clk_from_pclk(u32 pclk, int pixel_format, int lane_count) { u32 dsi_clk_khz; u32 bpp; @@ -156,7 +155,7 @@ static u32 dsi_clk_from_pclk(const struct drm_display_mode *mode, /* DSI data rate = pixel clock * bits per pixel / lane count pixel clock is converted from KHz to Hz */ - dsi_clk_khz = DIV_ROUND_CLOSEST(mode->clock * bpp, lane_count); + dsi_clk_khz = DIV_ROUND_CLOSEST(pclk * bpp, lane_count); return dsi_clk_khz; } @@ -228,14 +227,12 @@ static int dsi_calc_mnp(u32 dsi_clk, struct dsi_mnp *dsi_mnp) static void vlv_configure_dsi_pll(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - const struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); int ret; struct dsi_mnp dsi_mnp; u32 dsi_clk; - dsi_clk = dsi_clk_from_pclk(mode, intel_dsi->pixel_format, + dsi_clk = dsi_clk_from_pclk(intel_dsi->pclk, intel_dsi->pixel_format, intel_dsi->lane_count); ret = dsi_calc_mnp(dsi_clk, &dsi_mnp); From 7f3de8336fc8c44bede43c57e40448171b12ef68 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 30 Jul 2014 22:34:27 +0200 Subject: [PATCH 055/640] drm/i915: Align intel_dsi*.c files a bit I'm not really that insisting on checkpath compliance, but ragged function paramter alignment does get me. Please adjust your editor to just do this for you. Cc: Shobhit Kumar Cc: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dsi.c | 24 ++++++++++++------------ drivers/gpu/drm/i915/intel_dsi_cmd.c | 2 +- drivers/gpu/drm/i915/intel_dsi_pll.c | 8 ++++---- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index aea8f3383c26..5bd9e09ad3c5 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -184,7 +184,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder) /* update the hw state for DPLL */ intel_crtc->config.dpll_hw_state.dpll = DPLL_INTEGRATED_CLOCK_VLV | - DPLL_REFA_CLK_ENABLE_VLV; + DPLL_REFA_CLK_ENABLE_VLV; tmp = I915_READ(DSPCLK_GATE_D); tmp |= DPOUNIT_CLOCK_GATE_DISABLE; @@ -259,8 +259,8 @@ static void intel_dsi_disable(struct intel_encoder *encoder) temp = I915_READ(MIPI_CTRL(pipe)); temp &= ~ESCAPE_CLOCK_DIVIDER_MASK; I915_WRITE(MIPI_CTRL(pipe), temp | - intel_dsi->escape_clk_div << - ESCAPE_CLOCK_DIVIDER_SHIFT); + intel_dsi->escape_clk_div << + ESCAPE_CLOCK_DIVIDER_SHIFT); I915_WRITE(MIPI_EOT_DISABLE(pipe), CLOCKSTOP); @@ -297,7 +297,7 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) usleep_range(2000, 2500); if (wait_for(((I915_READ(MIPI_PORT_CTRL(pipe)) & AFE_LATCHOUT) - == 0x00000), 30)) + == 0x00000), 30)) DRM_ERROR("DSI LP not going Low\n"); val = I915_READ(MIPI_PORT_CTRL(pipe)); @@ -427,7 +427,7 @@ static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count, u16 burst_mode_ratio) { return DIV_ROUND_UP(DIV_ROUND_UP(pixels * bpp * burst_mode_ratio, - 8 * 100), lane_count); + 8 * 100), lane_count); } static void set_dsi_timings(struct drm_encoder *encoder, @@ -454,10 +454,10 @@ static void set_dsi_timings(struct drm_encoder *encoder, /* horizontal values are in terms of high speed byte clock */ hactive = txbyteclkhs(hactive, bpp, lane_count, - intel_dsi->burst_mode_ratio); + intel_dsi->burst_mode_ratio); hfp = txbyteclkhs(hfp, bpp, lane_count, intel_dsi->burst_mode_ratio); hsync = txbyteclkhs(hsync, bpp, lane_count, - intel_dsi->burst_mode_ratio); + intel_dsi->burst_mode_ratio); hbp = txbyteclkhs(hbp, bpp, lane_count, intel_dsi->burst_mode_ratio); I915_WRITE(MIPI_HACTIVE_AREA_COUNT(pipe), hactive); @@ -582,7 +582,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder) * XXX: write MIPI_STOP_STATE_STALL? */ I915_WRITE(MIPI_HIGH_LOW_SWITCH_COUNT(pipe), - intel_dsi->hs_to_lp_count); + intel_dsi->hs_to_lp_count); /* XXX: low power clock equivalence in terms of byte clock. the number * of byte clocks occupied in one low power clock. based on txbyteclkhs @@ -607,10 +607,10 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder) * 64 like 1366 x 768. Enable RANDOM resolution support for such * panels by default */ I915_WRITE(MIPI_VIDEO_MODE_FORMAT(pipe), - intel_dsi->video_frmt_cfg_bits | - intel_dsi->video_mode_format | - IP_TG_CONFIG | - RANDOM_DPI_DISPLAY_RESOLUTION); + intel_dsi->video_frmt_cfg_bits | + intel_dsi->video_mode_format | + IP_TG_CONFIG | + RANDOM_DPI_DISPLAY_RESOLUTION); } static void intel_dsi_pre_pll_enable(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_dsi_cmd.c b/drivers/gpu/drm/i915/intel_dsi_cmd.c index 7f1430ac8543..f4767fd2ebeb 100644 --- a/drivers/gpu/drm/i915/intel_dsi_cmd.c +++ b/drivers/gpu/drm/i915/intel_dsi_cmd.c @@ -430,7 +430,7 @@ void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi) u32 mask; mask = LP_CTRL_FIFO_EMPTY | HS_CTRL_FIFO_EMPTY | - LP_DATA_FIFO_EMPTY | HS_DATA_FIFO_EMPTY; + LP_DATA_FIFO_EMPTY | HS_DATA_FIFO_EMPTY; if (wait_for((I915_READ(MIPI_GEN_FIFO_STAT(pipe)) & mask) == mask, 100)) DRM_ERROR("DPI FIFOs are not empty\n"); diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 06fad93a68c8..fa7a6ca34cd6 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -190,7 +190,7 @@ static int dsi_calc_mnp(u32 dsi_clk, struct dsi_mnp *dsi_mnp) for (m = 62; m <= 92; m++) { for (p = 2; p <= 6; p++) { /* Find the optimal m and p divisors - with minimal error +/- the required clock */ + with minimal error +/- the required clock */ calc_dsi_clk = (m * ref_clk) / p; if (calc_dsi_clk == target_dsi_clk) { calc_m = m; @@ -233,7 +233,7 @@ static void vlv_configure_dsi_pll(struct intel_encoder *encoder) u32 dsi_clk; dsi_clk = dsi_clk_from_pclk(intel_dsi->pclk, intel_dsi->pixel_format, - intel_dsi->lane_count); + intel_dsi->lane_count); ret = dsi_calc_mnp(dsi_clk, &dsi_mnp); if (ret) { @@ -315,8 +315,8 @@ static void assert_bpp_mismatch(int pixel_format, int pipe_bpp) } WARN(bpp != pipe_bpp, - "bpp match assertion failure (expected %d, current %d)\n", - bpp, pipe_bpp); + "bpp match assertion failure (expected %d, current %d)\n", + bpp, pipe_bpp); } u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp) From da46f936bb0396f6a0fb87c2786f541e9f19a73c Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Fri, 1 Aug 2014 02:04:45 -0700 Subject: [PATCH 056/640] drm/i915: Introduce FBC False Color for debug purposes. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this bit enabled, HW changes the color when compressing frames for debug purposes. ALthough the simple way to enable a single bit is over intel_reg_write, this value is overwriten on next update_fbc so depending on the workload it is not possible to set this bit with intel-gpu-tools. So this patch introduces a persistent way to enable false color over debugfs. v2: Use DEFINE_SIMPLE_ATTRIBUTE as Daniel suggested v3: (Ville) only do false color for IVB+ since according to spec bit is MBZ before IVB. v4: We don't have FBC on valleyview nor on cherryview (Ben) v5: s/!HAS_PCH_SPLIT/!HAS_FBC (Ville) Cc: Ville Syrjälä Reviewed-by: Ben Widawsky Signed-off-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 42 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 3 +++ 4 files changed, 48 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9e737b771c40..aea1a819c775 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1433,6 +1433,47 @@ static int i915_fbc_status(struct seq_file *m, void *unused) return 0; } +static int i915_fbc_fc_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev->dev_private; + + if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev)) + return -ENODEV; + + drm_modeset_lock_all(dev); + *val = dev_priv->fbc.false_color; + drm_modeset_unlock_all(dev); + + return 0; +} + +static int i915_fbc_fc_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 reg; + + if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev)) + return -ENODEV; + + drm_modeset_lock_all(dev); + + reg = I915_READ(ILK_DPFC_CONTROL); + dev_priv->fbc.false_color = val; + + I915_WRITE(ILK_DPFC_CONTROL, val ? + (reg | FBC_CTL_FALSE_COLOR) : + (reg & ~FBC_CTL_FALSE_COLOR)); + + drm_modeset_unlock_all(dev); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_fc_fops, + i915_fbc_fc_get, i915_fbc_fc_set, + "%llu\n"); + static int i915_ips_status(struct seq_file *m, void *unused) { struct drm_info_node *node = m->private; @@ -3957,6 +3998,7 @@ static const struct i915_debugfs_files { {"i915_pri_wm_latency", &i915_pri_wm_latency_fops}, {"i915_spr_wm_latency", &i915_spr_wm_latency_fops}, {"i915_cur_wm_latency", &i915_cur_wm_latency_fops}, + {"i915_fbc_false_color", &i915_fbc_fc_fops}, }; void intel_display_crc_init(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c3beb08813c6..ce30bb3ed566 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -637,6 +637,8 @@ struct i915_fbc { struct drm_mm_node compressed_fb; struct drm_mm_node *compressed_llb; + bool false_color; + struct intel_fbc_work { struct delayed_work work; struct drm_crtc *crtc; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 35553aa0a82f..ca37febbc2ae 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1540,6 +1540,7 @@ enum punit_power_well { /* Framebuffer compression for Ironlake */ #define ILK_DPFC_CB_BASE 0x43200 #define ILK_DPFC_CONTROL 0x43208 +#define FBC_CTL_FALSE_COLOR (1<<10) /* The bit 28-8 is reserved */ #define DPFC_RESERVED (0x1FFFFF00) #define ILK_DPFC_RECOMP_CTL 0x4320c diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a318cd5ad8ed..ab80df2909e0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -309,6 +309,9 @@ static void gen7_enable_fbc(struct drm_crtc *crtc) dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; + if (dev_priv->fbc.false_color) + dpfc_ctl |= FBC_CTL_FALSE_COLOR; + I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); if (IS_IVYBRIDGE(dev)) { From ce4dd49e97813740bd2b03ecdc51521be10f3bf1 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 1 Aug 2014 11:07:54 +0100 Subject: [PATCH 057/640] drm/i915: Gather the HDMI level shifter logic into one place The knowledge about the HDMI/DVI DDI translation table was scattered around. - info->hdmi_level_shift was initialized with 6, the index of the 800 mV, 0dB translation - A check on the VBT value was done to ensure it wasn't overflowing the translation table (< 0xC) - The actual programming was done in intel_ddi.c As we need to change that knowledge for Broadwell, let's gather everything into one place. Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/intel_bios.c | 13 +++++-------- drivers/gpu/drm/i915/intel_ddi.c | 14 +++++++++++++- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ce30bb3ed566..0d7e55f2a8a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1232,6 +1232,12 @@ enum modeset_restore { }; struct ddi_vbt_port_info { + /* + * This is an index in the HDMI/DVI DDI buffer translation table. + * The special value HDMI_LEVEL_SHIFT_UNKNOWN means the VBT didn't + * populate this field. + */ +#define HDMI_LEVEL_SHIFT_UNKNOWN 0xff uint8_t hdmi_level_shift; uint8_t supports_dvi:1; diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index a66955037e4e..031c5657255d 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -976,12 +976,10 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (bdb->version >= 158) { /* The VBT HDMI level shift values match the table we have. */ hdmi_level_shift = child->raw[7] & 0xF; - if (hdmi_level_shift < 0xC) { - DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", - port_name(port), - hdmi_level_shift); - info->hdmi_level_shift = hdmi_level_shift; - } + DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", + port_name(port), + hdmi_level_shift); + info->hdmi_level_shift = hdmi_level_shift; } } @@ -1114,8 +1112,7 @@ init_vbt_defaults(struct drm_i915_private *dev_priv) struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; - /* Recommended BSpec default: 800mV 0dB. */ - info->hdmi_level_shift = 6; + info->hdmi_level_shift = HDMI_LEVEL_SHIFT_UNKNOWN; info->supports_dvi = (port != PORT_A && port != PORT_E); info->supports_hdmi = info->supports_dvi; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 3634575534b4..3af8340b21ec 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -145,7 +145,7 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) { struct drm_i915_private *dev_priv = dev->dev_private; u32 reg; - int i; + int i, n_hdmi_entries, hdmi_800mV_0dB; int hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; const u32 *ddi_translations_fdi; const u32 *ddi_translations_dp; @@ -156,15 +156,21 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; ddi_translations_edp = bdw_ddi_translations_edp; + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_800mV_0dB = 6; } else if (IS_HASWELL(dev)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; ddi_translations_edp = hsw_ddi_translations_dp; + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_800mV_0dB = 6; } else { WARN(1, "ddi translation table missing\n"); ddi_translations_edp = bdw_ddi_translations_dp; ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_800mV_0dB = 6; } switch (port) { @@ -193,6 +199,12 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) I915_WRITE(reg, ddi_translations[i]); reg += 4; } + + /* Choose a good default if VBT is badly populated */ + if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || + hdmi_level >= n_hdmi_entries) + hdmi_level = hdmi_800mV_0dB; + /* Entry 9 is for HDMI: */ for (i = 0; i < 2; i++) { I915_WRITE(reg, hsw_ddi_translations_hdmi[hdmi_level * 2 + i]); From a26aa8baee6c274fc23efccf46e891e63c8d0a30 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 1 Aug 2014 11:07:55 +0100 Subject: [PATCH 058/640] drm/i915/bdw: Provide the BDW specific HDMI buffer translation table Among the changes, the tables has only 10 entries instead of 12 on HSW and the index the the 800mV/0dB entry has changed. Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 3af8340b21ec..75ac0b29aa3e 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -111,6 +111,20 @@ static const u32 bdw_ddi_translations_fdi[] = { 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ }; +static const u32 bdw_ddi_translations_hdmi[] = { + /* Idx NT mV diff T mV diff db */ + 0x00FFFFFF, 0x0007000E, /* 0: 400 400 0 */ + 0x00D75FFF, 0x000E000A, /* 1: 400 600 3.5 */ + 0x00BEFFFF, 0x00140006, /* 2: 400 800 6 */ + 0x00FFFFFF, 0x0009000D, /* 3: 450 450 0 */ + 0x00FFFFFF, 0x000E000A, /* 4: 600 600 0 */ + 0x00D7FFFF, 0x00140006, /* 5: 600 800 2.5 */ + 0x80CB2FFF, 0x001B0002, /* 6: 600 1000 4.5 */ + 0x00FFFFFF, 0x00140006, /* 7: 800 800 0 */ + 0x80E79FFF, 0x001B0002, /* 8: 800 1000 2 */ + 0x80FFFFFF, 0x001B0002, /* 9: 1000 1000 0 */ +}; + enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder) { struct drm_encoder *encoder = &intel_encoder->base; @@ -150,18 +164,21 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) const u32 *ddi_translations_fdi; const u32 *ddi_translations_dp; const u32 *ddi_translations_edp; + const u32 *ddi_translations_hdmi; const u32 *ddi_translations; if (IS_BROADWELL(dev)) { ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; ddi_translations_edp = bdw_ddi_translations_edp; - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_800mV_0dB = 6; + ddi_translations_hdmi = bdw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_800mV_0dB = 7; } else if (IS_HASWELL(dev)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; ddi_translations_edp = hsw_ddi_translations_dp; + ddi_translations_hdmi = hsw_ddi_translations_hdmi; n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); hdmi_800mV_0dB = 6; } else { @@ -169,8 +186,9 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) ddi_translations_edp = bdw_ddi_translations_dp; ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_800mV_0dB = 6; + ddi_translations_hdmi = bdw_ddi_translations_hdmi; + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_800mV_0dB = 7; } switch (port) { @@ -207,7 +225,7 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) /* Entry 9 is for HDMI: */ for (i = 0; i < 2; i++) { - I915_WRITE(reg, hsw_ddi_translations_hdmi[hdmi_level * 2 + i]); + I915_WRITE(reg, ddi_translations_hdmi[hdmi_level * 2 + i]); reg += 4; } } From 156ae28c9f327d2c026e91cfacb5e224bb760d66 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 1 Aug 2014 11:07:56 +0100 Subject: [PATCH 059/640] drm/i915/bdw: Remove the HDMI/DVI entry from the DP/eDP/FDI tables We always write entries 0 to 8 from the DDI translation tables and then entry 9 for HDMI/DVI with the help of the VBT. We then don't need the failsafe HDMI entry in the DP/eDP/FDI tables. Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 75ac0b29aa3e..694e1c6118aa 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -82,7 +82,6 @@ static const u32 bdw_ddi_translations_edp[] = { 0x00BEEFFF, 0x000A000C, 0x00FFFFFF, 0x0005000F, 0x00DB6FFF, 0x000A000C, - 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ }; static const u32 bdw_ddi_translations_dp[] = { @@ -95,7 +94,6 @@ static const u32 bdw_ddi_translations_dp[] = { 0x80CB2FFF, 0x001B0002, 0x00F7DFFF, 0x00180004, 0x80D75FFF, 0x001B0002, - 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ }; static const u32 bdw_ddi_translations_fdi[] = { @@ -108,7 +106,6 @@ static const u32 bdw_ddi_translations_fdi[] = { 0x00C30FFF, 0x000C0000, 0x00FFFFFF, 0x00070006, 0x00D75FFF, 0x000C0000, - 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ }; static const u32 bdw_ddi_translations_hdmi[] = { From ac921bdde92a354e8c59ea185dff26dc2611ee81 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 1 Aug 2014 11:07:57 +0100 Subject: [PATCH 060/640] drm/i915: Remove now useless comments about the translation values We used to carry a default HDMI value in entry 9, but this entry got removed for both HSW and BDW. Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 694e1c6118aa..ca1f9a8a7d03 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -33,7 +33,7 @@ * automatically adapt to HDMI connections as well */ static const u32 hsw_ddi_translations_dp[] = { - 0x00FFFFFF, 0x0006000E, /* DP parameters */ + 0x00FFFFFF, 0x0006000E, 0x00D75FFF, 0x0005000A, 0x00C30FFF, 0x00040006, 0x80AAAFFF, 0x000B0000, @@ -45,7 +45,7 @@ static const u32 hsw_ddi_translations_dp[] = { }; static const u32 hsw_ddi_translations_fdi[] = { - 0x00FFFFFF, 0x0007000E, /* FDI parameters */ + 0x00FFFFFF, 0x0007000E, 0x00D75FFF, 0x000F000A, 0x00C30FFF, 0x00060006, 0x00AAAFFF, 0x001E0000, @@ -73,7 +73,7 @@ static const u32 hsw_ddi_translations_hdmi[] = { }; static const u32 bdw_ddi_translations_edp[] = { - 0x00FFFFFF, 0x00000012, /* eDP parameters */ + 0x00FFFFFF, 0x00000012, 0x00EBAFFF, 0x00020011, 0x00C71FFF, 0x0006000F, 0x00AAAFFF, 0x000E000A, @@ -85,7 +85,7 @@ static const u32 bdw_ddi_translations_edp[] = { }; static const u32 bdw_ddi_translations_dp[] = { - 0x00FFFFFF, 0x0007000E, /* DP parameters */ + 0x00FFFFFF, 0x0007000E, 0x00D75FFF, 0x000E000A, 0x00BEFFFF, 0x00140006, 0x80B2CFFF, 0x001B0002, @@ -97,7 +97,7 @@ static const u32 bdw_ddi_translations_dp[] = { }; static const u32 bdw_ddi_translations_fdi[] = { - 0x00FFFFFF, 0x0001000E, /* FDI parameters */ + 0x00FFFFFF, 0x0001000E, 0x00D75FFF, 0x0004000A, 0x00C30FFF, 0x00070006, 0x00AAAFFF, 0x000C0000, From 7fad3594bf2c66843c7d1e09005ff845a94524ab Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 1 Aug 2014 16:19:54 -0300 Subject: [PATCH 061/640] drm/i915: remove duplicate register defines cat i915_reg.h | sort | uniq -d | grep define Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ca37febbc2ae..7f9a81087ebc 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3756,7 +3756,6 @@ enum punit_power_well { #define PIPE_VSYNC_INTERRUPT_STATUS (1UL<<9) #define PIPE_DISPLAY_LINE_COMPARE_STATUS (1UL<<8) #define PIPE_DPST_EVENT_STATUS (1UL<<7) -#define PIPE_LEGACY_BLC_EVENT_STATUS (1UL<<6) #define PIPE_A_PSR_STATUS_VLV (1UL<<6) #define PIPE_LEGACY_BLC_EVENT_STATUS (1UL<<6) #define PIPE_ODD_FIELD_INTERRUPT_STATUS (1UL<<5) @@ -5431,7 +5430,6 @@ enum punit_power_well { #define VLV_GTLC_ALLOWWAKEERR (1 << 1) #define VLV_GTLC_PW_MEDIA_STATUS_MASK (1 << 5) #define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7) -#define VLV_GTLC_SURVIVABILITY_REG 0x130098 #define FORCEWAKE_MT 0xa188 /* multi-threaded */ #define FORCEWAKE_KERNEL 0x1 #define FORCEWAKE_USER 0x2 From 4079b8d1c3e38b6f18fb31e2997fa25276feea07 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 5 Aug 2014 10:39:42 +0100 Subject: [PATCH 062/640] drm/i915: Demote the DRRS messages to debug messages While those messages are interesting, there aren't _that_ interesting. We don't need them in the kernel logs by default. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_dp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 95b972736733..3b88255d87dc 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4429,7 +4429,7 @@ intel_dp_drrs_init(struct intel_digital_port *intel_dig_port, } if (dev_priv->vbt.drrs_type != SEAMLESS_DRRS_SUPPORT) { - DRM_INFO("VBT doesn't support DRRS\n"); + DRM_DEBUG_KMS("VBT doesn't support DRRS\n"); return NULL; } @@ -4437,7 +4437,7 @@ intel_dp_drrs_init(struct intel_digital_port *intel_dig_port, (dev, fixed_mode, connector); if (!downclock_mode) { - DRM_INFO("DRRS not supported\n"); + DRM_DEBUG_KMS("DRRS not supported\n"); return NULL; } @@ -4448,7 +4448,7 @@ intel_dp_drrs_init(struct intel_digital_port *intel_dig_port, intel_dp->drrs_state.type = dev_priv->vbt.drrs_type; intel_dp->drrs_state.refresh_rate_type = DRRS_HIGH_RR; - DRM_INFO("seamless DRRS supported for eDP panel.\n"); + DRM_DEBUG_KMS("seamless DRRS supported for eDP panel.\n"); return downclock_mode; } From b2784e151903628a086d2ee12cf943690216cd6c Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Tue, 5 Aug 2014 11:29:37 +0100 Subject: [PATCH 063/640] drm/i915: Introduce a for_each_intel_encoder() macro Following the established idom, let's provide a macro to iterate through the encoders. spatch helps, once more, for the substitution: @@ iterator name list_for_each_entry; iterator name for_each_intel_encoder; struct intel_encoder * encoder; struct drm_device * dev; @@ -list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { +for_each_intel_encoder(dev, encoder) { ... } I also modified a few call sites by hand where a pointer to mode_config was directly used (to avoid overflowing 80 chars). Signed-off-by: Damien Lespiau [danvet: Wrap paramters correctly in the macro and remove spurious space checkpatch noticed.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 3 +- drivers/gpu/drm/i915/i915_drv.h | 5 +++ drivers/gpu/drm/i915/i915_irq.c | 8 ++-- drivers/gpu/drm/i915/intel_display.c | 59 ++++++++++------------------ drivers/gpu/drm/i915/intel_hdmi.c | 2 +- drivers/gpu/drm/i915/intel_lvds.c | 3 +- 6 files changed, 31 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index aea1a819c775..330caa1ab9f9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2708,8 +2708,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe, *source = INTEL_PIPE_CRC_SOURCE_PIPE; drm_modeset_lock_all(dev); - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (!encoder->base.crtc) continue; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0d7e55f2a8a2..73d2308e2377 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -171,6 +171,11 @@ enum hpd_pin { #define for_each_intel_crtc(dev, intel_crtc) \ list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head) +#define for_each_intel_encoder(dev, intel_encoder) \ + list_for_each_entry(intel_encoder, \ + &(dev)->mode_config.encoder_list, \ + base.head) + #define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ if ((intel_encoder)->base.crtc == (__crtc)) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 379cfb5dc731..87abe8679495 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3522,18 +3522,17 @@ static void cherryview_irq_preinstall(struct drm_device *dev) static void ibx_hpd_irq_setup(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *intel_encoder; u32 hotplug_irqs, hotplug, enabled_irqs = 0; if (HAS_PCH_IBX(dev)) { hotplug_irqs = SDE_HOTPLUG_MASK; - list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head) + for_each_intel_encoder(dev, intel_encoder) if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED) enabled_irqs |= hpd_ibx[intel_encoder->hpd_pin]; } else { hotplug_irqs = SDE_HOTPLUG_MASK_CPT; - list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head) + for_each_intel_encoder(dev, intel_encoder) if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED) enabled_irqs |= hpd_cpt[intel_encoder->hpd_pin]; } @@ -4452,7 +4451,6 @@ static int i965_irq_postinstall(struct drm_device *dev) static void i915_hpd_irq_setup(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *intel_encoder; u32 hotplug_en; @@ -4463,7 +4461,7 @@ static void i915_hpd_irq_setup(struct drm_device *dev) hotplug_en &= ~HOTPLUG_INT_EN_MASK; /* Note HDMI and DP share hotplug bits */ /* enable bits are the same for all generations */ - list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head) + for_each_intel_encoder(dev, intel_encoder) if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED) hotplug_en |= hpd_mask_i915[intel_encoder->hpd_pin]; /* Programming the CRT detection parameters tends diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 60ba6962026b..f32a94544940 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6405,7 +6405,6 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, static void ironlake_init_pch_refclk(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *encoder; u32 val, final; bool has_lvds = false; @@ -6415,8 +6414,7 @@ static void ironlake_init_pch_refclk(struct drm_device *dev) bool can_ssc = false; /* We need to take the global config into account */ - list_for_each_entry(encoder, &mode_config->encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { switch (encoder->type) { case INTEL_OUTPUT_LVDS: has_panel = true; @@ -6723,11 +6721,10 @@ static void lpt_disable_clkout_dp(struct drm_device *dev) static void lpt_init_pch_refclk(struct drm_device *dev) { - struct drm_mode_config *mode_config = &dev->mode_config; struct intel_encoder *encoder; bool has_vga = false; - list_for_each_entry(encoder, &mode_config->encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { switch (encoder->type) { case INTEL_OUTPUT_ANALOG: has_vga = true; @@ -9902,8 +9899,7 @@ static void intel_modeset_update_staged_output_state(struct drm_device *dev) to_intel_encoder(connector->base.encoder); } - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { encoder->new_crtc = to_intel_crtc(encoder->base.crtc); } @@ -9934,8 +9930,7 @@ static void intel_modeset_commit_output_state(struct drm_device *dev) connector->base.encoder = &connector->new_encoder->base; } - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { encoder->base.crtc = &encoder->new_crtc->base; } @@ -10105,8 +10100,7 @@ static bool check_single_encoder_cloning(struct intel_crtc *crtc, struct drm_device *dev = crtc->base.dev; struct intel_encoder *source_encoder; - list_for_each_entry(source_encoder, - &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, source_encoder) { if (source_encoder->new_crtc != crtc) continue; @@ -10122,8 +10116,7 @@ static bool check_encoder_cloning(struct intel_crtc *crtc) struct drm_device *dev = crtc->base.dev; struct intel_encoder *encoder; - list_for_each_entry(encoder, - &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->new_crtc != crtc) continue; @@ -10207,8 +10200,7 @@ encoder_retry: * adjust it according to limitations or connector properties, and also * a chance to reject the mode entirely. */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (&encoder->new_crtc->base != crtc) continue; @@ -10286,8 +10278,7 @@ intel_modeset_affected_pipes(struct drm_crtc *crtc, unsigned *modeset_pipes, 1 << connector->new_encoder->new_crtc->pipe; } - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->base.crtc == &encoder->new_crtc->base) continue; @@ -10361,8 +10352,7 @@ intel_modeset_update_state(struct drm_device *dev, unsigned prepare_pipes) struct intel_crtc *intel_crtc; struct drm_connector *connector; - list_for_each_entry(intel_encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, intel_encoder) { if (!intel_encoder->base.crtc) continue; @@ -10636,8 +10626,7 @@ check_encoder_state(struct drm_device *dev) struct intel_encoder *encoder; struct intel_connector *connector; - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { bool enabled = false; bool active = false; enum pipe pipe, tracked_pipe; @@ -10716,8 +10705,7 @@ check_crtc_state(struct drm_device *dev) WARN(crtc->active && !crtc->base.enabled, "active crtc, but not enabled in sw tracking\n"); - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->base.crtc != &crtc->base) continue; enabled = true; @@ -10739,8 +10727,7 @@ check_crtc_state(struct drm_device *dev) if (crtc->pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) active = crtc->active; - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { enum pipe pipe; if (encoder->base.crtc != &crtc->base) continue; @@ -11108,7 +11095,7 @@ static void intel_set_config_restore_state(struct drm_device *dev, } count = 0; - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { encoder->new_crtc = to_intel_crtc(config->save_encoder_crtcs[count++]); } @@ -11267,8 +11254,7 @@ intel_modeset_stage_output_state(struct drm_device *dev, } /* Check for any encoders that needs to be disabled. */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { int num_connectors = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, @@ -11301,9 +11287,7 @@ intel_modeset_stage_output_state(struct drm_device *dev, for_each_intel_crtc(dev, crtc) { crtc->new_enabled = false; - list_for_each_entry(encoder, - &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->new_crtc == crtc) { crtc->new_enabled = true; break; @@ -11340,7 +11324,7 @@ static void disable_crtc_nofb(struct intel_crtc *crtc) connector->new_encoder = NULL; } - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->new_crtc == crtc) encoder->new_crtc = NULL; } @@ -11972,8 +11956,7 @@ static int intel_encoder_clones(struct intel_encoder *encoder) int index_mask = 0; int entry = 0; - list_for_each_entry(source_encoder, - &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, source_encoder) { if (encoders_cloneable(encoder, source_encoder)) index_mask |= (1 << entry); @@ -12162,7 +12145,7 @@ static void intel_setup_outputs(struct drm_device *dev) intel_edp_psr_init(dev); - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { encoder->base.possible_crtcs = encoder->crtc_mask; encoder->base.possible_clones = intel_encoder_clones(encoder); @@ -13056,8 +13039,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS); } - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { pipe = 0; if (encoder->get_hw_state(encoder, &pipe)) { @@ -13121,8 +13103,7 @@ void intel_modeset_setup_hw_state(struct drm_device *dev, } /* HW state is read out, now we need to sanitize this mess. */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 5f47d359a991..9169786dbbc3 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -885,7 +885,7 @@ static bool hdmi_12bpc_possible(struct intel_crtc *crtc) if (HAS_GMCH_DISPLAY(dev)) return false; - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->new_crtc != crtc) continue; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 881361c0f27e..1987491723a5 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -823,8 +823,7 @@ bool intel_is_dual_link_lvds(struct drm_device *dev) struct intel_encoder *encoder; struct intel_lvds_encoder *lvds_encoder; - list_for_each_entry(encoder, &dev->mode_config.encoder_list, - base.head) { + for_each_intel_encoder(dev, encoder) { if (encoder->type == INTEL_OUTPUT_LVDS) { lvds_encoder = to_lvds_encoder(&encoder->base); From 76eebda727c76b5712f6ce75a45a9917d3873a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Aug 2014 11:26:52 +0530 Subject: [PATCH 064/640] drm/i915: Add 180 degree sprite rotation support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sprite planes (in fact all display planes starting from gen4) support 180 degree rotation. Add the relevant low level bits to the sprite code to make use of that feature. The upper layers are not yet plugged in. v2: HSW handles the rotated buffer offset automagically v3: BDW also handles the rotated buffer offset automagically Testcase: igt/kms_rotation_crc Cc: dri-devel@lists.freedesktop.org Signed-off-by: Ville Syrjälä Signed-off-by: Sagar Kamble Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_sprite.c | 38 +++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7f9a81087ebc..5ebac620bbcd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4218,6 +4218,7 @@ enum punit_power_well { #define DVS_YUV_ORDER_UYVY (1<<16) #define DVS_YUV_ORDER_YVYU (2<<16) #define DVS_YUV_ORDER_VYUY (3<<16) +#define DVS_ROTATE_180 (1<<15) #define DVS_DEST_KEY (1<<2) #define DVS_TRICKLE_FEED_DISABLE (1<<14) #define DVS_TILED (1<<10) @@ -4288,6 +4289,7 @@ enum punit_power_well { #define SPRITE_YUV_ORDER_UYVY (1<<16) #define SPRITE_YUV_ORDER_YVYU (2<<16) #define SPRITE_YUV_ORDER_VYUY (3<<16) +#define SPRITE_ROTATE_180 (1<<15) #define SPRITE_TRICKLE_FEED_DISABLE (1<<14) #define SPRITE_INT_GAMMA_ENABLE (1<<13) #define SPRITE_TILED (1<<10) @@ -4361,6 +4363,7 @@ enum punit_power_well { #define SP_YUV_ORDER_UYVY (1<<16) #define SP_YUV_ORDER_YVYU (2<<16) #define SP_YUV_ORDER_VYUY (3<<16) +#define SP_ROTATE_180 (1<<15) #define SP_TILED (1<<10) #define _SPALINOFF (VLV_DISPLAY_BASE + 0x72184) #define _SPASTRIDE (VLV_DISPLAY_BASE + 0x72188) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3198de3007be..b63df4416301 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -448,6 +448,7 @@ struct intel_plane { unsigned int crtc_w, crtc_h; uint32_t src_x, src_y; uint32_t src_w, src_h; + unsigned int rotation; /* Since we need to change the watermarks before/after * enabling/disabling the planes, we need to store the parameters here diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index d34a5696ffb6..f4d10c4b2781 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -164,6 +164,7 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc, sprctl &= ~SP_PIXFORMAT_MASK; sprctl &= ~SP_YUV_BYTE_ORDER_MASK; sprctl &= ~SP_TILED; + sprctl &= ~SP_ROTATE_180; switch (fb->pixel_format) { case DRM_FORMAT_YUYV: @@ -236,6 +237,14 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc, fb->pitches[0]); linear_offset -= sprsurf_offset; + if (intel_plane->rotation == BIT(DRM_ROTATE_180)) { + sprctl |= SP_ROTATE_180; + + x += src_w; + y += src_h; + linear_offset += src_h * fb->pitches[0] + src_w * pixel_size; + } + atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count); intel_update_primary_plane(intel_crtc); @@ -365,6 +374,7 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, sprctl &= ~SPRITE_RGB_ORDER_RGBX; sprctl &= ~SPRITE_YUV_BYTE_ORDER_MASK; sprctl &= ~SPRITE_TILED; + sprctl &= ~SPRITE_ROTATE_180; switch (fb->pixel_format) { case DRM_FORMAT_XBGR8888: @@ -427,6 +437,18 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, pixel_size, fb->pitches[0]); linear_offset -= sprsurf_offset; + if (intel_plane->rotation == BIT(DRM_ROTATE_180)) { + sprctl |= SPRITE_ROTATE_180; + + /* HSW and BDW does this automagically in hardware */ + if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) { + x += src_w; + y += src_h; + linear_offset += src_h * fb->pitches[0] + + src_w * pixel_size; + } + } + atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count); intel_update_primary_plane(intel_crtc); @@ -572,6 +594,7 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, dvscntr &= ~DVS_RGB_ORDER_XBGR; dvscntr &= ~DVS_YUV_BYTE_ORDER_MASK; dvscntr &= ~DVS_TILED; + dvscntr &= ~DVS_ROTATE_180; switch (fb->pixel_format) { case DRM_FORMAT_XBGR8888: @@ -629,6 +652,14 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, pixel_size, fb->pitches[0]); linear_offset -= dvssurf_offset; + if (intel_plane->rotation == BIT(DRM_ROTATE_180)) { + dvscntr |= DVS_ROTATE_180; + + x += src_w; + y += src_h; + linear_offset += src_h * fb->pitches[0] + src_w * pixel_size; + } + atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count); intel_update_primary_plane(intel_crtc); @@ -896,6 +927,9 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, max_scale = intel_plane->max_downscale << 16; min_scale = intel_plane->can_scale ? 1 : (1 << 16); + drm_rect_rotate(&src, fb->width << 16, fb->height << 16, + intel_plane->rotation); + hscale = drm_rect_calc_hscale_relaxed(&src, &dst, min_scale, max_scale); BUG_ON(hscale < 0); @@ -934,6 +968,9 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, drm_rect_width(&dst) * hscale - drm_rect_width(&src), drm_rect_height(&dst) * vscale - drm_rect_height(&src)); + drm_rect_rotate_inv(&src, fb->width << 16, fb->height << 16, + intel_plane->rotation); + /* sanity check to make sure the src viewport wasn't enlarged */ WARN_ON(src.x1 < (int) src_x || src.y1 < (int) src_y || @@ -1311,6 +1348,7 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane) intel_plane->pipe = pipe; intel_plane->plane = plane; + intel_plane->rotation = BIT(DRM_ROTATE_0); possible_crtcs = (1 << pipe); ret = drm_plane_init(dev, &intel_plane->base, possible_crtcs, &intel_plane_funcs, From e57465f35192246b6587c3bc89b5ed96a8fdfb00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Aug 2014 11:26:53 +0530 Subject: [PATCH 065/640] drm/i915: Make intel_plane_restore() return an error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Propagate the error from intel_update_plane() up through intel_plane_restore() to the caller. This will be used for rollback purposes when setting properties fails. Cc: dri-devel@lists.freedesktop.org Signed-off-by: Ville Syrjälä Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_sprite.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b63df4416301..79782094c8f9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1086,7 +1086,7 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob); int intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane); void intel_flush_primary_plane(struct drm_i915_private *dev_priv, enum plane plane); -void intel_plane_restore(struct drm_plane *plane); +int intel_plane_restore(struct drm_plane *plane); void intel_plane_disable(struct drm_plane *plane); int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index f4d10c4b2781..611826209c99 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1218,18 +1218,18 @@ out_unlock: return ret; } -void intel_plane_restore(struct drm_plane *plane) +int intel_plane_restore(struct drm_plane *plane) { struct intel_plane *intel_plane = to_intel_plane(plane); if (!plane->crtc || !plane->fb) - return; + return 0; - intel_update_plane(plane, plane->crtc, plane->fb, - intel_plane->crtc_x, intel_plane->crtc_y, - intel_plane->crtc_w, intel_plane->crtc_h, - intel_plane->src_x, intel_plane->src_y, - intel_plane->src_w, intel_plane->src_h); + return intel_update_plane(plane, plane->crtc, plane->fb, + intel_plane->crtc_x, intel_plane->crtc_y, + intel_plane->crtc_w, intel_plane->crtc_h, + intel_plane->src_x, intel_plane->src_y, + intel_plane->src_w, intel_plane->src_h); } void intel_plane_disable(struct drm_plane *plane) From 2a297cce2e775812e9d6ca84c3ab92cee5c38e25 Mon Sep 17 00:00:00 2001 From: Sonika Jindal Date: Tue, 5 Aug 2014 11:26:54 +0530 Subject: [PATCH 066/640] drm: Add rotation_property to mode_config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sonika Jindal Reviewed-by: Ville Syrjälä Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- include/drm/drm_crtc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index f1105d0da059..62f73bdbcc47 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -821,6 +821,7 @@ struct drm_mode_config { struct drm_property *dpms_property; struct drm_property *path_property; struct drm_property *plane_type_property; + struct drm_property *rotation_property; /* DVI-I properties */ struct drm_property *dvi_i_subconnector_property; From 7ed6eeeec60c98ec824e27a5052aa249d98bd63a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 5 Aug 2014 11:26:55 +0530 Subject: [PATCH 067/640] drm/i915: Add rotation property for sprites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sprite planes support 180 degree rotation. The lower layers are now in place, so hook in the standard rotation property to expose the feature to the users. v2: Moving rotation_property to mode_config Cc: dri-devel@lists.freedesktop.org Signed-off-by: Ville Syrjälä Signed-off-by: Sonika Jindal Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_sprite.c | 41 ++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 611826209c99..0bdb00b7c59c 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1218,6 +1218,30 @@ out_unlock: return ret; } +static int intel_plane_set_property(struct drm_plane *plane, + struct drm_property *prop, + uint64_t val) +{ + struct drm_device *dev = plane->dev; + struct intel_plane *intel_plane = to_intel_plane(plane); + uint64_t old_val; + int ret = -ENOENT; + + if (prop == dev->mode_config.rotation_property) { + /* exactly one rotation angle please */ + if (hweight32(val & 0xf) != 1) + return -EINVAL; + + old_val = intel_plane->rotation; + intel_plane->rotation = val; + ret = intel_plane_restore(plane); + if (ret) + intel_plane->rotation = old_val; + } + + return ret; +} + int intel_plane_restore(struct drm_plane *plane) { struct intel_plane *intel_plane = to_intel_plane(plane); @@ -1244,6 +1268,7 @@ static const struct drm_plane_funcs intel_plane_funcs = { .update_plane = intel_update_plane, .disable_plane = intel_disable_plane, .destroy = intel_destroy_plane, + .set_property = intel_plane_set_property, }; static uint32_t ilk_plane_formats[] = { @@ -1354,8 +1379,22 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane) &intel_plane_funcs, plane_formats, num_plane_formats, false); - if (ret) + if (ret) { kfree(intel_plane); + goto out; + } + if (!dev->mode_config.rotation_property) + dev->mode_config.rotation_property = + drm_mode_create_rotation_property(dev, + BIT(DRM_ROTATE_0) | + BIT(DRM_ROTATE_180)); + + if (dev->mode_config.rotation_property) + drm_object_attach_property(&intel_plane->base.base, + dev->mode_config.rotation_property, + intel_plane->rotation); + + out: return ret; } From 9783de20967a59d7627772bf77fc8066c47bef79 Mon Sep 17 00:00:00 2001 From: Sonika Jindal Date: Tue, 5 Aug 2014 11:26:57 +0530 Subject: [PATCH 068/640] drm: Resetting rotation property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reset rotation property to 0. v2: Resetting after disabling the plane Signed-off-by: Sonika Jindal Reviewed-by: Ville Syrjälä Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_fb_helper.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 3144db9dc0f1..d139eddb3d61 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -345,10 +345,17 @@ static bool restore_fbdev_mode(struct drm_fb_helper *fb_helper) drm_warn_on_modeset_not_all_locked(dev); - list_for_each_entry(plane, &dev->mode_config.plane_list, head) + list_for_each_entry(plane, &dev->mode_config.plane_list, head) { if (plane->type != DRM_PLANE_TYPE_PRIMARY) drm_plane_force_disable(plane); + if (dev->mode_config.rotation_property) { + drm_object_property_set_value(&plane->base, + dev->mode_config.rotation_property, + BIT(DRM_ROTATE_0)); + } + } + for (i = 0; i < fb_helper->crtc_count; i++) { struct drm_mode_set *mode_set = &fb_helper->crtc_info[i].mode_set; struct drm_crtc *crtc = mode_set->crtc; From 0a56067469bde6662ce7c89a3d290171f878bac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 11 Jun 2014 16:51:18 +0300 Subject: [PATCH 069/640] drm/i915: Fill out the FWx watermark register defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add defines for all the watermark registers on modernish gmch platforms. VLV has increased the number of bits available for certain watermaks so expand the masks appropriately. Also vlv and chv have added some extra FW registers. Not sure what happened on chv because a new register called FW9 is now at the offset where FW7 was on vlv, while FW7 and FW8 (another new register) have been moved off somewhere else. Oh well, well just need two defines for FW7 then. v2: Fix DSPHOWM1 offset (Paulo) Reviewed-by: Paulo Zanoni Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 138 ++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_pm.c | 11 ++- 2 files changed, 130 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5ebac620bbcd..a87eb18b4c90 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3865,28 +3865,136 @@ enum punit_power_well { #define DSPARB_BEND_SHIFT 9 /* on 855 */ #define DSPARB_AEND_SHIFT 0 +/* pnv/gen4/g4x/vlv/chv */ #define DSPFW1 (dev_priv->info.display_mmio_offset + 0x70034) -#define DSPFW_SR_SHIFT 23 -#define DSPFW_SR_MASK (0x1ff<<23) -#define DSPFW_CURSORB_SHIFT 16 -#define DSPFW_CURSORB_MASK (0x3f<<16) -#define DSPFW_PLANEB_SHIFT 8 -#define DSPFW_PLANEB_MASK (0x7f<<8) -#define DSPFW_PLANEA_MASK (0x7f) +#define DSPFW_SR_SHIFT 23 +#define DSPFW_SR_MASK (0x1ff<<23) +#define DSPFW_CURSORB_SHIFT 16 +#define DSPFW_CURSORB_MASK (0x3f<<16) +#define DSPFW_PLANEB_SHIFT 8 +#define DSPFW_PLANEB_MASK (0x7f<<8) +#define DSPFW_PLANEB_MASK_VLV (0xff<<8) /* vlv/chv */ +#define DSPFW_PLANEA_SHIFT 0 +#define DSPFW_PLANEA_MASK (0x7f<<0) +#define DSPFW_PLANEA_MASK_VLV (0xff<<0) /* vlv/chv */ #define DSPFW2 (dev_priv->info.display_mmio_offset + 0x70038) -#define DSPFW_CURSORA_MASK 0x00003f00 -#define DSPFW_CURSORA_SHIFT 8 -#define DSPFW_PLANEC_MASK (0x7f) +#define DSPFW_FBC_SR_EN (1<<31) /* g4x */ +#define DSPFW_FBC_SR_SHIFT 28 +#define DSPFW_FBC_SR_MASK (0x7<<28) /* g4x */ +#define DSPFW_FBC_HPLL_SR_SHIFT 24 +#define DSPFW_FBC_HPLL_SR_MASK (0xf<<24) /* g4x */ +#define DSPFW_SPRITEB_SHIFT (16) +#define DSPFW_SPRITEB_MASK (0x7f<<16) /* g4x */ +#define DSPFW_SPRITEB_MASK_VLV (0xff<<16) /* vlv/chv */ +#define DSPFW_CURSORA_SHIFT 8 +#define DSPFW_CURSORA_MASK (0x3f<<8) +#define DSPFW_PLANEC_SHIFT_OLD 0 +#define DSPFW_PLANEC_MASK_OLD (0x7f<<0) /* pre-gen4 sprite C */ +#define DSPFW_SPRITEA_SHIFT 0 +#define DSPFW_SPRITEA_MASK (0x7f<<0) /* g4x */ +#define DSPFW_SPRITEA_MASK_VLV (0xff<<0) /* vlv/chv */ #define DSPFW3 (dev_priv->info.display_mmio_offset + 0x7003c) -#define DSPFW_HPLL_SR_EN (1<<31) -#define DSPFW_CURSOR_SR_SHIFT 24 +#define DSPFW_HPLL_SR_EN (1<<31) #define PINEVIEW_SELF_REFRESH_EN (1<<30) +#define DSPFW_CURSOR_SR_SHIFT 24 #define DSPFW_CURSOR_SR_MASK (0x3f<<24) #define DSPFW_HPLL_CURSOR_SHIFT 16 #define DSPFW_HPLL_CURSOR_MASK (0x3f<<16) -#define DSPFW_HPLL_SR_MASK (0x1ff) -#define DSPFW4 (dev_priv->info.display_mmio_offset + 0x70070) -#define DSPFW7 (dev_priv->info.display_mmio_offset + 0x7007c) +#define DSPFW_HPLL_SR_SHIFT 0 +#define DSPFW_HPLL_SR_MASK (0x1ff<<0) + +/* vlv/chv */ +#define DSPFW4 (VLV_DISPLAY_BASE + 0x70070) +#define DSPFW_SPRITEB_WM1_SHIFT 16 +#define DSPFW_SPRITEB_WM1_MASK (0xff<<16) +#define DSPFW_CURSORA_WM1_SHIFT 8 +#define DSPFW_CURSORA_WM1_MASK (0x3f<<8) +#define DSPFW_SPRITEA_WM1_SHIFT 0 +#define DSPFW_SPRITEA_WM1_MASK (0xff<<0) +#define DSPFW5 (VLV_DISPLAY_BASE + 0x70074) +#define DSPFW_PLANEB_WM1_SHIFT 24 +#define DSPFW_PLANEB_WM1_MASK (0xff<<24) +#define DSPFW_PLANEA_WM1_SHIFT 16 +#define DSPFW_PLANEA_WM1_MASK (0xff<<16) +#define DSPFW_CURSORB_WM1_SHIFT 8 +#define DSPFW_CURSORB_WM1_MASK (0x3f<<8) +#define DSPFW_CURSOR_SR_WM1_SHIFT 0 +#define DSPFW_CURSOR_SR_WM1_MASK (0x3f<<0) +#define DSPFW6 (VLV_DISPLAY_BASE + 0x70078) +#define DSPFW_SR_WM1_SHIFT 0 +#define DSPFW_SR_WM1_MASK (0x1ff<<0) +#define DSPFW7 (VLV_DISPLAY_BASE + 0x7007c) +#define DSPFW7_CHV (VLV_DISPLAY_BASE + 0x700b4) /* wtf #1? */ +#define DSPFW_SPRITED_WM1_SHIFT 24 +#define DSPFW_SPRITED_WM1_MASK (0xff<<24) +#define DSPFW_SPRITED_SHIFT 16 +#define DSPFW_SPRITED_MASK (0xff<<16) +#define DSPFW_SPRITEC_WM1_SHIFT 8 +#define DSPFW_SPRITEC_WM1_MASK (0xff<<8) +#define DSPFW_SPRITEC_SHIFT 0 +#define DSPFW_SPRITEC_MASK (0xff<<0) +#define DSPFW8_CHV (VLV_DISPLAY_BASE + 0x700b8) +#define DSPFW_SPRITEF_WM1_SHIFT 24 +#define DSPFW_SPRITEF_WM1_MASK (0xff<<24) +#define DSPFW_SPRITEF_SHIFT 16 +#define DSPFW_SPRITEF_MASK (0xff<<16) +#define DSPFW_SPRITEE_WM1_SHIFT 8 +#define DSPFW_SPRITEE_WM1_MASK (0xff<<8) +#define DSPFW_SPRITEE_SHIFT 0 +#define DSPFW_SPRITEE_MASK (0xff<<0) +#define DSPFW9_CHV (VLV_DISPLAY_BASE + 0x7007c) /* wtf #2? */ +#define DSPFW_PLANEC_WM1_SHIFT 24 +#define DSPFW_PLANEC_WM1_MASK (0xff<<24) +#define DSPFW_PLANEC_SHIFT 16 +#define DSPFW_PLANEC_MASK (0xff<<16) +#define DSPFW_CURSORC_WM1_SHIFT 8 +#define DSPFW_CURSORC_WM1_MASK (0x3f<<16) +#define DSPFW_CURSORC_SHIFT 0 +#define DSPFW_CURSORC_MASK (0x3f<<0) + +/* vlv/chv high order bits */ +#define DSPHOWM (VLV_DISPLAY_BASE + 0x70064) +#define DSPFW_SR_HI_SHIFT 24 +#define DSPFW_SR_HI_MASK (1<<24) +#define DSPFW_SPRITEF_HI_SHIFT 23 +#define DSPFW_SPRITEF_HI_MASK (1<<23) +#define DSPFW_SPRITEE_HI_SHIFT 22 +#define DSPFW_SPRITEE_HI_MASK (1<<22) +#define DSPFW_PLANEC_HI_SHIFT 21 +#define DSPFW_PLANEC_HI_MASK (1<<21) +#define DSPFW_SPRITED_HI_SHIFT 20 +#define DSPFW_SPRITED_HI_MASK (1<<20) +#define DSPFW_SPRITEC_HI_SHIFT 16 +#define DSPFW_SPRITEC_HI_MASK (1<<16) +#define DSPFW_PLANEB_HI_SHIFT 12 +#define DSPFW_PLANEB_HI_MASK (1<<12) +#define DSPFW_SPRITEB_HI_SHIFT 8 +#define DSPFW_SPRITEB_HI_MASK (1<<8) +#define DSPFW_SPRITEA_HI_SHIFT 4 +#define DSPFW_SPRITEA_HI_MASK (1<<4) +#define DSPFW_PLANEA_HI_SHIFT 0 +#define DSPFW_PLANEA_HI_MASK (1<<0) +#define DSPHOWM1 (VLV_DISPLAY_BASE + 0x70068) +#define DSPFW_SR_WM1_HI_SHIFT 24 +#define DSPFW_SR_WM1_HI_MASK (1<<24) +#define DSPFW_SPRITEF_WM1_HI_SHIFT 23 +#define DSPFW_SPRITEF_WM1_HI_MASK (1<<23) +#define DSPFW_SPRITEE_WM1_HI_SHIFT 22 +#define DSPFW_SPRITEE_WM1_HI_MASK (1<<22) +#define DSPFW_PLANEC_WM1_HI_SHIFT 21 +#define DSPFW_PLANEC_WM1_HI_MASK (1<<21) +#define DSPFW_SPRITED_WM1_HI_SHIFT 20 +#define DSPFW_SPRITED_WM1_HI_MASK (1<<20) +#define DSPFW_SPRITEC_WM1_HI_SHIFT 16 +#define DSPFW_SPRITEC_WM1_HI_MASK (1<<16) +#define DSPFW_PLANEB_WM1_HI_SHIFT 12 +#define DSPFW_PLANEB_WM1_HI_MASK (1<<12) +#define DSPFW_SPRITEB_WM1_HI_SHIFT 8 +#define DSPFW_SPRITEB_WM1_HI_MASK (1<<8) +#define DSPFW_SPRITEA_WM1_HI_SHIFT 4 +#define DSPFW_SPRITEA_WM1_HI_MASK (1<<4) +#define DSPFW_PLANEA_WM1_HI_SHIFT 0 +#define DSPFW_PLANEA_WM1_HI_MASK (1<<0) /* drain latency register values*/ #define DRAIN_LATENCY_PRECISION_32 32 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ab80df2909e0..0f9164d854de 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1400,7 +1400,7 @@ static void valleyview_update_wm(struct drm_crtc *crtc) (plane_sr << DSPFW_SR_SHIFT) | (cursorb_wm << DSPFW_CURSORB_SHIFT) | (planeb_wm << DSPFW_PLANEB_SHIFT) | - planea_wm); + (planea_wm << DSPFW_PLANEA_SHIFT)); I915_WRITE(DSPFW2, (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | (cursora_wm << DSPFW_CURSORA_SHIFT)); @@ -1457,7 +1457,7 @@ static void g4x_update_wm(struct drm_crtc *crtc) (plane_sr << DSPFW_SR_SHIFT) | (cursorb_wm << DSPFW_CURSORB_SHIFT) | (planeb_wm << DSPFW_PLANEB_SHIFT) | - planea_wm); + (planea_wm << DSPFW_PLANEA_SHIFT)); I915_WRITE(DSPFW2, (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | (cursora_wm << DSPFW_CURSORA_SHIFT)); @@ -1531,8 +1531,11 @@ static void i965_update_wm(struct drm_crtc *unused_crtc) /* 965 has limitations... */ I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) | - (8 << 16) | (8 << 8) | (8 << 0)); - I915_WRITE(DSPFW2, (8 << 8) | (8 << 0)); + (8 << DSPFW_CURSORB_SHIFT) | + (8 << DSPFW_PLANEB_SHIFT) | + (8 << DSPFW_PLANEA_SHIFT)); + I915_WRITE(DSPFW2, (8 << DSPFW_CURSORA_SHIFT) | + (8 << DSPFW_PLANEC_SHIFT_OLD)); /* update cursor SR watermark */ I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); From 1abc4dc7e2ca7886b6b06bcf2ff1451702c83f9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 26 Jun 2014 17:02:37 +0300 Subject: [PATCH 070/640] drm/i915: Parametrize VLV_DDL registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VLV/CHV DDL registers are uniform, and neatly enough the register offsets are sane so we can easily unify them to a single set of defines and just pass the pipe as the parameter to compute the register offset. Note that we now fill out the drain latency for pipe C on CHV which we didn't do before. The rest of the pipe C watermarks are still untouched but that will be remedied later by adding a proper cherryview_update_wm() function. v2: Add a note about CHV pipe C changes (Paulo) Reviewed-by: Paulo Zanoni Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 54 ++++++++------------------------- drivers/gpu/drm/i915/intel_pm.c | 44 ++++++++++++--------------- 2 files changed, 32 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a87eb18b4c90..fbdb08f4cd76 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3999,47 +3999,19 @@ enum punit_power_well { /* drain latency register values*/ #define DRAIN_LATENCY_PRECISION_32 32 #define DRAIN_LATENCY_PRECISION_64 64 -#define VLV_DDL1 (VLV_DISPLAY_BASE + 0x70050) -#define DDL_CURSORA_PRECISION_64 (1<<31) -#define DDL_CURSORA_PRECISION_32 (0<<31) -#define DDL_CURSORA_SHIFT 24 -#define DDL_SPRITEB_PRECISION_64 (1<<23) -#define DDL_SPRITEB_PRECISION_32 (0<<23) -#define DDL_SPRITEB_SHIFT 16 -#define DDL_SPRITEA_PRECISION_64 (1<<15) -#define DDL_SPRITEA_PRECISION_32 (0<<15) -#define DDL_SPRITEA_SHIFT 8 -#define DDL_PLANEA_PRECISION_64 (1<<7) -#define DDL_PLANEA_PRECISION_32 (0<<7) -#define DDL_PLANEA_SHIFT 0 - -#define VLV_DDL2 (VLV_DISPLAY_BASE + 0x70054) -#define DDL_CURSORB_PRECISION_64 (1<<31) -#define DDL_CURSORB_PRECISION_32 (0<<31) -#define DDL_CURSORB_SHIFT 24 -#define DDL_SPRITED_PRECISION_64 (1<<23) -#define DDL_SPRITED_PRECISION_32 (0<<23) -#define DDL_SPRITED_SHIFT 16 -#define DDL_SPRITEC_PRECISION_64 (1<<15) -#define DDL_SPRITEC_PRECISION_32 (0<<15) -#define DDL_SPRITEC_SHIFT 8 -#define DDL_PLANEB_PRECISION_64 (1<<7) -#define DDL_PLANEB_PRECISION_32 (0<<7) -#define DDL_PLANEB_SHIFT 0 - -#define VLV_DDL3 (VLV_DISPLAY_BASE + 0x70058) -#define DDL_CURSORC_PRECISION_64 (1<<31) -#define DDL_CURSORC_PRECISION_32 (0<<31) -#define DDL_CURSORC_SHIFT 24 -#define DDL_SPRITEF_PRECISION_64 (1<<23) -#define DDL_SPRITEF_PRECISION_32 (0<<23) -#define DDL_SPRITEF_SHIFT 16 -#define DDL_SPRITEE_PRECISION_64 (1<<15) -#define DDL_SPRITEE_PRECISION_32 (0<<15) -#define DDL_SPRITEE_SHIFT 8 -#define DDL_PLANEC_PRECISION_64 (1<<7) -#define DDL_PLANEC_PRECISION_32 (0<<7) -#define DDL_PLANEC_SHIFT 0 +#define VLV_DDL(pipe) (VLV_DISPLAY_BASE + 0x70050 + 4 * (pipe)) +#define DDL_CURSOR_PRECISION_64 (1<<31) +#define DDL_CURSOR_PRECISION_32 (0<<31) +#define DDL_CURSOR_SHIFT 24 +#define DDL_SPRITE1_PRECISION_64 (1<<23) +#define DDL_SPRITE1_PRECISION_32 (0<<23) +#define DDL_SPRITE1_SHIFT 16 +#define DDL_SPRITE0_PRECISION_64 (1<<15) +#define DDL_SPRITE0_PRECISION_32 (0<<15) +#define DDL_SPRITE0_SHIFT 8 +#define DDL_PLANE_PRECISION_64 (1<<7) +#define DDL_PLANE_PRECISION_32 (0<<7) +#define DDL_PLANE_SHIFT 0 /* FIFO watermark sizes etc */ #define G4X_FIFO_LINE_SIZE 64 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0f9164d854de..2573c6967559 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1313,35 +1313,29 @@ static bool vlv_compute_drain_latency(struct drm_device *dev, static void vlv_update_drain_latency(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int planea_prec, planea_dl, planeb_prec, planeb_dl; - int cursora_prec, cursora_dl, cursorb_prec, cursorb_dl; - int plane_prec_mult, cursor_prec_mult; /* Precision multiplier is - either 16 or 32 */ + enum pipe pipe; - /* For plane A, Cursor A */ - if (vlv_compute_drain_latency(dev, 0, &plane_prec_mult, &planea_dl, - &cursor_prec_mult, &cursora_dl)) { - cursora_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ? - DDL_CURSORA_PRECISION_32 : DDL_CURSORA_PRECISION_64; - planea_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ? - DDL_PLANEA_PRECISION_32 : DDL_PLANEA_PRECISION_64; + for_each_pipe(pipe) { + int plane_prec, plane_dl; + int cursor_prec, cursor_dl; + int plane_prec_mult, cursor_prec_mult; - I915_WRITE(VLV_DDL1, cursora_prec | - (cursora_dl << DDL_CURSORA_SHIFT) | - planea_prec | planea_dl); - } + if (!vlv_compute_drain_latency(dev, pipe, &plane_prec_mult, &plane_dl, + &cursor_prec_mult, &cursor_dl)) + continue; - /* For plane B, Cursor B */ - if (vlv_compute_drain_latency(dev, 1, &plane_prec_mult, &planeb_dl, - &cursor_prec_mult, &cursorb_dl)) { - cursorb_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ? - DDL_CURSORB_PRECISION_32 : DDL_CURSORB_PRECISION_64; - planeb_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ? - DDL_PLANEB_PRECISION_32 : DDL_PLANEB_PRECISION_64; + /* + * FIXME CHV spec still lists 16 and 32 as the precision + * values. Need to figure out if spec is outdated or what. + */ + cursor_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_64) ? + DDL_CURSOR_PRECISION_64 : DDL_CURSOR_PRECISION_32; + plane_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_64) ? + DDL_PLANE_PRECISION_64 : DDL_PLANE_PRECISION_32; - I915_WRITE(VLV_DDL2, cursorb_prec | - (cursorb_dl << DDL_CURSORB_SHIFT) | - planeb_prec | planeb_dl); + I915_WRITE(VLV_DDL(pipe), cursor_prec | + (cursor_dl << DDL_CURSOR_SHIFT) | + plane_prec | (plane_dl << DDL_PLANE_SHIFT)); } } From 41aad816d7061f6cc3d92f39fc655f034bbfb1c0 Mon Sep 17 00:00:00 2001 From: Gajanan Bhat Date: Wed, 16 Jul 2014 18:24:03 +0530 Subject: [PATCH 071/640] drm/i915: Update DDL only for current CRTC Instead of looping through all CRTCs, update DDL for current CRTC for which watermark is being updated. CHV is confirmed to have precision of 32/64 which is same as VLV. Reviewed-by: Imre Deak Signed-off-by: Gajanan Bhat Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2573c6967559..0feeae845f12 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1310,24 +1310,17 @@ static bool vlv_compute_drain_latency(struct drm_device *dev, * latency value. */ -static void vlv_update_drain_latency(struct drm_device *dev) +static void vlv_update_drain_latency(struct drm_crtc *crtc) { + struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; - enum pipe pipe; + enum pipe pipe = to_intel_crtc(crtc)->pipe; + int plane_prec, plane_dl; + int cursor_prec, cursor_dl; + int plane_prec_mult, cursor_prec_mult; - for_each_pipe(pipe) { - int plane_prec, plane_dl; - int cursor_prec, cursor_dl; - int plane_prec_mult, cursor_prec_mult; - - if (!vlv_compute_drain_latency(dev, pipe, &plane_prec_mult, &plane_dl, - &cursor_prec_mult, &cursor_dl)) - continue; - - /* - * FIXME CHV spec still lists 16 and 32 as the precision - * values. Need to figure out if spec is outdated or what. - */ + if (vlv_compute_drain_latency(dev, pipe, &plane_prec_mult, &plane_dl, + &cursor_prec_mult, &cursor_dl)) { cursor_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_64) ? DDL_CURSOR_PRECISION_64 : DDL_CURSOR_PRECISION_32; plane_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_64) ? @@ -1352,7 +1345,7 @@ static void valleyview_update_wm(struct drm_crtc *crtc) unsigned int enabled = 0; bool cxsr_enabled; - vlv_update_drain_latency(dev); + vlv_update_drain_latency(crtc); if (g4x_compute_wm0(dev, PIPE_A, &valleyview_wm_info, latency_ns, From 3c2777fd2faa5d1c1d5867baa086f2fd8b05479e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 26 Jun 2014 17:03:06 +0300 Subject: [PATCH 072/640] drm/i915: Add cherryview_update_wm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CHV has a third pipe so we need to compute the watermarks for its planes. Add cherryview_update_wm() to do just that. v2: Rebase on top of Imre's cxsr changes v3: Pass crtc to vlv_update_drain_latency() Reviewed-by: Paulo Zanoni Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 81 ++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0feeae845f12..da43e30ad60b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1399,6 +1399,85 @@ static void valleyview_update_wm(struct drm_crtc *crtc) intel_set_memory_cxsr(dev_priv, true); } +static void cherryview_update_wm(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + static const int sr_latency_ns = 12000; + struct drm_i915_private *dev_priv = dev->dev_private; + int planea_wm, planeb_wm, planec_wm; + int cursora_wm, cursorb_wm, cursorc_wm; + int plane_sr, cursor_sr; + int ignore_plane_sr, ignore_cursor_sr; + unsigned int enabled = 0; + bool cxsr_enabled; + + vlv_update_drain_latency(crtc); + + if (g4x_compute_wm0(dev, PIPE_A, + &valleyview_wm_info, latency_ns, + &valleyview_cursor_wm_info, latency_ns, + &planea_wm, &cursora_wm)) + enabled |= 1 << PIPE_A; + + if (g4x_compute_wm0(dev, PIPE_B, + &valleyview_wm_info, latency_ns, + &valleyview_cursor_wm_info, latency_ns, + &planeb_wm, &cursorb_wm)) + enabled |= 1 << PIPE_B; + + if (g4x_compute_wm0(dev, PIPE_C, + &valleyview_wm_info, latency_ns, + &valleyview_cursor_wm_info, latency_ns, + &planec_wm, &cursorc_wm)) + enabled |= 1 << PIPE_C; + + if (single_plane_enabled(enabled) && + g4x_compute_srwm(dev, ffs(enabled) - 1, + sr_latency_ns, + &valleyview_wm_info, + &valleyview_cursor_wm_info, + &plane_sr, &ignore_cursor_sr) && + g4x_compute_srwm(dev, ffs(enabled) - 1, + 2*sr_latency_ns, + &valleyview_wm_info, + &valleyview_cursor_wm_info, + &ignore_plane_sr, &cursor_sr)) { + cxsr_enabled = true; + } else { + cxsr_enabled = false; + intel_set_memory_cxsr(dev_priv, false); + plane_sr = cursor_sr = 0; + } + + DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, " + "B: plane=%d, cursor=%d, C: plane=%d, cursor=%d, " + "SR: plane=%d, cursor=%d\n", + planea_wm, cursora_wm, + planeb_wm, cursorb_wm, + planec_wm, cursorc_wm, + plane_sr, cursor_sr); + + I915_WRITE(DSPFW1, + (plane_sr << DSPFW_SR_SHIFT) | + (cursorb_wm << DSPFW_CURSORB_SHIFT) | + (planeb_wm << DSPFW_PLANEB_SHIFT) | + (planea_wm << DSPFW_PLANEA_SHIFT)); + I915_WRITE(DSPFW2, + (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) | + (cursora_wm << DSPFW_CURSORA_SHIFT)); + I915_WRITE(DSPFW3, + (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) | + (cursor_sr << DSPFW_CURSOR_SR_SHIFT)); + I915_WRITE(DSPFW9_CHV, + (I915_READ(DSPFW9_CHV) & ~(DSPFW_PLANEC_MASK | + DSPFW_CURSORC_MASK)) | + (planec_wm << DSPFW_PLANEC_SHIFT) | + (cursorc_wm << DSPFW_CURSORC_SHIFT)); + + if (cxsr_enabled) + intel_set_memory_cxsr(dev_priv, true); +} + static void g4x_update_wm(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -7119,7 +7198,7 @@ void intel_init_pm(struct drm_device *dev) else if (INTEL_INFO(dev)->gen == 8) dev_priv->display.init_clock_gating = gen8_init_clock_gating; } else if (IS_CHERRYVIEW(dev)) { - dev_priv->display.update_wm = valleyview_update_wm; + dev_priv->display.update_wm = cherryview_update_wm; dev_priv->display.init_clock_gating = cherryview_init_clock_gating; } else if (IS_VALLEYVIEW(dev)) { From 3dd7b97458e8aa2d8985b46622d226fa635071e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Jun 2014 19:49:57 +0300 Subject: [PATCH 073/640] drm/i915: Hack to tie both common lanes together on chv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It looks like frobbing the cmnreset line on pne PHY disturbs the other PHY on chv. The result is a black screen. On HDMI it's just a flash of black, but DP usually falls over and can't get back up. As a workaround set up the power domains so that both common lane wells power up and down together. I also tried leaving the cmnreset deasserted even the if the power well goes down but that didn't seem acceptable to the PHY. Reviewed-by: Rafael Barbalho Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index da43e30ad60b..aaae22815965 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6877,13 +6877,23 @@ static struct i915_power_well chv_power_wells[] = { #endif { .name = "dpio-common-bc", - .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS, + /* + * XXX: cmnreset for one PHY seems to disturb the other. + * As a workaround keep both powered on at the same + * time for now. + */ + .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS, .data = PUNIT_POWER_WELL_DPIO_CMN_BC, .ops = &chv_dpio_cmn_power_well_ops, }, { .name = "dpio-common-d", - .domains = CHV_DPIO_CMN_D_POWER_DOMAINS, + /* + * XXX: cmnreset for one PHY seems to disturb the other. + * As a workaround keep both powered on at the same + * time for now. + */ + .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS, .data = PUNIT_POWER_WELL_DPIO_CMN_D, .ops = &chv_dpio_cmn_power_well_ops, }, From efd814b73cfcf4ed1cb561561d8df6daccb3ceb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 27 Jun 2014 19:52:13 +0300 Subject: [PATCH 074/640] drm/i915: Polish the chv cmnlane resrt macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the semi-funky cmnlane assert/deassert macros with something a bit more conventional. Also protect the macro arguments properly (also for PHY_POWERGOOD()). Reviewed-by: Rafael Barbalho Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 7 ++----- drivers/gpu/drm/i915/intel_pm.c | 8 ++++---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fbdb08f4cd76..468ef09d698d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1697,12 +1697,9 @@ enum punit_power_well { #define DPIO_PHY_STATUS (VLV_DISPLAY_BASE + 0x6240) #define DPLL_PORTD_READY_MASK (0xf) #define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100) -#define PHY_COM_LANE_RESET_DEASSERT(phy, val) \ - ((phy == DPIO_PHY0) ? (val | 1) : (val | 2)) -#define PHY_COM_LANE_RESET_ASSERT(phy, val) \ - ((phy == DPIO_PHY0) ? (val & ~1) : (val & ~2)) +#define PHY_COM_LANE_RESET_DEASSERT(phy) (1 << (phy)) #define DISPLAY_PHY_STATUS (VLV_DISPLAY_BASE + 0x60104) -#define PHY_POWERGOOD(phy) ((phy == DPIO_PHY0) ? (1<<31) : (1<<30)) +#define PHY_POWERGOOD(phy) (((phy) == DPIO_PHY0) ? (1<<31) : (1<<30)) /* * The i830 generation, in LVDS mode, defines P1 as the bit number set within diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index aaae22815965..42bb329b2d05 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6359,8 +6359,8 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & PHY_POWERGOOD(phy), 1)) DRM_ERROR("Display PHY %d is not power up\n", phy); - I915_WRITE(DISPLAY_PHY_CONTROL, - PHY_COM_LANE_RESET_DEASSERT(phy, I915_READ(DISPLAY_PHY_CONTROL))); + I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) | + PHY_COM_LANE_RESET_DEASSERT(phy)); } static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, @@ -6380,8 +6380,8 @@ static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, assert_pll_disabled(dev_priv, PIPE_C); } - I915_WRITE(DISPLAY_PHY_CONTROL, - PHY_COM_LANE_RESET_ASSERT(phy, I915_READ(DISPLAY_PHY_CONTROL))); + I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) & + ~PHY_COM_LANE_RESET_DEASSERT(phy)); vlv_set_power_well(dev_priv, power_well, false); } From 692ef70c016b5035ad1942ccc2bc4040aa290044 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 5 Aug 2014 07:51:18 -0700 Subject: [PATCH 075/640] drm/i915: clean up PPGTT checking logic sanitize_enable_ppgtt is the function that checks all the conditions, honoring a forced ppgtt status or doing auto-detect as necessary. Just make sure it returns the right value in all cases and use that in the macros instead of the confusing intel_enable_ppgtt() function. Signed-off-by: Jesse Barnes Signed-off-by: Rodrigo Vivi [danvet: Don't reenable full ppgtt through the backdoor.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 11 ----------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 73d2308e2377..125a83c70768 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2065,8 +2065,8 @@ struct drm_i915_cmd_table { #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_PPGTT(dev) (INTEL_INFO(dev)->gen >= 7 && !IS_GEN8(dev)) -#define USES_PPGTT(dev) intel_enable_ppgtt(dev, false) -#define USES_FULL_PPGTT(dev) intel_enable_ppgtt(dev, true) +#define USES_PPGTT(dev) (i915.enable_ppgtt) +#define USES_FULL_PPGTT(dev) (i915.enable_ppgtt == 2) #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1411613f2174..b4b7cfd226b7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -33,17 +33,6 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv); static void chv_setup_private_ppat(struct drm_i915_private *dev_priv); -bool intel_enable_ppgtt(struct drm_device *dev, bool full) -{ - if (i915.enable_ppgtt == 0) - return false; - - if (i915.enable_ppgtt == 1 && full) - return false; - - return true; -} - static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt) { if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev)) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 8d6f7c18c404..666c938a51e3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -272,7 +272,6 @@ void i915_gem_init_global_gtt(struct drm_device *dev); void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); -bool intel_enable_ppgtt(struct drm_device *dev, bool full); int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); void i915_check_and_clear_faults(struct drm_device *dev); From e2fcdaa9c951c51d558fea2cc020d89b382d702e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 6 Aug 2014 14:02:51 +0300 Subject: [PATCH 076/640] drm/i915: Free pending page flip events at .preclose() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there are pending page flips when the fd gets closed those page flips may have events associated to them. When the page flip eventually completes it will queue the event to file_priv->event_list, but that may be too late and file_priv->event_list has already been cleaned up. Thus we leak a bit of kernel memory in the form of the event structure. To avoid such problems clear out such pending events from intel_crtc->unpin_work at ->preclose(). Any event that already made it to file_priv->event_list will get cleaned up by the drm_release_events() a bit later. We can ignore the file_priv->event_space accounting since file_priv is going away. This is already how drm core deals with pending vblank events, which are maintained by the drm core. What saves us from a total disaster (ie. dereferencing and alrady freed file_priv) is the fact that the fb descruction triggers a modeset and there we wait for pending flips. Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/i915/intel_drv.h | 1 + 3 files changed, 26 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 2e7f03ad5ee2..c965698a8bac 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1981,6 +1981,9 @@ void i915_driver_preclose(struct drm_device *dev, struct drm_file *file) i915_gem_context_close(dev, file); i915_gem_release(dev, file); mutex_unlock(&dev->struct_mutex); + + if (drm_core_check_feature(dev, DRIVER_MODESET)) + intel_modeset_preclose(dev, file); } void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f32a94544940..24295694e493 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13470,3 +13470,25 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m, err_printf(m, " VSYNC: %08x\n", error->transcoder[i].vsync); } } + +void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(dev, crtc) { + struct intel_unpin_work *work; + unsigned long irqflags; + + spin_lock_irqsave(&dev->event_lock, irqflags); + + work = crtc->unpin_work; + + if (work && work->event && + work->event->base.file_priv == file) { + kfree(work->event); + work->event = NULL; + } + + spin_unlock_irqrestore(&dev->event_lock, irqflags); + } +} diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 79782094c8f9..666ca8a044ea 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -892,6 +892,7 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_config *pipe_config); int intel_format_to_fourcc(int format); void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc); +void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file); /* intel_dp.c */ void intel_dp_init(struct drm_device *dev, int output_reg, enum port port); From 0948c2651413d56c90d7ee9c99d75bef82d4c351 Mon Sep 17 00:00:00 2001 From: Gajanan Bhat Date: Thu, 7 Aug 2014 01:58:24 +0530 Subject: [PATCH 077/640] drm/i915: Generalize drain latency computation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modify drain latency computation to use it for any plane. Same function can be used for primary, cursor and sprite planes. v2: Adressed review comments by Imre and Ville. - Moved clock round up in separate patch - Added WARN check for clock and pixel size - Simplified bit masking - Use cursor_base instead of reg read v3: Changed to bitwise shorthand operator for plane_dl assignment. Signed-off-by: Gajanan Bhat Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 83 +++++++++++++++++++-------------- 2 files changed, 49 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 468ef09d698d..d0cff5630569 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4009,6 +4009,7 @@ enum punit_power_well { #define DDL_PLANE_PRECISION_64 (1<<7) #define DDL_PLANE_PRECISION_32 (0<<7) #define DDL_PLANE_SHIFT 0 +#define DRAIN_LATENCY_MASK 0x7f /* FIFO watermark sizes etc */ #define G4X_FIFO_LINE_SIZE 64 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 42bb329b2d05..de27439636e8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1271,33 +1271,24 @@ static bool g4x_compute_srwm(struct drm_device *dev, display, cursor); } -static bool vlv_compute_drain_latency(struct drm_device *dev, - int plane, - int *plane_prec_mult, - int *plane_dl, - int *cursor_prec_mult, - int *cursor_dl) +static bool vlv_compute_drain_latency(struct drm_crtc *crtc, + int pixel_size, + int *prec_mult, + int *drain_latency) { - struct drm_crtc *crtc; - int clock, pixel_size; int entries; + int clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock; - crtc = intel_get_crtc_for_plane(dev, plane); - if (!intel_crtc_active(crtc)) + if (WARN(clock == 0, "Pixel clock is zero!\n")) return false; - clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock; - pixel_size = crtc->primary->fb->bits_per_pixel / 8; /* BPP */ + if (WARN(pixel_size == 0, "Pixel size is zero!\n")) + return false; entries = (clock / 1000) * pixel_size; - *plane_prec_mult = (entries > 128) ? - DRAIN_LATENCY_PRECISION_64 : DRAIN_LATENCY_PRECISION_32; - *plane_dl = (64 * (*plane_prec_mult) * 4) / entries; - - entries = (clock / 1000) * 4; /* BPP is always 4 for cursor */ - *cursor_prec_mult = (entries > 128) ? - DRAIN_LATENCY_PRECISION_64 : DRAIN_LATENCY_PRECISION_32; - *cursor_dl = (64 * (*cursor_prec_mult) * 4) / entries; + *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 : + DRAIN_LATENCY_PRECISION_32; + *drain_latency = (64 * (*prec_mult) * 4) / entries; return true; } @@ -1312,24 +1303,46 @@ static bool vlv_compute_drain_latency(struct drm_device *dev, static void vlv_update_drain_latency(struct drm_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - enum pipe pipe = to_intel_crtc(crtc)->pipe; - int plane_prec, plane_dl; - int cursor_prec, cursor_dl; - int plane_prec_mult, cursor_prec_mult; + struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pixel_size; + int drain_latency; + enum pipe pipe = intel_crtc->pipe; + int plane_prec, prec_mult, plane_dl; - if (vlv_compute_drain_latency(dev, pipe, &plane_prec_mult, &plane_dl, - &cursor_prec_mult, &cursor_dl)) { - cursor_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_64) ? - DDL_CURSOR_PRECISION_64 : DDL_CURSOR_PRECISION_32; - plane_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_64) ? - DDL_PLANE_PRECISION_64 : DDL_PLANE_PRECISION_32; + plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_64 | + DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_64 | + (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT)); - I915_WRITE(VLV_DDL(pipe), cursor_prec | - (cursor_dl << DDL_CURSOR_SHIFT) | - plane_prec | (plane_dl << DDL_PLANE_SHIFT)); + if (!intel_crtc_active(crtc)) { + I915_WRITE(VLV_DDL(pipe), plane_dl); + return; } + + /* Primary plane Drain Latency */ + pixel_size = crtc->primary->fb->bits_per_pixel / 8; /* BPP */ + if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { + plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? + DDL_PLANE_PRECISION_64 : + DDL_PLANE_PRECISION_32; + plane_dl |= plane_prec | drain_latency; + } + + /* Cursor Drain Latency + * BPP is always 4 for cursor + */ + pixel_size = 4; + + /* Program cursor DL only if it is enabled */ + if (intel_crtc->cursor_base && + vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) { + plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? + DDL_CURSOR_PRECISION_64 : + DDL_CURSOR_PRECISION_32; + plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT); + } + + I915_WRITE(VLV_DDL(pipe), plane_dl); } #define single_plane_enabled(mask) is_power_of_2(mask) From a398e9c79eea74ba2f3f24ac08902661682f008c Mon Sep 17 00:00:00 2001 From: Gajanan Bhat Date: Tue, 5 Aug 2014 23:15:54 +0530 Subject: [PATCH 078/640] drm/i915: Round-up clock and limit drain latency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round up clock computation and limit drain latency to maximum of 0x7F. Signed-off-by: Gajanan Bhat Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index de27439636e8..19bd7212f4a2 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1285,11 +1285,14 @@ static bool vlv_compute_drain_latency(struct drm_crtc *crtc, if (WARN(pixel_size == 0, "Pixel size is zero!\n")) return false; - entries = (clock / 1000) * pixel_size; + entries = DIV_ROUND_UP(clock, 1000) * pixel_size; *prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 : DRAIN_LATENCY_PRECISION_32; *drain_latency = (64 * (*prec_mult) * 4) / entries; + if (*drain_latency > DRAIN_LATENCY_MASK) + *drain_latency = DRAIN_LATENCY_MASK; + return true; } From 01e184cc85d4516cab0ecea7c2c43a2dd3ad432b Mon Sep 17 00:00:00 2001 From: Gajanan Bhat Date: Thu, 7 Aug 2014 17:03:30 +0530 Subject: [PATCH 079/640] drm/i915: Add sprite watermark programming for VLV and CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Program DDL register as part of sprite watermark programming for CHV and VLV. v2: Rename DRAIN_LATENCY_MAX by DRAIN_LATENCY_MASK v3: Addressed review comments by Ville - Changed Sprite DDL definitions to more generic to avoid multiple if-else - Changed bit masking to customary form - Changed to bitwise shorthand operator for sprite_dl assignment Signed-off-by: Gajanan Bhat Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 9 +++------ drivers/gpu/drm/i915/intel_pm.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d0cff5630569..7a6cc69cdc2b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4000,12 +4000,9 @@ enum punit_power_well { #define DDL_CURSOR_PRECISION_64 (1<<31) #define DDL_CURSOR_PRECISION_32 (0<<31) #define DDL_CURSOR_SHIFT 24 -#define DDL_SPRITE1_PRECISION_64 (1<<23) -#define DDL_SPRITE1_PRECISION_32 (0<<23) -#define DDL_SPRITE1_SHIFT 16 -#define DDL_SPRITE0_PRECISION_64 (1<<15) -#define DDL_SPRITE0_PRECISION_32 (0<<15) -#define DDL_SPRITE0_SHIFT 8 +#define DDL_SPRITE_PRECISION_64(sprite) (1<<(15+8*(sprite))) +#define DDL_SPRITE_PRECISION_32(sprite) (0<<(15+8*(sprite))) +#define DDL_SPRITE_SHIFT(sprite) (8+8*(sprite)) #define DDL_PLANE_PRECISION_64 (1<<7) #define DDL_PLANE_PRECISION_32 (0<<7) #define DDL_PLANE_SHIFT 0 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 19bd7212f4a2..41de760bf1d4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1494,6 +1494,37 @@ static void cherryview_update_wm(struct drm_crtc *crtc) intel_set_memory_cxsr(dev_priv, true); } +static void valleyview_update_sprite_wm(struct drm_plane *plane, + struct drm_crtc *crtc, + uint32_t sprite_width, + uint32_t sprite_height, + int pixel_size, + bool enabled, bool scaled) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe = to_intel_plane(plane)->pipe; + int sprite = to_intel_plane(plane)->plane; + int drain_latency; + int plane_prec; + int sprite_dl; + int prec_mult; + + sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_64(sprite) | + (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite))); + + if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, + &drain_latency)) { + plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ? + DDL_SPRITE_PRECISION_64(sprite) : + DDL_SPRITE_PRECISION_32(sprite); + sprite_dl |= plane_prec | + (drain_latency << DDL_SPRITE_SHIFT(sprite)); + } + + I915_WRITE(VLV_DDL(pipe), sprite_dl); +} + static void g4x_update_wm(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -7225,10 +7256,12 @@ void intel_init_pm(struct drm_device *dev) dev_priv->display.init_clock_gating = gen8_init_clock_gating; } else if (IS_CHERRYVIEW(dev)) { dev_priv->display.update_wm = cherryview_update_wm; + dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm; dev_priv->display.init_clock_gating = cherryview_init_clock_gating; } else if (IS_VALLEYVIEW(dev)) { dev_priv->display.update_wm = valleyview_update_wm; + dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm; dev_priv->display.init_clock_gating = valleyview_init_clock_gating; } else if (IS_PINEVIEW(dev)) { From 403bdd10c815029694046adf5ffde0577cbd2866 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 7 Aug 2014 16:05:39 +0200 Subject: [PATCH 080/640] drm/i915: No busy-loop wait_for in the ring init code Doing a 1s wait (tops) with the cpu is a bit excessive. Tune it down like everything else in that code. v2: Also insert the missing space Chris spotted. Cc: Naresh Kumar Kachhi Cc: Chris Wilson Acked-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 16371a444426..117543e58d48 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -476,8 +476,8 @@ static bool stop_ring(struct intel_engine_cs *ring) if (!IS_GEN2(ring->dev)) { I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); - if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { - DRM_ERROR("%s :timed out trying to stop ring\n", ring->name); + if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { + DRM_ERROR("%s : timed out trying to stop ring\n", ring->name); return false; } } From 10f637bf292ba501f9b9e9df6dfe21d8fa521fbd Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 29 Jul 2014 13:47:11 +0200 Subject: [PATCH 081/640] drm: Add drm_plane/connector_index In the atomic state we'll have an array of states for crtcs, planes and connectors and need to be able to at them by their index. We already have a drm_crtc_index function so add the missing ones for planes and connectors. If it later on turns out that the list walking is too expensive we can add the index to the relevant modeset objects. Rob Clark doesn't like the loops too much, but we can always add an obj->idx parameter later on. And for now reiterating is actually safer since nowadays we have hotpluggable connectors (thanks to DP MST). v2: Fix embarrassing copypasta fail in kerneldoc and header declarations, spotted by Matt Roper. Cc: Matt Roper Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 46 ++++++++++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 2 ++ 2 files changed, 48 insertions(+) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 66d3bfb8d264..f3ef461deeb8 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1021,6 +1021,29 @@ void drm_connector_cleanup(struct drm_connector *connector) } EXPORT_SYMBOL(drm_connector_cleanup); +/** + * drm_connector_index - find the index of a registered connector + * @connector: connector to find index for + * + * Given a registered connector, return the index of that connector within a DRM + * device's list of connectors. + */ +unsigned int drm_connector_index(struct drm_connector *connector) +{ + unsigned int index = 0; + struct drm_connector *tmp; + + list_for_each_entry(tmp, &connector->dev->mode_config.connector_list, head) { + if (tmp == connector) + return index; + + index++; + } + + BUG(); +} +EXPORT_SYMBOL(drm_connector_index); + /** * drm_connector_register - register a connector * @connector: the connector to register @@ -1325,6 +1348,29 @@ void drm_plane_cleanup(struct drm_plane *plane) } EXPORT_SYMBOL(drm_plane_cleanup); +/** + * drm_plane_index - find the index of a registered plane + * @plane: plane to find index for + * + * Given a registered plane, return the index of that CRTC within a DRM + * device's list of planes. + */ +unsigned int drm_plane_index(struct drm_plane *plane) +{ + unsigned int index = 0; + struct drm_plane *tmp; + + list_for_each_entry(tmp, &plane->dev->mode_config.plane_list, head) { + if (tmp == plane) + return index; + + index++; + } + + BUG(); +} +EXPORT_SYMBOL(drm_plane_index); + /** * drm_plane_force_disable - Forcibly disable a plane * @plane: plane to disable diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index c530b4920a09..9f18e7022ab3 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -904,6 +904,7 @@ int drm_connector_register(struct drm_connector *connector); void drm_connector_unregister(struct drm_connector *connector); extern void drm_connector_cleanup(struct drm_connector *connector); +extern unsigned int drm_connector_index(struct drm_connector *connector); /* helper to unplug all connectors from sysfs for device */ extern void drm_connector_unplug_all(struct drm_device *dev); @@ -943,6 +944,7 @@ extern int drm_plane_init(struct drm_device *dev, const uint32_t *formats, uint32_t format_count, bool is_primary); extern void drm_plane_cleanup(struct drm_plane *plane); +extern unsigned int drm_plane_index(struct drm_plane *plane); extern void drm_plane_force_disable(struct drm_plane *plane); extern int drm_crtc_check_viewport(const struct drm_crtc *crtc, int x, int y, From a6a8bb848d5ca40bc0eb708ddeb23df2b0eca1fb Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 25 Jul 2014 17:47:18 +0200 Subject: [PATCH 082/640] drm: Move modeset_lock_all helpers to drm_modeset_lock.[hc] Somehow we've forgotten about this little bit of OCD. Reviewed-by: Dave Airlie Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 95 ------------------------------ drivers/gpu/drm/drm_modeset_lock.c | 95 ++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 4 -- include/drm/drm_modeset_lock.h | 5 ++ 4 files changed, 100 insertions(+), 99 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index f3ef461deeb8..caaa01f3b353 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -45,101 +45,6 @@ static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev, struct drm_mode_fb_cmd2 *r, struct drm_file *file_priv); -/** - * drm_modeset_lock_all - take all modeset locks - * @dev: drm device - * - * This function takes all modeset locks, suitable where a more fine-grained - * scheme isn't (yet) implemented. Locks must be dropped with - * drm_modeset_unlock_all. - */ -void drm_modeset_lock_all(struct drm_device *dev) -{ - struct drm_mode_config *config = &dev->mode_config; - struct drm_modeset_acquire_ctx *ctx; - int ret; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (WARN_ON(!ctx)) - return; - - mutex_lock(&config->mutex); - - drm_modeset_acquire_init(ctx, 0); - -retry: - ret = drm_modeset_lock(&config->connection_mutex, ctx); - if (ret) - goto fail; - ret = drm_modeset_lock_all_crtcs(dev, ctx); - if (ret) - goto fail; - - WARN_ON(config->acquire_ctx); - - /* now we hold the locks, so now that it is safe, stash the - * ctx for drm_modeset_unlock_all(): - */ - config->acquire_ctx = ctx; - - drm_warn_on_modeset_not_all_locked(dev); - - return; - -fail: - if (ret == -EDEADLK) { - drm_modeset_backoff(ctx); - goto retry; - } -} -EXPORT_SYMBOL(drm_modeset_lock_all); - -/** - * drm_modeset_unlock_all - drop all modeset locks - * @dev: device - * - * This function drop all modeset locks taken by drm_modeset_lock_all. - */ -void drm_modeset_unlock_all(struct drm_device *dev) -{ - struct drm_mode_config *config = &dev->mode_config; - struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx; - - if (WARN_ON(!ctx)) - return; - - config->acquire_ctx = NULL; - drm_modeset_drop_locks(ctx); - drm_modeset_acquire_fini(ctx); - - kfree(ctx); - - mutex_unlock(&dev->mode_config.mutex); -} -EXPORT_SYMBOL(drm_modeset_unlock_all); - -/** - * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked - * @dev: device - * - * Useful as a debug assert. - */ -void drm_warn_on_modeset_not_all_locked(struct drm_device *dev) -{ - struct drm_crtc *crtc; - - /* Locking is currently fubar in the panic handler. */ - if (oops_in_progress) - return; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) - WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); - - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); - WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); -} -EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked); - /* Avoid boilerplate. I'm tired of typing. */ #define DRM_ENUM_NAME_FN(fnname, list) \ const char *fnname(int val) \ diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 0dc57d5ecd10..73e6534fd0aa 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -56,6 +56,101 @@ */ +/** + * drm_modeset_lock_all - take all modeset locks + * @dev: drm device + * + * This function takes all modeset locks, suitable where a more fine-grained + * scheme isn't (yet) implemented. Locks must be dropped with + * drm_modeset_unlock_all. + */ +void drm_modeset_lock_all(struct drm_device *dev) +{ + struct drm_mode_config *config = &dev->mode_config; + struct drm_modeset_acquire_ctx *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (WARN_ON(!ctx)) + return; + + mutex_lock(&config->mutex); + + drm_modeset_acquire_init(ctx, 0); + +retry: + ret = drm_modeset_lock(&config->connection_mutex, ctx); + if (ret) + goto fail; + ret = drm_modeset_lock_all_crtcs(dev, ctx); + if (ret) + goto fail; + + WARN_ON(config->acquire_ctx); + + /* now we hold the locks, so now that it is safe, stash the + * ctx for drm_modeset_unlock_all(): + */ + config->acquire_ctx = ctx; + + drm_warn_on_modeset_not_all_locked(dev); + + return; + +fail: + if (ret == -EDEADLK) { + drm_modeset_backoff(ctx); + goto retry; + } +} +EXPORT_SYMBOL(drm_modeset_lock_all); + +/** + * drm_modeset_unlock_all - drop all modeset locks + * @dev: device + * + * This function drop all modeset locks taken by drm_modeset_lock_all. + */ +void drm_modeset_unlock_all(struct drm_device *dev) +{ + struct drm_mode_config *config = &dev->mode_config; + struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx; + + if (WARN_ON(!ctx)) + return; + + config->acquire_ctx = NULL; + drm_modeset_drop_locks(ctx); + drm_modeset_acquire_fini(ctx); + + kfree(ctx); + + mutex_unlock(&dev->mode_config.mutex); +} +EXPORT_SYMBOL(drm_modeset_unlock_all); + +/** + * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked + * @dev: device + * + * Useful as a debug assert. + */ +void drm_warn_on_modeset_not_all_locked(struct drm_device *dev) +{ + struct drm_crtc *crtc; + + /* Locking is currently fubar in the panic handler. */ + if (oops_in_progress) + return; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) + WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); + + WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); + WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); +} +EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked); + /** * drm_modeset_acquire_init - initialize acquire context * @ctx: the acquire context diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 9f18e7022ab3..a11d73422e7f 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -218,10 +218,6 @@ struct drm_property { struct list_head enum_blob_list; }; -void drm_modeset_lock_all(struct drm_device *dev); -void drm_modeset_unlock_all(struct drm_device *dev); -void drm_warn_on_modeset_not_all_locked(struct drm_device *dev); - struct drm_crtc; struct drm_connector; struct drm_encoder; diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h index 402aa7a6a058..cf61e857bc06 100644 --- a/include/drm/drm_modeset_lock.h +++ b/include/drm/drm_modeset_lock.h @@ -120,6 +120,11 @@ int drm_modeset_lock_interruptible(struct drm_modeset_lock *lock, void drm_modeset_unlock(struct drm_modeset_lock *lock); struct drm_device; + +void drm_modeset_lock_all(struct drm_device *dev); +void drm_modeset_unlock_all(struct drm_device *dev); +void drm_warn_on_modeset_not_all_locked(struct drm_device *dev); + int drm_modeset_lock_all_crtcs(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx); From d059f652e73c35678d28d4cd09ab2cec89696af9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 25 Jul 2014 18:07:40 +0200 Subject: [PATCH 083/640] drm: Handle legacy per-crtc locking with full acquire ctx So drivers using the atomic interfaces expect that they can acquire additional locks internal to the driver as-needed. Examples would be locks to protect shared state like shared display PLLs. Unfortunately the legacy ioctls assume that all locking is fully done by the drm core. Now for those paths which grab all locks we already have to keep around an acquire context in dev->mode_config. Helper functions that implement legacy interfaces in terms of atomic support can therefore grab this acquire contexts and reuse it. The only interfaces left are the cursor and pageflip ioctls. So add functions to grab the crtc lock these need using an acquire context and preserve it for atomic drivers to reuse. v2: - Fixup comments&kerneldoc. - Drop the WARNING from modeset_lock_all_crtcs since that can be used in legacy paths with crtc locking. v3: Fix a type on the kerneldoc Dave spotted. Cc: Dave Airlie Reviewed-by: Dave Airlie Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 8 +-- drivers/gpu/drm/drm_modeset_lock.c | 84 ++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 6 +++ include/drm/drm_modeset_lock.h | 5 ++ 4 files changed, 99 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index caaa01f3b353..ab121b6d980c 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -2801,7 +2801,7 @@ static int drm_mode_cursor_common(struct drm_device *dev, if (crtc->cursor) return drm_mode_cursor_universal(crtc, req, file_priv); - drm_modeset_lock(&crtc->mutex, NULL); + drm_modeset_lock_crtc(crtc); if (req->flags & DRM_MODE_CURSOR_BO) { if (!crtc->funcs->cursor_set && !crtc->funcs->cursor_set2) { ret = -ENXIO; @@ -2825,7 +2825,7 @@ static int drm_mode_cursor_common(struct drm_device *dev, } } out: - drm_modeset_unlock(&crtc->mutex); + drm_modeset_unlock_crtc(crtc); return ret; @@ -4561,7 +4561,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, if (!crtc) return -ENOENT; - drm_modeset_lock(&crtc->mutex, NULL); + drm_modeset_lock_crtc(crtc); if (crtc->primary->fb == NULL) { /* The framebuffer is currently unbound, presumably * due to a hotplug event, that userspace has not @@ -4645,7 +4645,7 @@ out: drm_framebuffer_unreference(fb); if (old_fb) drm_framebuffer_unreference(old_fb); - drm_modeset_unlock(&crtc->mutex); + drm_modeset_unlock_crtc(crtc); return ret; } diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 73e6534fd0aa..4753c8bd5ab5 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -129,6 +129,90 @@ void drm_modeset_unlock_all(struct drm_device *dev) } EXPORT_SYMBOL(drm_modeset_unlock_all); +/** + * drm_modeset_lock_crtc - lock crtc with hidden acquire ctx + * @crtc: drm crtc + * + * This function locks the given crtc using a hidden acquire context. This is + * necessary so that drivers internally using the atomic interfaces can grab + * further locks with the lock acquire context. + */ +void drm_modeset_lock_crtc(struct drm_crtc *crtc) +{ + struct drm_modeset_acquire_ctx *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (WARN_ON(!ctx)) + return; + + drm_modeset_acquire_init(ctx, 0); + +retry: + ret = drm_modeset_lock(&crtc->mutex, ctx); + if (ret) + goto fail; + + WARN_ON(crtc->acquire_ctx); + + /* now we hold the locks, so now that it is safe, stash the + * ctx for drm_modeset_unlock_crtc(): + */ + crtc->acquire_ctx = ctx; + + return; + +fail: + if (ret == -EDEADLK) { + drm_modeset_backoff(ctx); + goto retry; + } +} +EXPORT_SYMBOL(drm_modeset_lock_crtc); + +/** + * drm_modeset_legacy_acquire_ctx - find acquire ctx for legacy ioctls + * crtc: drm crtc + * + * Legacy ioctl operations like cursor updates or page flips only have per-crtc + * locking, and store the acquire ctx in the corresponding crtc. All other + * legacy operations take all locks and use a global acquire context. This + * function grabs the right one. + */ +struct drm_modeset_acquire_ctx * +drm_modeset_legacy_acquire_ctx(struct drm_crtc *crtc) +{ + if (crtc->acquire_ctx) + return crtc->acquire_ctx; + + WARN_ON(!crtc->dev->mode_config.acquire_ctx); + + return crtc->dev->mode_config.acquire_ctx; +} +EXPORT_SYMBOL(drm_modeset_legacy_acquire_ctx); + +/** + * drm_modeset_unlock_crtc - drop crtc lock + * @crtc: drm crtc + * + * This drops the crtc lock acquire with drm_modeset_lock_crtc() and all other + * locks acquired through the hidden context. + */ +void drm_modeset_unlock_crtc(struct drm_crtc *crtc) +{ + struct drm_modeset_acquire_ctx *ctx = crtc->acquire_ctx; + + if (WARN_ON(!ctx)) + return; + + crtc->acquire_ctx = NULL; + drm_modeset_drop_locks(ctx); + drm_modeset_acquire_fini(ctx); + + kfree(ctx); +} +EXPORT_SYMBOL(drm_modeset_unlock_crtc); + /** * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked * @dev: device diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index a11d73422e7f..508817bae538 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -371,6 +371,12 @@ struct drm_crtc { void *helper_private; struct drm_object_properties properties; + + /* + * For legacy crtc ioctls so that atomic drivers can get at the locking + * acquire context. + */ + struct drm_modeset_acquire_ctx *acquire_ctx; }; diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h index cf61e857bc06..d38e1508f11a 100644 --- a/include/drm/drm_modeset_lock.h +++ b/include/drm/drm_modeset_lock.h @@ -120,10 +120,15 @@ int drm_modeset_lock_interruptible(struct drm_modeset_lock *lock, void drm_modeset_unlock(struct drm_modeset_lock *lock); struct drm_device; +struct drm_crtc; void drm_modeset_lock_all(struct drm_device *dev); void drm_modeset_unlock_all(struct drm_device *dev); +void drm_modeset_lock_crtc(struct drm_crtc *crtc); +void drm_modeset_unlock_crtc(struct drm_crtc *crtc); void drm_warn_on_modeset_not_all_locked(struct drm_device *dev); +struct drm_modeset_acquire_ctx * +drm_modeset_legacy_acquire_ctx(struct drm_crtc *crtc); int drm_modeset_lock_all_crtcs(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx); From 3d30a59bfcb7c96d4aacdb053c2ccc49394b2311 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 27 Jul 2014 13:42:42 +0200 Subject: [PATCH 084/640] drm: Move ->old_fb from crtc to plane Atomic implemenations for legacy ioctls must be able to drop locks. Which doesn't cause havoc since we only do that while constructing the new state, so no driver or hardware state change has happened. The only troubling bit is the fb refcounting the core does - if someone else has snuck in then it might potentially unref an outdated framebuffer. To fix that move the old_fb temporary storage into struct drm_plane for all ioctls, so that the atomic helpers can update it. v2: Fix up the error case handling as suggested by Matt Roper and just grab locks uncoditionally - there's no point in optimizing the locking for when userspace gets it wrong. Cc: Matt Roper Cc: Dave Airlie Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 46 ++++++++++++++++++++------------------ include/drm/drm_crtc.h | 8 +++---- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index ab121b6d980c..cacb460a7145 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -1287,19 +1287,21 @@ EXPORT_SYMBOL(drm_plane_index); */ void drm_plane_force_disable(struct drm_plane *plane) { - struct drm_framebuffer *old_fb = plane->fb; int ret; - if (!old_fb) + if (!plane->fb) return; + plane->old_fb = plane->fb; ret = plane->funcs->disable_plane(plane); if (ret) { DRM_ERROR("failed to disable plane with busy fb\n"); + plane->old_fb = NULL; return; } /* disconnect the plane from the fb and crtc: */ - __drm_framebuffer_unreference(old_fb); + __drm_framebuffer_unreference(plane->old_fb); + plane->old_fb = NULL; plane->fb = NULL; plane->crtc = NULL; } @@ -2275,23 +2277,21 @@ static int setplane_internal(struct drm_plane *plane, uint32_t src_w, uint32_t src_h) { struct drm_device *dev = plane->dev; - struct drm_framebuffer *old_fb = NULL; int ret = 0; unsigned int fb_width, fb_height; int i; + drm_modeset_lock_all(dev); /* No fb means shut it down */ if (!fb) { - drm_modeset_lock_all(dev); - old_fb = plane->fb; + plane->old_fb = plane->fb; ret = plane->funcs->disable_plane(plane); if (!ret) { plane->crtc = NULL; plane->fb = NULL; } else { - old_fb = NULL; + plane->old_fb = NULL; } - drm_modeset_unlock_all(dev); goto out; } @@ -2331,8 +2331,7 @@ static int setplane_internal(struct drm_plane *plane, goto out; } - drm_modeset_lock_all(dev); - old_fb = plane->fb; + plane->old_fb = plane->fb; ret = plane->funcs->update_plane(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, src_w, src_h); @@ -2341,15 +2340,16 @@ static int setplane_internal(struct drm_plane *plane, plane->fb = fb; fb = NULL; } else { - old_fb = NULL; + plane->old_fb = NULL; } - drm_modeset_unlock_all(dev); out: if (fb) drm_framebuffer_unreference(fb); - if (old_fb) - drm_framebuffer_unreference(old_fb); + if (plane->old_fb) + drm_framebuffer_unreference(plane->old_fb); + plane->old_fb = NULL; + drm_modeset_unlock_all(dev); return ret; @@ -2456,7 +2456,7 @@ int drm_mode_set_config_internal(struct drm_mode_set *set) * crtcs. Atomic modeset will have saner semantics ... */ list_for_each_entry(tmp, &crtc->dev->mode_config.crtc_list, head) - tmp->old_fb = tmp->primary->fb; + tmp->primary->old_fb = tmp->primary->fb; fb = set->fb; @@ -2469,8 +2469,9 @@ int drm_mode_set_config_internal(struct drm_mode_set *set) list_for_each_entry(tmp, &crtc->dev->mode_config.crtc_list, head) { if (tmp->primary->fb) drm_framebuffer_reference(tmp->primary->fb); - if (tmp->old_fb) - drm_framebuffer_unreference(tmp->old_fb); + if (tmp->primary->old_fb) + drm_framebuffer_unreference(tmp->primary->old_fb); + tmp->primary->old_fb = NULL; } return ret; @@ -4545,7 +4546,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, { struct drm_mode_crtc_page_flip *page_flip = data; struct drm_crtc *crtc; - struct drm_framebuffer *fb = NULL, *old_fb = NULL; + struct drm_framebuffer *fb = NULL; struct drm_pending_vblank_event *e = NULL; unsigned long flags; int ret = -EINVAL; @@ -4617,7 +4618,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, (void (*) (struct drm_pending_event *)) kfree; } - old_fb = crtc->primary->fb; + crtc->primary->old_fb = crtc->primary->fb; ret = crtc->funcs->page_flip(crtc, fb, e, page_flip->flags); if (ret) { if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) { @@ -4627,7 +4628,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, kfree(e); } /* Keep the old fb, don't unref it. */ - old_fb = NULL; + crtc->primary->old_fb = NULL; } else { /* * Warn if the driver hasn't properly updated the crtc->fb @@ -4643,8 +4644,9 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, out: if (fb) drm_framebuffer_unreference(fb); - if (old_fb) - drm_framebuffer_unreference(old_fb); + if (crtc->primary->old_fb) + drm_framebuffer_unreference(crtc->primary->old_fb); + crtc->primary->old_fb = NULL; drm_modeset_unlock_crtc(crtc); return ret; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 508817bae538..279565aa0c33 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -341,10 +341,6 @@ struct drm_crtc { int cursor_x; int cursor_y; - /* Temporary tracking of the old fb while a modeset is ongoing. Used - * by drm_mode_set_config_internal to implement correct refcounting. */ - struct drm_framebuffer *old_fb; - bool enabled; /* Requested mode from modesetting. */ @@ -623,6 +619,10 @@ struct drm_plane { struct drm_crtc *crtc; struct drm_framebuffer *fb; + /* Temporary tracking of the old fb while a modeset is ongoing. Used + * by drm_mode_set_config_internal to implement correct refcounting. */ + struct drm_framebuffer *old_fb; + const struct drm_plane_funcs *funcs; struct drm_object_properties properties; From cb597bb3a2fbfc871cc1c703fb330d247bd21394 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Sun, 27 Jul 2014 19:09:33 +0200 Subject: [PATCH 085/640] drm: trylock modest locking for fbdev panics In the fbdev code we want to do trylocks only to avoid deadlocks and other ugly issues. Thus far we've only grabbed the overall modeset lock, but that already failed to exclude a pile of potential concurrent operations. With proper atomic support this will be worse. So add a trylock mode to the modeset locking code which attempts all locks only with trylocks, if possible. We need to track this in the locking functions themselves and can't restrict this to drivers since driver-private w/w mutexes must be treated the same way. There's still the issue that other driver private locks aren't handled here at all, but well can't have everything. With this we will at least not regress, even once atomic allows lots of concurrent kms activity. Aside: We should move the acquire context to stack-based allocation in the callers to get rid of that awful WARN_ON(kmalloc_failed) control flow which just blows up when memory is short. But that's material for separate patches. v2: - Fix logic inversion fumble in the fb helper. - Add proper kerneldoc. Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_fb_helper.c | 10 +++--- drivers/gpu/drm/drm_modeset_lock.c | 56 +++++++++++++++++++++++------- include/drm/drm_modeset_lock.h | 6 ++++ 3 files changed, 55 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 3a6b6635e3f5..7b7b9565188f 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -365,11 +365,11 @@ static bool drm_fb_helper_force_kernel_mode(void) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) continue; - /* NOTE: we use lockless flag below to avoid grabbing other - * modeset locks. So just trylock the underlying mutex - * directly: + /* + * NOTE: Use trylock mode to avoid deadlocks and sleeping in + * panic context. */ - if (!mutex_trylock(&dev->mode_config.mutex)) { + if (__drm_modeset_lock_all(dev, true) != 0) { error = true; continue; } @@ -378,7 +378,7 @@ static bool drm_fb_helper_force_kernel_mode(void) if (ret) error = true; - mutex_unlock(&dev->mode_config.mutex); + drm_modeset_unlock_all(dev); } return error; } diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 4753c8bd5ab5..5280b64a0230 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -57,26 +57,37 @@ /** - * drm_modeset_lock_all - take all modeset locks - * @dev: drm device + * __drm_modeset_lock_all - internal helper to grab all modeset locks + * @dev: DRM device + * @trylock: trylock mode for atomic contexts * - * This function takes all modeset locks, suitable where a more fine-grained - * scheme isn't (yet) implemented. Locks must be dropped with - * drm_modeset_unlock_all. + * This is a special version of drm_modeset_lock_all() which can also be used in + * atomic contexts. Then @trylock must be set to true. + * + * Returns: + * 0 on success or negative error code on failure. */ -void drm_modeset_lock_all(struct drm_device *dev) +int __drm_modeset_lock_all(struct drm_device *dev, + bool trylock) { struct drm_mode_config *config = &dev->mode_config; struct drm_modeset_acquire_ctx *ctx; int ret; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (WARN_ON(!ctx)) - return; + ctx = kzalloc(sizeof(*ctx), + trylock ? GFP_ATOMIC : GFP_KERNEL); + if (!ctx) + return -ENOMEM; - mutex_lock(&config->mutex); + if (trylock) { + if (!mutex_trylock(&config->mutex)) + return -EBUSY; + } else { + mutex_lock(&config->mutex); + } drm_modeset_acquire_init(ctx, 0); + ctx->trylock_only = trylock; retry: ret = drm_modeset_lock(&config->connection_mutex, ctx); @@ -95,13 +106,29 @@ retry: drm_warn_on_modeset_not_all_locked(dev); - return; + return 0; fail: if (ret == -EDEADLK) { drm_modeset_backoff(ctx); goto retry; } + + return ret; +} +EXPORT_SYMBOL(__drm_modeset_lock_all); + +/** + * drm_modeset_lock_all - take all modeset locks + * @dev: drm device + * + * This function takes all modeset locks, suitable where a more fine-grained + * scheme isn't (yet) implemented. Locks must be dropped with + * drm_modeset_unlock_all. + */ +void drm_modeset_lock_all(struct drm_device *dev) +{ + WARN_ON(__drm_modeset_lock_all(dev, false) != 0); } EXPORT_SYMBOL(drm_modeset_lock_all); @@ -287,7 +314,12 @@ static inline int modeset_lock(struct drm_modeset_lock *lock, WARN_ON(ctx->contended); - if (interruptible && slow) { + if (ctx->trylock_only) { + if (!ww_mutex_trylock(&lock->mutex)) + return -EBUSY; + else + return 0; + } else if (interruptible && slow) { ret = ww_mutex_lock_slow_interruptible(&lock->mutex, &ctx->ww_ctx); } else if (interruptible) { ret = ww_mutex_lock_interruptible(&lock->mutex, &ctx->ww_ctx); diff --git a/include/drm/drm_modeset_lock.h b/include/drm/drm_modeset_lock.h index d38e1508f11a..a3f736d24382 100644 --- a/include/drm/drm_modeset_lock.h +++ b/include/drm/drm_modeset_lock.h @@ -53,6 +53,11 @@ struct drm_modeset_acquire_ctx { * list of held locks (drm_modeset_lock) */ struct list_head locked; + + /** + * Trylock mode, use only for panic handlers! + */ + bool trylock_only; }; /** @@ -123,6 +128,7 @@ struct drm_device; struct drm_crtc; void drm_modeset_lock_all(struct drm_device *dev); +int __drm_modeset_lock_all(struct drm_device *dev, bool trylock); void drm_modeset_unlock_all(struct drm_device *dev); void drm_modeset_lock_crtc(struct drm_crtc *crtc); void drm_modeset_unlock_crtc(struct drm_crtc *crtc); From 2c0827cffca8ac0c654b888c58a1989a5172f007 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 8 Aug 2014 20:44:59 +0200 Subject: [PATCH 086/640] drm/i915: Update DRIVER_DATE to 20140808 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 125a83c70768..fab97bc3215f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -53,7 +53,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20140725" +#define DRIVER_DATE "20140808" enum pipe { INVALID_PIPE = -1, From 4fa790421c10e5c9c62406655c06d97a94555d54 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 8 Aug 2014 19:25:57 +0100 Subject: [PATCH 087/640] drm/i915: Fix erroneous conversion to u8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit adj was defined as u8. The issue is last_adj can be negative and adj is initialized with: adj = dev_priv->rps.last_adj; and we were also happily doing things like: if (adj < 0) (thank static analysers!) v2: Make new_delay an int in case we overflow the u8 in the intermediate computations. new_delay will get clamped at the end anyway. (Ville) Cc: Deepak S Cc: Ville Syrjälä Signed-off-by: Damien Lespiau Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 87abe8679495..f0d24db76e72 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1327,10 +1327,10 @@ static u32 vlv_c0_residency(struct drm_i915_private *dev_priv, * @dev_priv: DRM device private * */ -static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv) +static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv) { u32 residency_C0_up = 0, residency_C0_down = 0; - u8 new_delay, adj; + int new_delay, adj; dev_priv->rps.ei_interrupt_count++; From f45651bae2ee73ae551699d481f76aa6ad92138f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 8 Aug 2014 21:51:10 +0300 Subject: [PATCH 088/640] drm/i915: Eliminate rmw from .update_primary_plane() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the entire DSPCNTR register setup into the .update_primary_plane() functions. That's where it belongs anyway and it'll also help 830M which has the extra problem that plane registers reads will return the value latched at the last vblank, not the value that was last written. Also move DSPPOS and DSPSIZE setup there. v2: Don't move variable initialization to avoid churn later Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 100 +++++++++------------------ 1 file changed, 32 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 24295694e493..041fd76a2ded 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2388,12 +2388,26 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc, int plane = intel_crtc->plane; unsigned long linear_offset; u32 dspcntr; - u32 reg; + u32 reg = DSPCNTR(plane); + + dspcntr = DISPPLANE_GAMMA_ENABLE; + + if (intel_crtc->primary_enabled) + dspcntr |= DISPLAY_PLANE_ENABLE; + + if (INTEL_INFO(dev)->gen < 4) { + if (intel_crtc->pipe == PIPE_B) + dspcntr |= DISPPLANE_SEL_PIPE_B; + + /* pipesrc and dspsize control the size that is scaled from, + * which should always be the user's requested size. + */ + I915_WRITE(DSPSIZE(plane), + ((intel_crtc->config.pipe_src_h - 1) << 16) | + (intel_crtc->config.pipe_src_w - 1)); + I915_WRITE(DSPPOS(plane), 0); + } - reg = DSPCNTR(plane); - dspcntr = I915_READ(reg); - /* Mask out pixel format bits in case we change it */ - dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; switch (fb->pixel_format) { case DRM_FORMAT_C8: dspcntr |= DISPPLANE_8BPP; @@ -2425,12 +2439,9 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc, BUG(); } - if (INTEL_INFO(dev)->gen >= 4) { - if (obj->tiling_mode != I915_TILING_NONE) - dspcntr |= DISPPLANE_TILED; - else - dspcntr &= ~DISPPLANE_TILED; - } + if (INTEL_INFO(dev)->gen >= 4 && + obj->tiling_mode != I915_TILING_NONE) + dspcntr |= DISPPLANE_TILED; if (IS_G4X(dev)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; @@ -2474,12 +2485,16 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc, int plane = intel_crtc->plane; unsigned long linear_offset; u32 dspcntr; - u32 reg; + u32 reg = DSPCNTR(plane); + + dspcntr = DISPPLANE_GAMMA_ENABLE; + + if (intel_crtc->primary_enabled) + dspcntr |= DISPLAY_PLANE_ENABLE; + + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; - reg = DSPCNTR(plane); - dspcntr = I915_READ(reg); - /* Mask out pixel format bits in case we change it */ - dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; switch (fb->pixel_format) { case DRM_FORMAT_C8: dspcntr |= DISPPLANE_8BPP; @@ -2509,12 +2524,8 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc, if (obj->tiling_mode != I915_TILING_NONE) dspcntr |= DISPPLANE_TILED; - else - dspcntr &= ~DISPPLANE_TILED; - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - dspcntr &= ~DISPPLANE_TRICKLE_FEED_DISABLE; - else + if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; I915_WRITE(reg, dspcntr); @@ -3936,7 +3947,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - enum plane plane = intel_crtc->plane; WARN_ON(!crtc->enabled); @@ -3958,10 +3968,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) ironlake_set_pipeconf(crtc); - /* Set up the display plane register */ - I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); @@ -4049,7 +4055,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - enum plane plane = intel_crtc->plane; WARN_ON(!crtc->enabled); @@ -4073,10 +4078,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_set_pipe_csc(crtc); - /* Set up the display plane register */ - I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE | DISPPLANE_PIPE_CSC_ENABLE); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); @@ -4632,9 +4633,7 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - int plane = intel_crtc->plane; bool is_dsi; - u32 dspcntr; WARN_ON(!crtc->enabled); @@ -4650,27 +4649,13 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) vlv_prepare_pll(intel_crtc); } - /* Set up the display plane register */ - dspcntr = DISPPLANE_GAMMA_ENABLE; - if (intel_crtc->config.has_dp_encoder) intel_dp_set_m_n(intel_crtc); intel_set_pipe_timings(intel_crtc); - /* pipesrc and dspsize control the size that is scaled from, - * which should always be the user's requested size. - */ - I915_WRITE(DSPSIZE(plane), - ((intel_crtc->config.pipe_src_h - 1) << 16) | - (intel_crtc->config.pipe_src_w - 1)); - I915_WRITE(DSPPOS(plane), 0); - i9xx_set_pipeconf(intel_crtc); - I915_WRITE(DSPCNTR(plane), dspcntr); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); @@ -4725,8 +4710,6 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; - int plane = intel_crtc->plane; - u32 dspcntr; WARN_ON(!crtc->enabled); @@ -4735,32 +4718,13 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) i9xx_set_pll_dividers(intel_crtc); - /* Set up the display plane register */ - dspcntr = DISPPLANE_GAMMA_ENABLE; - - if (pipe == 0) - dspcntr &= ~DISPPLANE_SEL_PIPE_MASK; - else - dspcntr |= DISPPLANE_SEL_PIPE_B; - if (intel_crtc->config.has_dp_encoder) intel_dp_set_m_n(intel_crtc); intel_set_pipe_timings(intel_crtc); - /* pipesrc and dspsize control the size that is scaled from, - * which should always be the user's requested size. - */ - I915_WRITE(DSPSIZE(plane), - ((intel_crtc->config.pipe_src_h - 1) << 16) | - (intel_crtc->config.pipe_src_w - 1)); - I915_WRITE(DSPPOS(plane), 0); - i9xx_set_pipeconf(intel_crtc); - I915_WRITE(DSPCNTR(plane), dspcntr); - POSTING_READ(DSPCNTR(plane)); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, crtc->x, crtc->y); From fdd508a6419217cce28213f3c9bd27c02a0d4c71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 8 Aug 2014 21:51:11 +0300 Subject: [PATCH 089/640] drm/i915: Call .update_primary_plane in intel_{enable, disable}_primary_hw_plane() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the intel_{enable,disable}_primary_hw_plane() simply call .update_primary_plane(), thus eliminating the rmw from these functions which should help the poor old 830M. Now we can also remove the .update_primary_plane() from the .crtc_enable() hooks because we end up calling it via intel_crtc_enable_planes()->intel_enable_primary_hw_plane(). This also has the nice benefit of making primary planes a bit closer to the way we handle sprite planes during modesets. v2: Just write 0 to DSPCNTR and DSPSURF/DSPADDR if the plane is (to be) disabled. Quicker, and more importantly avoids an oops when fb==NULL due to BIOS fb takeover failure. Pimp the commit message a bit (Matt) v3: Drop useless primary_enabled checks when setting DISPLAY_PLANE_ENABLE Reviewed-by: Matt Roper Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 119 +++++++++++---------------- 1 file changed, 49 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 041fd76a2ded..f306c91b74a5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2082,35 +2082,28 @@ void intel_flush_primary_plane(struct drm_i915_private *dev_priv, /** * intel_enable_primary_hw_plane - enable the primary plane on a given pipe - * @dev_priv: i915 private structure - * @plane: plane to enable - * @pipe: pipe being fed + * @plane: plane to be enabled + * @crtc: crtc for the plane * - * Enable @plane on @pipe, making sure that @pipe is running first. + * Enable @plane on @crtc, making sure that the pipe is running first. */ -static void intel_enable_primary_hw_plane(struct drm_i915_private *dev_priv, - enum plane plane, enum pipe pipe) +static void intel_enable_primary_hw_plane(struct drm_plane *plane, + struct drm_crtc *crtc) { - struct drm_device *dev = dev_priv->dev; - struct intel_crtc *intel_crtc = - to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); - int reg; - u32 val; + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); /* If the pipe isn't enabled, we can't pump pixels and may hang */ - assert_pipe_enabled(dev_priv, pipe); + assert_pipe_enabled(dev_priv, intel_crtc->pipe); if (intel_crtc->primary_enabled) return; intel_crtc->primary_enabled = true; - reg = DSPCNTR(plane); - val = I915_READ(reg); - WARN_ON(val & DISPLAY_PLANE_ENABLE); - - I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE); - intel_flush_primary_plane(dev_priv, plane); + dev_priv->display.update_primary_plane(crtc, plane->fb, + crtc->x, crtc->y); /* * BDW signals flip done immediately if the plane @@ -2123,31 +2116,27 @@ static void intel_enable_primary_hw_plane(struct drm_i915_private *dev_priv, /** * intel_disable_primary_hw_plane - disable the primary hardware plane - * @dev_priv: i915 private structure - * @plane: plane to disable - * @pipe: pipe consuming the data + * @plane: plane to be disabled + * @crtc: crtc for the plane * - * Disable @plane; should be an independent operation. + * Disable @plane on @crtc, making sure that the pipe is running first. */ -static void intel_disable_primary_hw_plane(struct drm_i915_private *dev_priv, - enum plane plane, enum pipe pipe) +static void intel_disable_primary_hw_plane(struct drm_plane *plane, + struct drm_crtc *crtc) { - struct intel_crtc *intel_crtc = - to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); - int reg; - u32 val; + struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + + assert_pipe_enabled(dev_priv, intel_crtc->pipe); if (!intel_crtc->primary_enabled) return; intel_crtc->primary_enabled = false; - reg = DSPCNTR(plane); - val = I915_READ(reg); - WARN_ON((val & DISPLAY_PLANE_ENABLE) == 0); - - I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE); - intel_flush_primary_plane(dev_priv, plane); + dev_priv->display.update_primary_plane(crtc, plane->fb, + crtc->x, crtc->y); } static bool need_vtd_wa(struct drm_device *dev) @@ -2390,10 +2379,19 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc, u32 dspcntr; u32 reg = DSPCNTR(plane); + if (!intel_crtc->primary_enabled) { + I915_WRITE(reg, 0); + if (INTEL_INFO(dev)->gen >= 4) + I915_WRITE(DSPSURF(plane), 0); + else + I915_WRITE(DSPADDR(plane), 0); + POSTING_READ(reg); + return; + } + dspcntr = DISPPLANE_GAMMA_ENABLE; - if (intel_crtc->primary_enabled) - dspcntr |= DISPLAY_PLANE_ENABLE; + dspcntr |= DISPLAY_PLANE_ENABLE; if (INTEL_INFO(dev)->gen < 4) { if (intel_crtc->pipe == PIPE_B) @@ -2487,10 +2485,16 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc, u32 dspcntr; u32 reg = DSPCNTR(plane); + if (!intel_crtc->primary_enabled) { + I915_WRITE(reg, 0); + I915_WRITE(DSPSURF(plane), 0); + POSTING_READ(reg); + return; + } + dspcntr = DISPPLANE_GAMMA_ENABLE; - if (intel_crtc->primary_enabled) - dspcntr |= DISPLAY_PLANE_ENABLE; + dspcntr |= DISPLAY_PLANE_ENABLE; if (IS_HASWELL(dev) || IS_BROADWELL(dev)) dspcntr |= DISPPLANE_PIPE_CSC_ENABLE; @@ -3884,14 +3888,12 @@ static void intel_crtc_dpms_overlay(struct intel_crtc *intel_crtc, bool enable) static void intel_crtc_enable_planes(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; - int plane = intel_crtc->plane; drm_vblank_on(dev, pipe); - intel_enable_primary_hw_plane(dev_priv, plane, pipe); + intel_enable_primary_hw_plane(crtc->primary, crtc); intel_enable_planes(crtc); intel_crtc_update_cursor(crtc, true); intel_crtc_dpms_overlay(intel_crtc, true); @@ -3928,7 +3930,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc) intel_crtc_dpms_overlay(intel_crtc, false); intel_crtc_update_cursor(crtc, false); intel_disable_planes(crtc); - intel_disable_primary_hw_plane(dev_priv, plane, pipe); + intel_disable_primary_hw_plane(crtc->primary, crtc); /* * FIXME: Once we grow proper nuclear flip support out of this we need @@ -3968,9 +3970,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc) ironlake_set_pipeconf(crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev, pipe, true); @@ -4078,9 +4077,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc) intel_set_pipe_csc(crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev, pipe, true); @@ -4629,7 +4625,6 @@ static void valleyview_modeset_global_resources(struct drm_device *dev) static void valleyview_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; @@ -4656,9 +4651,6 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc) i9xx_set_pipeconf(intel_crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; intel_set_cpu_fifo_underrun_reporting(dev, pipe, true); @@ -4706,7 +4698,6 @@ static void i9xx_set_pll_dividers(struct intel_crtc *crtc) static void i9xx_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; int pipe = intel_crtc->pipe; @@ -4725,9 +4716,6 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc) i9xx_set_pipeconf(intel_crtc); - dev_priv->display.update_primary_plane(crtc, crtc->primary->fb, - crtc->x, crtc->y); - intel_crtc->active = true; if (!IS_GEN2(dev)) @@ -11351,7 +11339,6 @@ static int intel_crtc_set_config(struct drm_mode_set *set) ret = intel_set_mode(set->crtc, set->mode, set->x, set->y, set->fb); } else if (config->fb_changed) { - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(set->crtc); intel_crtc_wait_for_pending_flips(set->crtc); @@ -11365,8 +11352,7 @@ static int intel_crtc_set_config(struct drm_mode_set *set) */ if (!intel_crtc->primary_enabled && ret == 0) { WARN_ON(!intel_crtc->active); - intel_enable_primary_hw_plane(dev_priv, intel_crtc->plane, - intel_crtc->pipe); + intel_enable_primary_hw_plane(set->crtc->primary, set->crtc); } /* @@ -11519,8 +11505,6 @@ static int intel_primary_plane_disable(struct drm_plane *plane) { struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_plane *intel_plane = to_intel_plane(plane); struct intel_crtc *intel_crtc; if (!plane->fb) @@ -11543,8 +11527,8 @@ intel_primary_plane_disable(struct drm_plane *plane) goto disable_unpin; intel_crtc_wait_for_pending_flips(plane->crtc); - intel_disable_primary_hw_plane(dev_priv, intel_plane->plane, - intel_plane->pipe); + intel_disable_primary_hw_plane(plane, plane->crtc); + disable_unpin: mutex_lock(&dev->struct_mutex); i915_gem_track_fb(intel_fb_obj(plane->fb), NULL, @@ -11564,9 +11548,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc, uint32_t src_w, uint32_t src_h) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->fb); struct drm_rect dest = { @@ -11653,9 +11635,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc, INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe)); if (intel_crtc->primary_enabled) - intel_disable_primary_hw_plane(dev_priv, - intel_plane->plane, - intel_plane->pipe); + intel_disable_primary_hw_plane(plane, crtc); if (plane->fb != fb) @@ -11672,8 +11652,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc, return ret; if (!intel_crtc->primary_enabled) - intel_enable_primary_hw_plane(dev_priv, intel_crtc->plane, - intel_crtc->pipe); + intel_enable_primary_hw_plane(plane, crtc); return 0; } From 22c59960d9fe72f3fbd28de69cc43c5522dd5fe6 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 8 Aug 2014 17:45:32 -0300 Subject: [PATCH 090/640] drm/i915: fix i915_interrupt_info on BDW Currently, if the machine is runtime suspended an you read the file, you will get an "Unclaimed register" error message. Testcase: igt/pm_rpm/debugfs-read Signed-off-by: Paulo Zanoni Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 330caa1ab9f9..3b7decbeeed3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -703,6 +703,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data) } for_each_pipe(pipe) { + if (!intel_display_power_enabled(dev_priv, + POWER_DOMAIN_PIPE(pipe))) { + seq_printf(m, "Pipe %c power disabled\n", + pipe_name(pipe)); + continue; + } seq_printf(m, "Pipe %c IMR:\t%08x\n", pipe_name(pipe), I915_READ(GEN8_DE_PIPE_IMR(pipe))); From 3bb11b536c1037143765b4efc8056600438df7f6 Mon Sep 17 00:00:00 2001 From: Sonika Jindal Date: Mon, 11 Aug 2014 09:06:39 +0530 Subject: [PATCH 091/640] drm/i915: Continuation of future readiness series Removing the check for HAS_PCH_SPLIT, it looks redundant here. Anyways all the platforms are checked separately. v2: Reordering as per the gen (Ville) Signed-off-by: Sonika Jindal Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 42 +++++++++++++--------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f306c91b74a5..0746590ed4e3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12354,29 +12354,27 @@ static void intel_init_display(struct drm_device *dev) dev_priv->display.get_display_clock_speed = i830_get_display_clock_speed; - if (HAS_PCH_SPLIT(dev)) { - if (IS_GEN5(dev)) { - dev_priv->display.fdi_link_train = ironlake_fdi_link_train; - dev_priv->display.write_eld = ironlake_write_eld; - } else if (IS_GEN6(dev)) { - dev_priv->display.fdi_link_train = gen6_fdi_link_train; - dev_priv->display.write_eld = ironlake_write_eld; - dev_priv->display.modeset_global_resources = - snb_modeset_global_resources; - } else if (IS_IVYBRIDGE(dev)) { - /* FIXME: detect B0+ stepping and use auto training */ - dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train; - dev_priv->display.write_eld = ironlake_write_eld; - dev_priv->display.modeset_global_resources = - ivb_modeset_global_resources; - } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { - dev_priv->display.fdi_link_train = hsw_fdi_link_train; - dev_priv->display.write_eld = haswell_write_eld; - dev_priv->display.modeset_global_resources = - haswell_modeset_global_resources; - } - } else if (IS_G4X(dev)) { + if (IS_G4X(dev)) { dev_priv->display.write_eld = g4x_write_eld; + } else if (IS_GEN5(dev)) { + dev_priv->display.fdi_link_train = ironlake_fdi_link_train; + dev_priv->display.write_eld = ironlake_write_eld; + } else if (IS_GEN6(dev)) { + dev_priv->display.fdi_link_train = gen6_fdi_link_train; + dev_priv->display.write_eld = ironlake_write_eld; + dev_priv->display.modeset_global_resources = + snb_modeset_global_resources; + } else if (IS_IVYBRIDGE(dev)) { + /* FIXME: detect B0+ stepping and use auto training */ + dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train; + dev_priv->display.write_eld = ironlake_write_eld; + dev_priv->display.modeset_global_resources = + ivb_modeset_global_resources; + } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { + dev_priv->display.fdi_link_train = hsw_fdi_link_train; + dev_priv->display.write_eld = haswell_write_eld; + dev_priv->display.modeset_global_resources = + haswell_modeset_global_resources; } else if (IS_VALLEYVIEW(dev)) { dev_priv->display.modeset_global_resources = valleyview_modeset_global_resources; From d6699dd3a7f696a80a5f8e5bb6ecf6ff6dd7c998 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 9 Aug 2014 16:29:31 +0100 Subject: [PATCH 092/640] drm/i915: Fix wrong number of HDMI translation entries I keep telling myself that those tables aren't great because their size is the number of dwords we need to program and not the number of entries (number of dwords = number of entries * 2). And... I got it wrong when I refactored the code. Fortunately, it was only wrong when the VBT table (or the code parsing it) is itself erroneous. Long story short, it shouldn't matter, but still, there's a potential array overflow and random programming of the DDI translation tables. Cc: Paulo Zanoni Signed-off-by: Damien Lespiau Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ca1f9a8a7d03..02d55843c78d 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -169,14 +169,14 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) ddi_translations_dp = bdw_ddi_translations_dp; ddi_translations_edp = bdw_ddi_translations_edp; ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi) / 2; hdmi_800mV_0dB = 7; } else if (IS_HASWELL(dev)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; ddi_translations_edp = hsw_ddi_translations_dp; ddi_translations_hdmi = hsw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi) / 2; hdmi_800mV_0dB = 6; } else { WARN(1, "ddi translation table missing\n"); @@ -184,7 +184,7 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) ddi_translations_fdi = bdw_ddi_translations_fdi; ddi_translations_dp = bdw_ddi_translations_dp; ddi_translations_hdmi = bdw_ddi_translations_hdmi; - n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi) / 2; hdmi_800mV_0dB = 7; } From dc8cd1e790081a31ba4d86c3c0812c348eeec7fc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 17:37:22 +0100 Subject: [PATCH 093/640] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt If an object is not bound into the global GTT, then it cannot be accessed via the GTT. This restores the original code that was muddled by ppGTT. In the process, we remove a WARN that had long outlived its usefulness and was simply being coded around instead. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 20743bd421f9..1be7e541a7c7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3592,11 +3592,12 @@ int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); uint32_t old_write_domain, old_read_domains; int ret; /* Not valid to be called on unbound objects. */ - if (!i915_gem_obj_bound_any(obj)) + if (vma == NULL) return -EINVAL; if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) @@ -3638,13 +3639,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) old_write_domain); /* And bump the LRU for this access */ - if (i915_gem_object_is_inactive(obj)) { - struct i915_vma *vma = i915_gem_obj_to_ggtt(obj); - if (vma) - list_move_tail(&vma->mm_list, - &dev_priv->gtt.base.inactive_list); - - } + if (i915_gem_object_is_inactive(obj)) + list_move_tail(&vma->mm_list, + &dev_priv->gtt.base.inactive_list); return 0; } @@ -3808,9 +3805,6 @@ static bool is_pin_display(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - if (list_empty(&obj->vma_list)) - return false; - vma = i915_gem_obj_to_ggtt(obj); if (!vma) return false; @@ -5253,12 +5247,6 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) { struct i915_vma *vma; - /* This WARN has probably outlived its usefulness (callers already - * WARN if they don't find the GGTT vma they expect). When removing, - * remember to remove the pre-check in is_pin_display() as well */ - if (WARN_ON(list_empty(&obj->vma_list))) - return NULL; - vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); if (vma->vm != obj_to_ggtt(obj)) return NULL; From e6a844687cf929ec053c7578d5ecc794a8a6c5cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Aug 2014 12:00:12 +0200 Subject: [PATCH 094/640] drm/i915: Force CPU relocations if not GTT mapped Move the decision on whether we need to have a mappable object during execbuffer to the fore and then reuse that decision by propagating the flag through to reservation. As a corollary, before doing the actual relocation through the GTT, we can make sure that we do have a GTT mapping through which to operate. Note that the key to make this work is to ditch the obj->map_and_fenceable unbind optimization - with full ppgtt it doesn't make a lot of sense any more anyway. v2: Revamp and resend to ease future patches. v3: Refresh patch rationale References: https://bugs.freedesktop.org/show_bug.cgi?id=81094 Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Daniel Vetter [danvet: Explain why obj->map_and_fenceable is key and split out the secure batch fix.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 8 +-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 59 +++++++++++----------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1be7e541a7c7..1ca2231e3929 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2928,9 +2928,8 @@ int i915_vma_unbind(struct i915_vma *vma) vma->unbind_vma(vma); list_del_init(&vma->mm_list); - /* Avoid an unnecessary call to unbind on rebind. */ if (i915_is_ggtt(vma->vm)) - obj->map_and_fenceable = true; + obj->map_and_fenceable = false; drm_mm_remove_node(&vma->node); i915_gem_vma_destroy(vma); @@ -3282,6 +3281,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) return 0; } } else if (enable) { + if (WARN_ON(!obj->map_and_fenceable)) + return -EINVAL; + reg = i915_find_fence_reg(dev); if (IS_ERR(reg)) return PTR_ERR(reg); @@ -4331,8 +4333,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->fence_reg = I915_FENCE_REG_NONE; obj->madv = I915_MADV_WILLNEED; - /* Avoid an unnecessary call to unbind on the first bind. */ - obj->map_and_fenceable = true; i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 60998fc4e5b2..6320a385841b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -35,6 +35,7 @@ #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) #define __EXEC_OBJECT_NEEDS_BIAS (1<<28) #define BATCH_OFFSET_BIAS (256*1024) @@ -534,14 +535,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb) return ret; } -static int -need_reloc_mappable(struct i915_vma *vma) -{ - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - return entry->relocation_count && !use_cpu_reloc(vma->obj) && - i915_is_ggtt(vma->vm); -} - static int i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct intel_engine_cs *ring, @@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; - bool need_fence; uint64_t flags; int ret; flags = 0; - - need_fence = - has_fenced_gpu_access && - entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; - if (need_fence || need_reloc_mappable(vma)) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) flags |= PIN_MAPPABLE; - if (entry->flags & EXEC_OBJECT_NEEDS_GTT) flags |= PIN_GLOBAL; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) @@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, } static bool -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access) +need_reloc_mappable(struct i915_vma *vma) +{ + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; + + if (entry->relocation_count == 0) + return false; + + if (!i915_is_ggtt(vma->vm)) + return false; + + /* See also use_cpu_reloc() */ + if (HAS_LLC(vma->obj->base.dev)) + return false; + + if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; + + return true; +} + +static bool +eb_vma_misplaced(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_object *obj = vma->obj; - bool need_fence, need_mappable; - need_fence = - has_fenced_gpu_access && - entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(vma); - - WARN_ON((need_mappable || need_fence) && + WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !i915_is_ggtt(vma->vm)); if (entry->alignment && vma->node.start & (entry->alignment - 1)) return true; - if (need_mappable && !obj->map_and_fenceable) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) return true; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && @@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(vma); - if (need_mappable) + if (need_mappable) { + entry->flags |= __EXEC_OBJECT_NEEDS_MAP; list_move(&vma->exec_list, &ordered_vmas); - else + } else list_move_tail(&vma->exec_list, &ordered_vmas); obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; @@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, if (!drm_mm_node_allocated(&vma->node)) continue; - if (eb_vma_misplaced(vma, has_fenced_gpu_access)) + if (eb_vma_misplaced(vma)) ret = i915_vma_unbind(vma); else ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); From 82b6b6d786466e705e7244cc676189ce47a9199a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 17:37:24 +0100 Subject: [PATCH 095/640] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access This migrates the fence tracking onto the existing seqno infrastructure so that the later conversion to tracking via requests is simplified. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 7 ----- drivers/gpu/drm/i915/i915_gem.c | 17 ----------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 ++++++++++++---------- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- 4 files changed, 20 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fab97bc3215f..5c3f033ff928 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1777,13 +1777,6 @@ struct drm_i915_gem_object { * Only honoured if hardware has relevant pte bit */ unsigned long gt_ro:1; - - /* - * Is the GPU currently using a fence to access this buffer, - */ - unsigned int pending_fenced_gpu_access:1; - unsigned int fenced_gpu_access:1; - unsigned int cache_level:3; unsigned int has_aliasing_ppgtt_mapping:1; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1ca2231e3929..3eec344bdac0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2161,8 +2161,6 @@ static void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, struct intel_engine_cs *ring) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; u32 seqno = intel_ring_get_seqno(ring); BUG_ON(ring == NULL); @@ -2181,19 +2179,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, list_move_tail(&obj->ring_list, &ring->active_list); obj->last_read_seqno = seqno; - - if (obj->fenced_gpu_access) { - obj->last_fenced_seqno = seqno; - - /* Bump MRU to take account of the delayed flush */ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_fence_reg *reg; - - reg = &dev_priv->fence_regs[obj->fence_reg]; - list_move_tail(®->lru_list, - &dev_priv->mm.fence_list); - } - } } void i915_vma_move_to_active(struct i915_vma *vma, @@ -2229,7 +2214,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) obj->base.write_domain = 0; obj->last_fenced_seqno = 0; - obj->fenced_gpu_access = false; obj->active = 0; drm_gem_object_unreference(&obj->base); @@ -3174,7 +3158,6 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) obj->last_fenced_seqno = 0; } - obj->fenced_gpu_access = false; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6320a385841b..70946c551e5d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -542,7 +542,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, { struct drm_i915_gem_object *obj = vma->obj; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; uint64_t flags; int ret; @@ -560,17 +559,13 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, entry->flags |= __EXEC_OBJECT_HAS_PIN; - if (has_fenced_gpu_access) { - if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - ret = i915_gem_object_get_fence(obj); - if (ret) - return ret; + if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { + ret = i915_gem_object_get_fence(obj); + if (ret) + return ret; - if (i915_gem_object_pin_fence(obj)) - entry->flags |= __EXEC_OBJECT_HAS_FENCE; - - obj->pending_fenced_gpu_access = true; - } + if (i915_gem_object_pin_fence(obj)) + entry->flags |= __EXEC_OBJECT_HAS_FENCE; } if (entry->offset != vma->node.start) { @@ -658,8 +653,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj = vma->obj; entry = vma->exec_entry; + if (!has_fenced_gpu_access) + entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; need_fence = - has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(vma); @@ -672,7 +668,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; obj->base.pending_write_domain = 0; - obj->pending_fenced_gpu_access = false; } list_splice(&ordered_vmas, vmas); @@ -959,9 +954,11 @@ static void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct intel_engine_cs *ring) { + u32 seqno = intel_ring_get_seqno(ring); struct i915_vma *vma; list_for_each_entry(vma, vmas, exec_list) { + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; struct drm_i915_gem_object *obj = vma->obj; u32 old_read = obj->base.read_domains; u32 old_write = obj->base.write_domain; @@ -970,18 +967,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, if (obj->base.write_domain == 0) obj->base.pending_read_domains |= obj->base.read_domains; obj->base.read_domains = obj->base.pending_read_domains; - obj->fenced_gpu_access = obj->pending_fenced_gpu_access; i915_vma_move_to_active(vma, ring); if (obj->base.write_domain) { obj->dirty = 1; - obj->last_write_seqno = intel_ring_get_seqno(ring); + obj->last_write_seqno = seqno; intel_fb_obj_invalidate(obj, ring); /* update for the implicit flush after a batch */ obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; } + if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { + obj->last_fenced_seqno = seqno; + if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { + struct drm_i915_private *dev_priv = to_i915(ring->dev); + list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, + &dev_priv->mm.fence_list); + } + } trace_i915_gem_object_change_domain(obj, old_read, old_write); } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index cb150e8b4336..7e623bf097a1 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, if (ret == 0) { obj->fence_dirty = - obj->fenced_gpu_access || + obj->last_fenced_seqno || obj->fence_reg != I915_FENCE_REG_NONE; obj->tiling_mode = args->tiling_mode; From 87f1f46514babd40fc3551ca2d6148cdedd9c7e3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 19:18:42 +0100 Subject: [PATCH 096/640] drm/i915: Copy PCI device id into the device info block This is so that we can make the drm_i915_private->info always the preferred source for chipset type and feature queries. Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 5 ++-- drivers/gpu/drm/i915/i915_drv.h | 50 +++++++++++++++++---------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index c965698a8bac..1867e2619e73 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1603,9 +1603,10 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) dev->dev_private = dev_priv; dev_priv->dev = dev; - /* copy initial configuration to dev_priv->info */ + /* Setup the write-once "constant" device info */ device_info = (struct intel_device_info *)&dev_priv->info; - *device_info = *info; + memcpy(device_info, info, sizeof(dev_priv->info)); + device_info->device_id = dev->pdev->device; spin_lock_init(&dev_priv->irq_lock); spin_lock_init(&dev_priv->gpu_error.lock); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5c3f033ff928..8a55f07d80cb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -558,6 +558,7 @@ struct intel_uncore { struct intel_device_info { u32 display_mmio_offset; + u16 device_id; u8 num_pipes:3; u8 num_sprites[I915_MAX_PIPES]; u8 gen; @@ -1980,51 +1981,52 @@ struct drm_i915_cmd_table { int count; }; -#define INTEL_INFO(dev) (&to_i915(dev)->info) +#define INTEL_INFO(p) (&to_i915(p)->info) +#define INTEL_DEVID(p) (INTEL_INFO(p)->device_id) -#define IS_I830(dev) ((dev)->pdev->device == 0x3577) -#define IS_845G(dev) ((dev)->pdev->device == 0x2562) +#define IS_I830(dev) (INTEL_DEVID(dev) == 0x3577) +#define IS_845G(dev) (INTEL_DEVID(dev) == 0x2562) #define IS_I85X(dev) (INTEL_INFO(dev)->is_i85x) -#define IS_I865G(dev) ((dev)->pdev->device == 0x2572) +#define IS_I865G(dev) (INTEL_DEVID(dev) == 0x2572) #define IS_I915G(dev) (INTEL_INFO(dev)->is_i915g) -#define IS_I915GM(dev) ((dev)->pdev->device == 0x2592) -#define IS_I945G(dev) ((dev)->pdev->device == 0x2772) +#define IS_I915GM(dev) (INTEL_DEVID(dev) == 0x2592) +#define IS_I945G(dev) (INTEL_DEVID(dev) == 0x2772) #define IS_I945GM(dev) (INTEL_INFO(dev)->is_i945gm) #define IS_BROADWATER(dev) (INTEL_INFO(dev)->is_broadwater) #define IS_CRESTLINE(dev) (INTEL_INFO(dev)->is_crestline) -#define IS_GM45(dev) ((dev)->pdev->device == 0x2A42) +#define IS_GM45(dev) (INTEL_DEVID(dev) == 0x2A42) #define IS_G4X(dev) (INTEL_INFO(dev)->is_g4x) -#define IS_PINEVIEW_G(dev) ((dev)->pdev->device == 0xa001) -#define IS_PINEVIEW_M(dev) ((dev)->pdev->device == 0xa011) +#define IS_PINEVIEW_G(dev) (INTEL_DEVID(dev) == 0xa001) +#define IS_PINEVIEW_M(dev) (INTEL_DEVID(dev) == 0xa011) #define IS_PINEVIEW(dev) (INTEL_INFO(dev)->is_pineview) #define IS_G33(dev) (INTEL_INFO(dev)->is_g33) -#define IS_IRONLAKE_M(dev) ((dev)->pdev->device == 0x0046) +#define IS_IRONLAKE_M(dev) (INTEL_DEVID(dev) == 0x0046) #define IS_IVYBRIDGE(dev) (INTEL_INFO(dev)->is_ivybridge) -#define IS_IVB_GT1(dev) ((dev)->pdev->device == 0x0156 || \ - (dev)->pdev->device == 0x0152 || \ - (dev)->pdev->device == 0x015a) -#define IS_SNB_GT1(dev) ((dev)->pdev->device == 0x0102 || \ - (dev)->pdev->device == 0x0106 || \ - (dev)->pdev->device == 0x010A) +#define IS_IVB_GT1(dev) (INTEL_DEVID(dev) == 0x0156 || \ + INTEL_DEVID(dev) == 0x0152 || \ + INTEL_DEVID(dev) == 0x015a) +#define IS_SNB_GT1(dev) (INTEL_DEVID(dev) == 0x0102 || \ + INTEL_DEVID(dev) == 0x0106 || \ + INTEL_DEVID(dev) == 0x010A) #define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview) #define IS_CHERRYVIEW(dev) (INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev)) #define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell) #define IS_BROADWELL(dev) (!INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev)) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) #define IS_HSW_EARLY_SDV(dev) (IS_HASWELL(dev) && \ - ((dev)->pdev->device & 0xFF00) == 0x0C00) + (INTEL_DEVID(dev) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev) (IS_BROADWELL(dev) && \ - (((dev)->pdev->device & 0xf) == 0x2 || \ - ((dev)->pdev->device & 0xf) == 0x6 || \ - ((dev)->pdev->device & 0xf) == 0xe)) + ((INTEL_DEVID(dev) & 0xf) == 0x2 || \ + (INTEL_DEVID(dev) & 0xf) == 0x6 || \ + (INTEL_DEVID(dev) & 0xf) == 0xe)) #define IS_HSW_ULT(dev) (IS_HASWELL(dev) && \ - ((dev)->pdev->device & 0xFF00) == 0x0A00) + (INTEL_DEVID(dev) & 0xFF00) == 0x0A00) #define IS_ULT(dev) (IS_HSW_ULT(dev) || IS_BDW_ULT(dev)) #define IS_HSW_GT3(dev) (IS_HASWELL(dev) && \ - ((dev)->pdev->device & 0x00F0) == 0x0020) + (INTEL_DEVID(dev) & 0x00F0) == 0x0020) /* ULX machines are also considered ULT. */ -#define IS_HSW_ULX(dev) ((dev)->pdev->device == 0x0A0E || \ - (dev)->pdev->device == 0x0A1E) +#define IS_HSW_ULX(dev) (INTEL_DEVID(dev) == 0x0A0E || \ + INTEL_DEVID(dev) == 0x0A1E) #define IS_PRELIMINARY_HW(intel_info) ((intel_info)->is_preliminary) /* From f6daaec29b2a201eb8db2ce26b4460b779ad8111 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 9 Aug 2014 23:00:56 +0100 Subject: [PATCH 097/640] drm/i915: Make intel_disable_shared_dpll() static Found with sparse. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0746590ed4e3..245cf4128314 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1797,7 +1797,7 @@ static void intel_enable_shared_dpll(struct intel_crtc *crtc) pll->on = true; } -void intel_disable_shared_dpll(struct intel_crtc *crtc) +static void intel_disable_shared_dpll(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; From 1bee20175f27b46427f10290fdd4a79334d41a60 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Sat, 9 Aug 2014 23:00:58 +0100 Subject: [PATCH 098/640] drm/i915: Remove set but unused 'gt_perf_status' Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 41de760bf1d4..12f4e143328c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3719,7 +3719,6 @@ static void gen6_enable_rps(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; u32 rp_state_cap; - u32 gt_perf_status; u32 rc6vids, pcu_mbox = 0, rc6_mask = 0; u32 gtfifodbg; int rc6_mode; @@ -3744,7 +3743,6 @@ static void gen6_enable_rps(struct drm_device *dev) gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); - gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS); parse_rp_state_cap(dev_priv, rp_state_cap); From 9bec9b1334d687c0a9fcf3d3a1987a61b4826a45 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Aug 2014 09:21:35 +0100 Subject: [PATCH 099/640] drm/i915: Double check ring is idle before declaring the GPU wedged During ring initialisation, sometimes we observe, though not in production hardware, that the idle flag is not set even though the ring is empty. Double check before giving up. Signed-off-by: Chris Wilson Cc: Damien Lespiau Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ringbuffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 117543e58d48..a059b64a0fb2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -478,7 +478,12 @@ static bool stop_ring(struct intel_engine_cs *ring) I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { DRM_ERROR("%s : timed out trying to stop ring\n", ring->name); - return false; + /* Sometimes we observe that the idle flag is not + * set even though the ring is empty. So double + * check before giving up. + */ + if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring)) + return false; } } From dbbe91279511d6a18a521b953a3c139e4787e660 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 Aug 2014 19:18:43 +0100 Subject: [PATCH 100/640] drm/i915: Agnostic INTEL_INFO Adapt the macro so that we can pass either the struct drm_device or the struct drm_i915_private pointers and get the answer we want. Over time, my plan is to convert all users over to using drm_i915_private and so trimming down the pointer dance. Having spent a few hours chasing that goal and achieved over 8k of object code saving, it appears to be a worthwhile target. This interim macro allows us to slowly convert over. Signed-off-by: Chris Wilson [danvet: Drop the (struct drm_device *) cast per the m-l discussion. Also explain the seemingly unecessary first cast.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 1867e2619e73..1763fbf34e1d 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1596,6 +1596,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!drm_core_check_feature(dev, DRIVER_MODESET) && !dev->agp) return -EINVAL; + /* For the ugly agnostic INTEL_INFO macro */ + BUILD_BUG_ON(sizeof(*dev_priv) == sizeof(*dev)); + dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); if (dev_priv == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8a55f07d80cb..6959c1b2d648 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1981,7 +1981,10 @@ struct drm_i915_cmd_table { int count; }; -#define INTEL_INFO(p) (&to_i915(p)->info) +/* Note that the (struct drm_i915_private *) cast is just to shut up gcc. */ +#define __I915__(p) ((sizeof(*(p)) == sizeof(struct drm_i915_private)) ? \ + (struct drm_i915_private *)(p) : to_i915(p)) +#define INTEL_INFO(p) (&__I915__(p)->info) #define INTEL_DEVID(p) (INTEL_INFO(p)->device_id) #define IS_I830(dev) (INTEL_DEVID(dev) == 0x3577) From da51a1e7e398129d9fddd4b26b8469145dd4fd08 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 12:08:58 +0200 Subject: [PATCH 101/640] drm/i915: Fix secure dispatch with full ppgtt Based upon a hunk from a patch from Chris Wilson, but augmented to: - Process the batch in the full ppgtt vm so that self-relocations match again with userspace's expectations.. - Add a comment why plain pin for the global gtt binding is safe at that point. v2: Drop local bind_vm variable (Chris). v3: Explain why this works despite the lack of proper active tracking for the ggtt batch vma. Cc: Chris Wilson Cc: Ben Widawsky Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 48 +++++++++++----------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 70946c551e5d..e1eac15b2583 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -95,7 +95,6 @@ eb_lookup_vmas(struct eb_vmas *eb, struct i915_address_space *vm, struct drm_file *file) { - struct drm_i915_private *dev_priv = vm->dev->dev_private; struct drm_i915_gem_object *obj; struct list_head objects; int i, ret; @@ -130,7 +129,6 @@ eb_lookup_vmas(struct eb_vmas *eb, i = 0; while (!list_empty(&objects)) { struct i915_vma *vma; - struct i915_address_space *bind_vm = vm; if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT && USES_FULL_PPGTT(vm->dev)) { @@ -138,13 +136,6 @@ eb_lookup_vmas(struct eb_vmas *eb, goto err; } - /* If we have secure dispatch, or the userspace assures us that - * they know what they're doing, use the GGTT VM. - */ - if (((args->flags & I915_EXEC_SECURE) && - (i == (args->buffer_count - 1)))) - bind_vm = &dev_priv->gtt.base; - obj = list_first_entry(&objects, struct drm_i915_gem_object, obj_exec_link); @@ -157,7 +148,7 @@ eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm); + vma = i915_gem_obj_lookup_or_create_vma(obj, vm); if (IS_ERR(vma)) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); @@ -1391,25 +1382,36 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (flags & I915_DISPATCH_SECURE && - !batch_obj->has_global_gtt_mapping) { - /* When we have multiple VMs, we'll need to make sure that we - * allocate space first */ - struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj); - BUG_ON(!vma); - vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND); - } + if (flags & I915_DISPATCH_SECURE) { + /* + * So on first glance it looks freaky that we pin the batch here + * outside of the reservation loop. But: + * - The batch is already pinned into the relevant ppgtt, so we + * already have the backing storage fully allocated. + * - No other BO uses the global gtt (well contexts, but meh), + * so we don't really have issues with mutliple objects not + * fitting due to fragmentation. + * So this is actually safe. + */ + ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); + if (ret) + goto err; - if (flags & I915_DISPATCH_SECURE) exec_start += i915_gem_obj_ggtt_offset(batch_obj); - else + } else exec_start += i915_gem_obj_offset(batch_obj, vm); ret = legacy_ringbuffer_submission(dev, file, ring, ctx, - args, &eb->vmas, batch_obj, exec_start, flags); - if (ret) - goto err; + args, &eb->vmas, batch_obj, exec_start, flags); + /* + * FIXME: We crucially rely upon the active tracking for the (ppgtt) + * batch vma for correctness. For less ugly and less fragility this + * needs to be adjusted to also track the ggtt batch vma properly as + * active. + */ + if (flags & I915_DISPATCH_SECURE) + i915_gem_object_ggtt_unpin(batch_obj); err: /* the request owns the ref now */ i915_gem_context_unreference(ctx); From ad19f10bc2a5964f1564639e60953de76b7e50f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 Aug 2014 06:29:08 +0100 Subject: [PATCH 102/640] drm/i915: Pre-validate the NEED_GTTS flag for execbuffer We have an implementation requirement that precludes the user from requesting a ggtt entry when the device is operating in ppgtt mode. Move the current check from inside the execbuffer object collation to the prevalidation phase. v2: Roll both invalid flags checks into one Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e1eac15b2583..446f4b6fbefe 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -130,12 +130,6 @@ eb_lookup_vmas(struct eb_vmas *eb, while (!list_empty(&objects)) { struct i915_vma *vma; - if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT && - USES_FULL_PPGTT(vm->dev)) { - ret = -EINVAL; - goto err; - } - obj = list_first_entry(&objects, struct drm_i915_gem_object, obj_exec_link); @@ -877,18 +871,24 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) } static int -validate_exec_list(struct drm_i915_gem_exec_object2 *exec, +validate_exec_list(struct drm_device *dev, + struct drm_i915_gem_exec_object2 *exec, int count) { - int i; unsigned relocs_total = 0; unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry); + unsigned invalid_flags; + int i; + + invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; + if (USES_FULL_PPGTT(dev)) + invalid_flags |= EXEC_OBJECT_NEEDS_GTT; for (i = 0; i < count; i++) { char __user *ptr = to_user_ptr(exec[i].relocs_ptr); int length; /* limited by fault_in_pages_readable() */ - if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS) + if (exec[i].flags & invalid_flags) return -EINVAL; /* First check for malicious input causing overflow in @@ -1250,7 +1250,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (!i915_gem_check_execbuffer(args)) return -EINVAL; - ret = validate_exec_list(exec, args->buffer_count); + ret = validate_exec_list(dev, exec, args->buffer_count); if (ret) return ret; From 060e82c6f4ccf678decffb28ba8301ca9220a995 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 Aug 2014 06:29:10 +0100 Subject: [PATCH 103/640] drm/i915: Remove redundant list_empty(eb->vmas) tests in execbuffer Part of the pre-validation for an execbuffer call is that there is at least one object in the execlist. As we bail if we fail to lookup any object, we can be sure that after the eb_lookup_vma() there is at least one object in the vma list and so we do not need to assert. Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Daniel Vetter Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 446f4b6fbefe..6c07940b468c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -622,9 +622,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; int retry; - if (list_empty(vmas)) - return 0; - i915_gem_retire_requests_ring(ring); vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; @@ -725,9 +722,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, int i, total, ret; unsigned count = args->buffer_count; - if (WARN_ON(list_empty(&eb->vmas))) - return 0; - vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm; /* We may process another execbuffer during the unlock... */ From 906843c3a1acc36407e500a073679c4207d307cd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 Aug 2014 06:29:11 +0100 Subject: [PATCH 104/640] drm/i915: Simplify relocate_entry_gtt() and make 64-bit safe Even though we should not try to use 4+GiB GTTs on 32-bit systems, by using a local variable we can future proof the code whilst making it easier to read. Signed-off-by: Chris Wilson [danvet: Appease checkpatch a bit.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++----------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6c07940b468c..dec2cc2fbd42 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -293,7 +293,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; uint64_t delta = reloc->delta + target_offset; - uint32_t __iomem *reloc_entry; + uint64_t offset; void __iomem *reloc_page; int ret; @@ -306,25 +306,24 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, return ret; /* Map the page containing the relocation we're going to perform. */ - reloc->offset += i915_gem_obj_ggtt_offset(obj); + offset = i915_gem_obj_ggtt_offset(obj); + offset += reloc->offset; reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, - reloc->offset & PAGE_MASK); - reloc_entry = (uint32_t __iomem *) - (reloc_page + offset_in_page(reloc->offset)); - iowrite32(lower_32_bits(delta), reloc_entry); + offset & PAGE_MASK); + iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset)); if (INTEL_INFO(dev)->gen >= 8) { - reloc_entry += 1; + offset += sizeof(uint32_t); - if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) { + if (offset_in_page(offset) == 0) { io_mapping_unmap_atomic(reloc_page); - reloc_page = io_mapping_map_atomic_wc( - dev_priv->gtt.mappable, - reloc->offset + sizeof(uint32_t)); - reloc_entry = reloc_page; + reloc_page = + io_mapping_map_atomic_wc(dev_priv->gtt.mappable, + offset); } - iowrite32(upper_32_bits(delta), reloc_entry); + iowrite32(upper_32_bits(delta), + reloc_page + offset_in_page(offset)); } io_mapping_unmap_atomic(reloc_page); From 2a0d7cfd9482ca4c10a4d8794791760a6a7ce40c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 29 Jul 2014 15:32:37 +0200 Subject: [PATCH 105/640] drm: Add a plane->reset hook In general having this can't hurt, and the atomic helpers will need it to be able to reset the state objects properly. The overall idea is to reset in the order pixels flow, so planes -> crtcs -> encoders -> connectors. v2: Squash in fixup from Ville to correctly deference struct drm_plane instead of drm_crtc when walking the plane list. Fixes an oops in driver init and resume. Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 5 +++++ include/drm/drm_crtc.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index cacb460a7145..285e62a134b2 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -4663,9 +4663,14 @@ out: void drm_mode_config_reset(struct drm_device *dev) { struct drm_crtc *crtc; + struct drm_plane *plane; struct drm_encoder *encoder; struct drm_connector *connector; + list_for_each_entry(plane, &dev->mode_config.plane_list, head) + if (plane->funcs->reset) + plane->funcs->reset(plane); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) if (crtc->funcs->reset) crtc->funcs->reset(crtc); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 279565aa0c33..2c1f58d6957a 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -581,6 +581,7 @@ struct drm_plane_funcs { uint32_t src_w, uint32_t src_h); int (*disable_plane)(struct drm_plane *plane); void (*destroy)(struct drm_plane *plane); + void (*reset)(struct drm_plane *plane); int (*set_property)(struct drm_plane *plane, struct drm_property *property, uint64_t val); From e8450f51a4b39cfe0878b4aee339820b2bfff240 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 25 Jul 2014 23:34:03 +0200 Subject: [PATCH 106/640] drm/irq: Implement a generic vblank_wait function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As usual in both a crtc index and a struct drm_crtc * version. The function assumes that no one drivers their display below 10Hz, and it will complain if the vblank wait takes longer than that. v2: Also check dev->max_vblank_counter since some drivers register a fake get_vblank_counter function. v3: Use drm_vblank_count instead of calling the low-level ->get_vblank_counter callback. That way we'll get the sw-cooked counter for platforms without proper vblank support and so can ditch the max_vblank_counter check again. v4: Review from Michel Dänzer: - Restore lost notes about v3: - Spelling in kerneldoc. - Inline wait_event condition. - s/vblank_wait/wait_one_vblank/ Cc: Michel Dänzer Cc: Ville Syrjälä Reviewed-by: Michel Dänzer Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_irq.c | 44 +++++++++++++++++++++++++++++++++++++++ include/drm/drmP.h | 2 ++ 2 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 6f16a104d6d0..e64d24951fc2 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1009,6 +1009,50 @@ void drm_crtc_vblank_put(struct drm_crtc *crtc) } EXPORT_SYMBOL(drm_crtc_vblank_put); +/** + * drm_wait_one_vblank - wait for one vblank + * @dev: DRM device + * @crtc: crtc index + * + * This waits for one vblank to pass on @crtc, using the irq driver interfaces. + * It is a failure to call this when the vblank irq for @crtc is disabled, e.g. + * due to lack of driver support or because the crtc is off. + */ +void drm_wait_one_vblank(struct drm_device *dev, int crtc) +{ + int ret; + u32 last; + + ret = drm_vblank_get(dev, crtc); + if (WARN_ON(ret)) + return; + + last = drm_vblank_count(dev, crtc); + + ret = wait_event_timeout(dev->vblank[crtc].queue, + last != drm_vblank_count(dev, crtc), + msecs_to_jiffies(100)); + + WARN_ON(ret == 0); + + drm_vblank_put(dev, crtc); +} +EXPORT_SYMBOL(drm_wait_one_vblank); + +/** + * drm_crtc_wait_one_vblank - wait for one vblank + * @crtc: DRM crtc + * + * This waits for one vblank to pass on @crtc, using the irq driver interfaces. + * It is a failure to call this when the vblank irq for @crtc is disabled, e.g. + * due to lack of driver support or because the crtc is off. + */ +void drm_crtc_wait_one_vblank(struct drm_crtc *crtc) +{ + drm_wait_one_vblank(crtc->dev, drm_crtc_index(crtc)); +} +EXPORT_SYMBOL(drm_crtc_wait_one_vblank); + /** * drm_vblank_off - disable vblank events on a CRTC * @dev: DRM device diff --git a/include/drm/drmP.h b/include/drm/drmP.h index d3d9be6b83ef..c2209178981f 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1327,6 +1327,8 @@ extern int drm_vblank_get(struct drm_device *dev, int crtc); extern void drm_vblank_put(struct drm_device *dev, int crtc); extern int drm_crtc_vblank_get(struct drm_crtc *crtc); extern void drm_crtc_vblank_put(struct drm_crtc *crtc); +extern void drm_wait_one_vblank(struct drm_device *dev, int crtc); +extern void drm_crtc_wait_one_vblank(struct drm_crtc *crtc); extern void drm_vblank_off(struct drm_device *dev, int crtc); extern void drm_vblank_on(struct drm_device *dev, int crtc); extern void drm_crtc_vblank_off(struct drm_crtc *crtc); From b20385f1f8434ec32d73414ffcadb7dcbd3a2a61 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:10 +0100 Subject: [PATCH 107/640] drm/i915/bdw: New source and header file for LRs, LRCs and Execlists Some legacy HW context code assumptions don't make sense for this new submission method, so we will place this stuff in a separate file. Note for reviewers: I've carefully considered the best name for this file and this was my best option (other possibilities were intel_lr_context.c or intel_execlist.c). I am open to a certain bikeshedding on this matter, anyway. And some point in time, it would be a good idea to split intel_lrc.c/.h even further, but for the moment just shove everything together. v2: Change to intel_lrc.c v3: Squash together with the header file addition Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 42 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 27 ++++++++++++++++++++ 4 files changed, 71 insertions(+) create mode 100644 drivers/gpu/drm/i915/intel_lrc.c create mode 100644 drivers/gpu/drm/i915/intel_lrc.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 91bd167e1cb7..c1dd485aeb6c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -31,6 +31,7 @@ i915-y += i915_cmd_parser.o \ i915_gpu_error.o \ i915_irq.o \ i915_trace_points.o \ + intel_lrc.o \ intel_ringbuffer.o \ intel_uncore.o diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6959c1b2d648..ec2a094f3622 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -35,6 +35,7 @@ #include "i915_reg.h" #include "intel_bios.h" #include "intel_ringbuffer.h" +#include "intel_lrc.h" #include "i915_gem_gtt.h" #include #include diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c new file mode 100644 index 000000000000..49bb6fcdface --- /dev/null +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -0,0 +1,42 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Ben Widawsky + * Michel Thierry + * Thomas Daniel + * Oscar Mateo + * + */ + +/* + * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". + * These expanded contexts enable a number of new abilities, especially + * "Execlists" (also implemented in this file). + * + * Execlists are the new method by which, on gen8+ hardware, workloads are + * submitted for execution (as opposed to the legacy, ringbuffer-based, method). + */ + +#include +#include +#include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h new file mode 100644 index 000000000000..f6830a4ec773 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -0,0 +1,27 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _INTEL_LRC_H_ +#define _INTEL_LRC_H_ + +#endif /* _INTEL_LRC_H_ */ From 127f100369a1f302904335950387d566680eb275 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:11 +0100 Subject: [PATCH 108/640] drm/i915/bdw: Macro for LRCs and module option for Execlists GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". These expanded contexts enable a number of new abilities, especially "Execlists". The macro is defined to off until we have things in place to hope to work. v2: Rename "advanced contexts" to the more correct "logical ring contexts". v3: Add a module parameter to enable execlists. Execlist are relatively new, and so it'd be wise to be able to switch back to ring submission to debug subtle problems that will inevitably arise. v4: Add an intel_enable_execlists function. v5: Sanitize early, as suggested by Daniel. Remove lrc_enabled. Signed-off-by: Ben Widawsky (v1) Signed-off-by: Damien Lespiau (v3) Signed-off-by: Oscar Mateo (v2, v4 & v5) Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 3 +++ drivers/gpu/drm/i915/i915_params.c | 6 ++++++ drivers/gpu/drm/i915/intel_lrc.c | 11 +++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 3 +++ 5 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ec2a094f3622..fd2aa15ce02b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2062,6 +2062,7 @@ struct drm_i915_cmd_table { #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) +#define HAS_LOGICAL_RING_CONTEXTS(dev) 0 #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_PPGTT(dev) (INTEL_INFO(dev)->gen >= 7 && !IS_GEN8(dev)) #define USES_PPGTT(dev) (i915.enable_ppgtt) @@ -2149,6 +2150,7 @@ struct i915_params { int enable_rc6; int enable_fbc; int enable_ppgtt; + int enable_execlists; int enable_psr; unsigned int preliminary_hw_support; int disable_power_well; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3eec344bdac0..5646e9ba6383 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4723,6 +4723,9 @@ int i915_gem_init(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int ret; + i915.enable_execlists = intel_sanitize_enable_execlists(dev, + i915.enable_execlists); + mutex_lock(&dev->struct_mutex); if (IS_VALLEYVIEW(dev)) { diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 62ee8308d682..f7f8350c3793 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -35,6 +35,7 @@ struct i915_params i915 __read_mostly = { .vbt_sdvo_panel_type = -1, .enable_rc6 = -1, .enable_fbc = -1, + .enable_execlists = -1, .enable_hangcheck = true, .enable_ppgtt = -1, .enable_psr = 1, @@ -118,6 +119,11 @@ MODULE_PARM_DESC(enable_ppgtt, "Override PPGTT usage. " "(-1=auto [default], 0=disabled, 1=aliasing, 2=full)"); +module_param_named(enable_execlists, i915.enable_execlists, int, 0400); +MODULE_PARM_DESC(enable_execlists, + "Override execlists usage. " + "(-1=auto [default], 0=disabled, 1=enabled)"); + module_param_named(enable_psr, i915.enable_psr, int, 0600); MODULE_PARM_DESC(enable_psr, "Enable PSR (default: true)"); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 49bb6fcdface..21f7f1cce86e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -40,3 +40,14 @@ #include #include #include "i915_drv.h" + +int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) +{ + if (enable_execlists == 0) + return 0; + + if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev)) + return 1; + + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index f6830a4ec773..75ee9c3cb7dc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,4 +24,7 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Execlists */ +int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); + #endif /* _INTEL_LRC_H_ */ From bd84b1e995918ad83bdba5d5be1bef901e169f19 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 15:57:57 +0200 Subject: [PATCH 109/640] drm/i915: WARN if module opt sanitization goes out of order Depending upon one module option to be sanitized (through USES_PPGTT) for the other is a bit too fragile for my taste. At least WARN about this. Cc: Ben Widawsky Cc: Damien Lespiau Cc: Oscar Mateo Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 21f7f1cce86e..44721292eb77 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -43,6 +43,8 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { + WARN_ON(i915.enable_ppgtt == -1); + if (enable_execlists == 0) return 0; From ede7d42baeece583c864badb6f9081f4cded6c32 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:12 +0100 Subject: [PATCH 110/640] drm/i915/bdw: Initialization for Logical Ring Contexts For the moment this is just a placeholder, but it shows one of the main differences between the good ol' HW contexts and the shiny new Logical Ring Contexts: LR contexts allocate and free their own backing objects. Another difference is that the allocation is deferred (as the create function name suggests), but that does not happen in this patch yet, because for the moment we are only dealing with the default context. Early in the series we had our own gen8_gem_context_init/fini functions, but the truth is they now look almost the same as the legacy hw context init/fini functions. We can always split them later if this ceases to be the case. Also, we do not fall back to legacy ringbuffers when logical ring context initialization fails (not very likely to happen and, even if it does, hw contexts would probably fail as well). v2: Daniel says "explain, do not showcase". Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: s/BUG_ON/WARN_ON/.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 29 ++++++++++++++++++++----- drivers/gpu/drm/i915/intel_lrc.c | 15 +++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 5 +++++ 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3b99390e467a..3552a351ccf7 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -182,7 +182,10 @@ void i915_gem_context_free(struct kref *ctx_ref) typeof(*ctx), ref); struct i915_hw_ppgtt *ppgtt = NULL; - if (ctx->legacy_hw_ctx.rcs_state) { + if (i915.enable_execlists) { + ppgtt = ctx_to_ppgtt(ctx); + intel_lr_context_free(ctx); + } else if (ctx->legacy_hw_ctx.rcs_state) { /* We refcount even the aliasing PPGTT to keep the code symmetric */ if (USES_PPGTT(ctx->legacy_hw_ctx.rcs_state->base.dev)) ppgtt = ctx_to_ppgtt(ctx); @@ -417,7 +420,11 @@ int i915_gem_context_init(struct drm_device *dev) if (WARN_ON(dev_priv->ring[RCS].default_context)) return 0; - if (HAS_HW_CONTEXTS(dev)) { + if (i915.enable_execlists) { + /* NB: intentionally left blank. We will allocate our own + * backing objects as we need them, thank you very much */ + dev_priv->hw_context_size = 0; + } else if (HAS_HW_CONTEXTS(dev)) { dev_priv->hw_context_size = round_up(get_context_size(dev), 4096); if (dev_priv->hw_context_size > (1<<20)) { DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", @@ -433,11 +440,20 @@ int i915_gem_context_init(struct drm_device *dev) return PTR_ERR(ctx); } - /* NB: RCS will hold a ref for all rings */ - for (i = 0; i < I915_NUM_RINGS; i++) - dev_priv->ring[i].default_context = ctx; + for (i = 0; i < I915_NUM_RINGS; i++) { + struct intel_engine_cs *ring = &dev_priv->ring[i]; - DRM_DEBUG_DRIVER("%s context support initialized\n", dev_priv->hw_context_size ? "HW" : "fake"); + /* NB: RCS will hold a ref for all rings */ + ring->default_context = ctx; + + /* FIXME: we really only want to do this for initialized rings */ + if (i915.enable_execlists) + intel_lr_context_deferred_create(ctx, ring); + } + + DRM_DEBUG_DRIVER("%s context support initialized\n", + i915.enable_execlists ? "LR" : + dev_priv->hw_context_size ? "HW" : "fake"); return 0; } @@ -779,6 +795,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct intel_context *ctx; int ret; + /* FIXME: allow user-created LR contexts as well */ if (!hw_context_enabled(dev)) return -ENODEV; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 44721292eb77..2d82d52d18bb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -53,3 +53,18 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } + +void intel_lr_context_free(struct intel_context *ctx) +{ + /* TODO */ +} + +int intel_lr_context_deferred_create(struct intel_context *ctx, + struct intel_engine_cs *ring) +{ + WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); + + /* TODO */ + + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 75ee9c3cb7dc..3b93572431e3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,6 +24,11 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Logical Ring Contexts */ +void intel_lr_context_free(struct intel_context *ctx); +int intel_lr_context_deferred_create(struct intel_context *ctx, + struct intel_engine_cs *ring); + /* Execlists */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); From c9e003af2d44d9f6eafe855448c41c9ac08ae895 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:13 +0100 Subject: [PATCH 111/640] drm/i915/bdw: Introduce one context backing object per engine A context backing object only makes sense for a given engine (because it holds state data specific to that engine). In legacy ringbuffer sumission mode, the only MI_SET_CONTEXT we really perform is for the render engine, so one backing object is all we nee. With Execlists, however, we need backing objects for every engine, as contexts become the only way to submit workloads to the GPU. To tackle this problem, we multiplex the context struct to contain objects. Originally, I colored this code by instantiating one new context for every engine I wanted to use, but this change suggested by Brad Volkin makes it more elegant. v2: Leave the old backing object pointer behind. Daniel Vetter suggested using a union, but it makes more sense to keep rcs_state as a NULL pointer behind, to make sure no one uses it incorrectly when Execlists are enabled, similar to what he suggested for ring->buffer (Rusty's API level 5). v3: Use the name "state" instead of the too-generic "obj", so that it mirrors the name choice for the legacy rcs_state. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fd2aa15ce02b..ad70e8ec18bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -626,11 +626,17 @@ struct intel_context { struct i915_ctx_hang_stats hang_stats; struct i915_address_space *vm; + /* Legacy ring buffer submission */ struct { struct drm_i915_gem_object *rcs_state; bool initialized; } legacy_hw_ctx; + /* Execlists */ + struct { + struct drm_i915_gem_object *state; + } engine[I915_NUM_RINGS]; + struct list_head link; }; From 8c8579176a144b1dca1d99ebb92510924168d508 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:14 +0100 Subject: [PATCH 112/640] drm/i915/bdw: A bit more advanced LR context alloc/free Now that we have the ability to allocate our own context backing objects and we have multiplexed one of them per engine inside the context structs, we can finally allocate and free them correctly. Regarding the context size, reading the register to calculate the sizes can work, I think, however the docs are very clear about the actual context sizes on GEN8, so just hardcode that and use it. v2: Rebased on top of the Full PPGTT series. It is important to notice that at this point we have one global default context per engine, all of them using the aliasing PPGTT (as opposed to the single global default context we have with legacy HW contexts). v3: - Go back to one single global default context, this time with multiple backing objects inside. - Use different context sizes for non-render engines, as suggested by Damien (still hardcoded, since the information about the context size registers in the BSpec is, well, *lacking*). - Render ctx size is 20 (or 19) pages, but not 21 (caught by Damien). - Move default context backing object creation to intel_init_ring (so that we don't waste memory in rings that might not get initialized). v4: - Reuse the HW legacy context init/fini. - Create a separate free function. - Rename the functions with an intel_ preffix. v5: Several rebases to account for the changes in the previous patches. Signed-off-by: Ben Widawsky (v1) Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 59 ++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ad70e8ec18bc..cbae19bab4bf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2528,6 +2528,8 @@ int i915_switch_context(struct intel_engine_cs *ring, struct intel_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); void i915_gem_context_free(struct kref *ctx_ref); +struct drm_i915_gem_object * +i915_gem_alloc_context_obj(struct drm_device *dev, size_t size); static inline void i915_gem_context_reference(struct intel_context *ctx) { kref_get(&ctx->ref); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3552a351ccf7..9f8fbbacf6c0 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -199,7 +199,7 @@ void i915_gem_context_free(struct kref *ctx_ref) kfree(ctx); } -static struct drm_i915_gem_object * +struct drm_i915_gem_object * i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) { struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2d82d52d18bb..9f30ee80e487 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -41,6 +41,11 @@ #include #include "i915_drv.h" +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) +#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_ALIGN 4096 + int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -56,15 +61,65 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists void intel_lr_context_free(struct intel_context *ctx) { - /* TODO */ + int i; + + for (i = 0; i < I915_NUM_RINGS; i++) { + struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; + if (ctx_obj) { + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + } + } +} + +static uint32_t get_lr_context_size(struct intel_engine_cs *ring) +{ + int ret = 0; + + WARN_ON(INTEL_INFO(ring->dev)->gen != 8); + + switch (ring->id) { + case RCS: + ret = GEN8_LR_CONTEXT_RENDER_SIZE; + break; + case VCS: + case BCS: + case VECS: + case VCS2: + ret = GEN8_LR_CONTEXT_OTHER_SIZE; + break; + } + + return ret; } int intel_lr_context_deferred_create(struct intel_context *ctx, struct intel_engine_cs *ring) { + struct drm_device *dev = ring->dev; + struct drm_i915_gem_object *ctx_obj; + uint32_t context_size; + int ret; + WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); - /* TODO */ + context_size = round_up(get_lr_context_size(ring), 4096); + + ctx_obj = i915_gem_alloc_context_obj(dev, context_size); + if (IS_ERR(ctx_obj)) { + ret = PTR_ERR(ctx_obj); + DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret); + return ret; + } + + ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0); + if (ret) { + DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret); + drm_gem_object_unreference(&ctx_obj->base); + return ret; + } + + ctx->engine[ring->id].state = ctx_obj; return 0; } From 84c2377fcee7a43cd964b62143e9a3714130bb0c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:15 +0100 Subject: [PATCH 113/640] drm/i915/bdw: Allocate ringbuffers for Logical Ring Contexts As we have said a couple of times by now, logical ring contexts have their own ringbuffers: not only the backing pages, but the whole management struct. In a previous version of the series, this was achieved with two separate patches: drm/i915/bdw: Allocate ringbuffer backing objects for default global LRC drm/i915/bdw: Allocate ringbuffer for user-created LRCs Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 38 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 6 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +++ 4 files changed, 46 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cbae19bab4bf..eccb8e406e9c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -635,6 +635,7 @@ struct intel_context { /* Execlists */ struct { struct drm_i915_gem_object *state; + struct intel_ringbuffer *ringbuf; } engine[I915_NUM_RINGS]; struct list_head link; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9f30ee80e487..0c80bb1f5420 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -65,7 +65,11 @@ void intel_lr_context_free(struct intel_context *ctx) for (i = 0; i < I915_NUM_RINGS; i++) { struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; + struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf; + if (ctx_obj) { + intel_destroy_ringbuffer_obj(ringbuf); + kfree(ringbuf); i915_gem_object_ggtt_unpin(ctx_obj); drm_gem_object_unreference(&ctx_obj->base); } @@ -99,6 +103,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, struct drm_device *dev = ring->dev; struct drm_i915_gem_object *ctx_obj; uint32_t context_size; + struct intel_ringbuffer *ringbuf; int ret; WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); @@ -119,6 +124,39 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, return ret; } + ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL); + if (!ringbuf) { + DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", + ring->name); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + ret = -ENOMEM; + return ret; + } + + ringbuf->size = 32 * PAGE_SIZE; + ringbuf->effective_size = ringbuf->size; + ringbuf->head = 0; + ringbuf->tail = 0; + ringbuf->space = ringbuf->size; + ringbuf->last_retired_head = -1; + + /* TODO: For now we put this in the mappable region so that we can reuse + * the existing ringbuffer code which ioremaps it. When we start + * creating many contexts, this will no longer work and we must switch + * to a kmapish interface. + */ + ret = intel_alloc_ringbuffer_obj(dev, ringbuf); + if (ret) { + DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n", + ring->name, ret); + kfree(ringbuf); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + return ret; + } + + ctx->engine[ring->id].ringbuf = ringbuf; ctx->engine[ring->id].state = ctx_obj; return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a059b64a0fb2..064652034d7e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1519,7 +1519,7 @@ static int init_phys_status_page(struct intel_engine_cs *ring) return 0; } -static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) { if (!ringbuf->obj) return; @@ -1530,8 +1530,8 @@ static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf) ringbuf->obj = NULL; } -static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ringbuffer *ringbuf) +int intel_alloc_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf) { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 70525d0c2c74..669cc7527f9a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -355,6 +355,10 @@ intel_write_status_page(struct intel_engine_cs *ring, #define I915_GEM_HWS_SCRATCH_INDEX 0x30 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT) +void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf); +int intel_alloc_ringbuffer_obj(struct drm_device *dev, + struct intel_ringbuffer *ringbuf); + void intel_stop_ring_buffer(struct intel_engine_cs *ring); void intel_cleanup_ring_buffer(struct intel_engine_cs *ring); From 0c7dd53b84def4fbbba907bef3d32a5171b617a5 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 11 Aug 2014 16:17:44 +0200 Subject: [PATCH 114/640] drm/i915/bdw: Add a context and an engine pointers to the ringbuffer Any given ringbuffer is unequivocally tied to one context and one engine. By setting the appropriate pointers to them, the ringbuffer struct holds all the infromation you might need to submit a workload for processing, Execlists style. v2: Drop ring->ctx since that looks terribly ill-defined for legacy ringbuffer submission. Signed-off-by: Oscar Mateo (v1) Acked-by: Damien Lespiau (v2) Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0c80bb1f5420..8f2d14da6228 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -134,6 +134,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, return ret; } + ringbuf->ring = ring; ringbuf->size = 32 * PAGE_SIZE; ringbuf->effective_size = ringbuf->size; ringbuf->head = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 064652034d7e..c35f956ed6a0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1594,6 +1594,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); ringbuf->size = 32 * PAGE_SIZE; + ringbuf->ring = ring; memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); init_waitqueue_head(&ring->irq_queue); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 669cc7527f9a..fe9d9d9d3598 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -90,6 +90,8 @@ struct intel_ringbuffer { struct drm_i915_gem_object *obj; void __iomem *virtual_start; + struct intel_engine_cs *ring; + u32 head; u32 tail; int space; From 8670d6f97d8c19595950af1838f8458d7529825f Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:17 +0100 Subject: [PATCH 115/640] drm/i915/bdw: Populate LR contexts (somewhat) For the most part, logical ring context objects are similar to hardware contexts in that the backing object is meant to be opaque. There are some exceptions where we need to poke certain offsets of the object for initialization, updating the tail pointer or updating the PDPs. For our basic execlist implementation we'll only need our PPGTT PDs, and ringbuffer addresses in order to set up the context. With previous patches, we have both, so start prepping the context to be load. Before running a context for the first time you must populate some fields in the context object. These fields begin 1 PAGE + LRCA, ie. the first page (in 0 based counting) of the context image. These same fields will be read and written to as contexts are saved and restored once the system is up and running. Many of these fields are completely reused from previous global registers: ringbuffer head/tail/control, context control matches some previous MI_SET_CONTEXT flags, and page directories. There are other fields which we don't touch which we may want in the future. v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and (11) for other engines. v3: Several rebases and general changes to the code. v4: Squash with "Extract LR context object populating" Also, Damien's review comments: - Set the Force Posted bit on the LRI header, as the BSpec suggest we do. - Prevent warning when compiling a 32-bits kernel without HIGHMEM64. - Add a clarifying comment to the context population code. v5: Damien's review comments: - The third MI_LOAD_REGISTER_IMM in the context does not set Force Posted. - Remove dead code. v6: Add a note about the (presumed) differences between BDW and CHV state contexts. Also, Brad's review comments: - Use the _MASKED_BIT_ENABLE, upper_32_bits and lower_32_bits macros. - Be less magical about how we set the ring size in the context. Signed-off-by: Ben Widawsky (v1) Signed-off-by: Rafael Barbalho (v2) Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 159 ++++++++++++++++++++++++++++++- 2 files changed, 156 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 7a6cc69cdc2b..c1d24242a02d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -282,6 +282,7 @@ * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! */ #define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +#define MI_LRI_FORCE_POSTED (1<<12) #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1) #define MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1) #define MI_SRM_LRM_GLOBAL_GTT (1<<22) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8f2d14da6228..a7a08a85edb3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -46,6 +46,38 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 +#define RING_ELSP(ring) ((ring)->mmio_base+0x230) +#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) + +#define CTX_LRI_HEADER_0 0x01 +#define CTX_CONTEXT_CONTROL 0x02 +#define CTX_RING_HEAD 0x04 +#define CTX_RING_TAIL 0x06 +#define CTX_RING_BUFFER_START 0x08 +#define CTX_RING_BUFFER_CONTROL 0x0a +#define CTX_BB_HEAD_U 0x0c +#define CTX_BB_HEAD_L 0x0e +#define CTX_BB_STATE 0x10 +#define CTX_SECOND_BB_HEAD_U 0x12 +#define CTX_SECOND_BB_HEAD_L 0x14 +#define CTX_SECOND_BB_STATE 0x16 +#define CTX_BB_PER_CTX_PTR 0x18 +#define CTX_RCS_INDIRECT_CTX 0x1a +#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c +#define CTX_LRI_HEADER_1 0x21 +#define CTX_CTX_TIMESTAMP 0x22 +#define CTX_PDP3_UDW 0x24 +#define CTX_PDP3_LDW 0x26 +#define CTX_PDP2_UDW 0x28 +#define CTX_PDP2_LDW 0x2a +#define CTX_PDP1_UDW 0x2c +#define CTX_PDP1_LDW 0x2e +#define CTX_PDP0_UDW 0x30 +#define CTX_PDP0_LDW 0x32 +#define CTX_LRI_HEADER_2 0x41 +#define CTX_R_PWR_CLK_STATE 0x42 +#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 + int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -59,6 +91,115 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +static int +populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj, + struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf) +{ + struct drm_i915_gem_object *ring_obj = ringbuf->obj; + struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); + struct page *page; + uint32_t *reg_state; + int ret; + + ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true); + if (ret) { + DRM_DEBUG_DRIVER("Could not set to CPU domain\n"); + return ret; + } + + ret = i915_gem_object_get_pages(ctx_obj); + if (ret) { + DRM_DEBUG_DRIVER("Could not get object pages\n"); + return ret; + } + + i915_gem_object_pin_pages(ctx_obj); + + /* The second page of the context object contains some fields which must + * be set up prior to the first execution. */ + page = i915_gem_object_get_page(ctx_obj, 1); + reg_state = kmap_atomic(page); + + /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM + * commands followed by (reg, value) pairs. The values we are setting here are + * only for the first context restore: on a subsequent save, the GPU will + * recreate this batchbuffer with new values (including all the missing + * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */ + if (ring->id == RCS) + reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14); + else + reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11); + reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED; + reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring); + reg_state[CTX_CONTEXT_CONTROL+1] = + _MASKED_BIT_ENABLE((1<<3) | MI_RESTORE_INHIBIT); + reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base); + reg_state[CTX_RING_HEAD+1] = 0; + reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base); + reg_state[CTX_RING_TAIL+1] = 0; + reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base); + reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj); + reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base); + reg_state[CTX_RING_BUFFER_CONTROL+1] = + ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID; + reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168; + reg_state[CTX_BB_HEAD_U+1] = 0; + reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140; + reg_state[CTX_BB_HEAD_L+1] = 0; + reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110; + reg_state[CTX_BB_STATE+1] = (1<<5); + reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c; + reg_state[CTX_SECOND_BB_HEAD_U+1] = 0; + reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114; + reg_state[CTX_SECOND_BB_HEAD_L+1] = 0; + reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118; + reg_state[CTX_SECOND_BB_STATE+1] = 0; + if (ring->id == RCS) { + /* TODO: according to BSpec, the register state context + * for CHV does not have these. OTOH, these registers do + * exist in CHV. I'm waiting for a clarification */ + reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0; + reg_state[CTX_BB_PER_CTX_PTR+1] = 0; + reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4; + reg_state[CTX_RCS_INDIRECT_CTX+1] = 0; + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8; + reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0; + } + reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9); + reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED; + reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8; + reg_state[CTX_CTX_TIMESTAMP+1] = 0; + reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3); + reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3); + reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2); + reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2); + reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1); + reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1); + reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); + reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); + reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]); + reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]); + reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]); + reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]); + reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]); + reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]); + reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]); + reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]); + if (ring->id == RCS) { + reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8; + reg_state[CTX_R_PWR_CLK_STATE+1] = 0; + } + + kunmap_atomic(reg_state); + + ctx_obj->dirty = 1; + set_page_dirty(page); + i915_gem_object_unpin_pages(ctx_obj); + + return 0; +} + void intel_lr_context_free(struct intel_context *ctx) { int i; @@ -151,14 +292,24 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, if (ret) { DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n", ring->name, ret); - kfree(ringbuf); - i915_gem_object_ggtt_unpin(ctx_obj); - drm_gem_object_unreference(&ctx_obj->base); - return ret; + goto error; + } + + ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf); + if (ret) { + DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret); + intel_destroy_ringbuffer_obj(ringbuf); + goto error; } ctx->engine[ring->id].ringbuf = ringbuf; ctx->engine[ring->id].state = ctx_obj; return 0; + +error: + kfree(ringbuf); + i915_gem_object_ggtt_unpin(ctx_obj); + drm_gem_object_unreference(&ctx_obj->base); + return ret; } From ec3e9963a681789860e5c0120a745b717d942392 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:18 +0100 Subject: [PATCH 116/640] drm/i915/bdw: Deferred creation of user-created LRCs The backing objects and ringbuffers for contexts created via open fd are actually empty until the user starts sending execbuffers to them. At that point, we allocate & populate them. We do this because, at create time, we really don't know which engine is going to be used with the context later on (and we don't want to waste memory on objects that we might never use). v2: As contexts created via ioctl can only be used with the render ring, we have enough information to allocate & populate them right away. v3: Defer the creation always, even with ioctl-created contexts, as requested by Daniel Vetter. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 7 +++---- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9f8fbbacf6c0..bcb41002aa13 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -782,9 +782,9 @@ int i915_switch_context(struct intel_engine_cs *ring, return do_switch(ring, to); } -static bool hw_context_enabled(struct drm_device *dev) +static bool contexts_enabled(struct drm_device *dev) { - return to_i915(dev)->hw_context_size; + return i915.enable_execlists || to_i915(dev)->hw_context_size; } int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, @@ -795,8 +795,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct intel_context *ctx; int ret; - /* FIXME: allow user-created LR contexts as well */ - if (!hw_context_enabled(dev)) + if (!contexts_enabled(dev)) return -ENODEV; ret = i915_mutex_lock_interruptible(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index dec2cc2fbd42..29cb2156d32b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -931,6 +931,14 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ERR_PTR(-EIO); } + if (i915.enable_execlists && !ctx->engine[ring->id].state) { + int ret = intel_lr_context_deferred_create(ctx, ring); + if (ret) { + DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret); + return ERR_PTR(ret); + } + } + return ctx; } From a83014d3f8b936778a9bc9b3d4137769bb26d9eb Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:21 +0100 Subject: [PATCH 117/640] drm/i915: Abstract the legacy workload submission mechanism away As suggested by Daniel Vetter. The idea, in subsequent patches, is to provide an alternative to these vfuncs for the Execlists submission mechanism. v2: Splitted into two and reordered to illustrate our intentions, instead of showing it off. Also, remove the add_request vfunc and added the stop_ring one. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: - Make checkpatch happy. - Be grumpy about the excessive vtable. - Ditch gt->is_ring_initialized.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 23 ++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++++---- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 20 +++++++++---------- 3 files changed, 44 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eccb8e406e9c..9198f1c96470 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1645,6 +1645,20 @@ struct drm_i915_private { /* Old ums support infrastructure, same warning applies. */ struct i915_ums_state ums; + /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ + struct { + int (*do_execbuf)(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags); + int (*init_rings)(struct drm_device *dev); + void (*cleanup_ring)(struct intel_engine_cs *ring); + void (*stop_ring)(struct intel_engine_cs *ring); + } gt; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. @@ -2252,6 +2266,14 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_ringbuffer_submission(struct drm_device *dev, + struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer2(struct drm_device *dev, void *data, @@ -2404,6 +2426,7 @@ void i915_gem_reset(struct drm_device *dev); bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); +int i915_gem_init_rings(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice); void i915_gem_init_swizzling(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5646e9ba6383..33a54cbf9a2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4495,7 +4495,7 @@ i915_gem_stop_ringbuffers(struct drm_device *dev) int i; for_each_ring(ring, dev_priv, i) - intel_stop_ring_buffer(ring); + dev_priv->gt.stop_ring(ring); } int @@ -4612,7 +4612,7 @@ intel_enable_blt(struct drm_device *dev) return true; } -static int i915_gem_init_rings(struct drm_device *dev) +int i915_gem_init_rings(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int ret; @@ -4695,7 +4695,7 @@ i915_gem_init_hw(struct drm_device *dev) i915_gem_init_swizzling(dev); - ret = i915_gem_init_rings(dev); + ret = dev_priv->gt.init_rings(dev); if (ret) return ret; @@ -4736,6 +4736,13 @@ int i915_gem_init(struct drm_device *dev) DRM_DEBUG_DRIVER("allow wake ack timed out\n"); } + if (!i915.enable_execlists) { + dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission; + dev_priv->gt.init_rings = i915_gem_init_rings; + dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; + dev_priv->gt.stop_ring = intel_stop_ring_buffer; + } + i915_gem_init_userptr(dev); i915_gem_init_global_gtt(dev); @@ -4771,7 +4778,7 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev) int i; for_each_ring(ring, dev_priv, i) - intel_cleanup_ring_buffer(ring); + dev_priv->gt.cleanup_ring(ring); } int diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 29cb2156d32b..26b38b3ae4f3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1023,14 +1023,14 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev, return 0; } -static int -legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, - struct intel_engine_cs *ring, - struct intel_context *ctx, - struct drm_i915_gem_execbuffer2 *args, - struct list_head *vmas, - struct drm_i915_gem_object *batch_obj, - u64 exec_start, u32 flags) +int +i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags) { struct drm_clip_rect *cliprects = NULL; struct drm_i915_private *dev_priv = dev->dev_private; @@ -1402,8 +1402,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } else exec_start += i915_gem_obj_offset(batch_obj, vm); - ret = legacy_ringbuffer_submission(dev, file, ring, ctx, - args, &eb->vmas, batch_obj, exec_start, flags); + ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args, + &eb->vmas, batch_obj, exec_start, flags); /* * FIXME: We crucially rely upon the active tracking for the (ppgtt) From 454afebde873874b939465bfc1a294ac3697c96e Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:22 +0100 Subject: [PATCH 118/640] drm/i915/bdw: Skeleton for the new logical rings submission path Execlists are indeed a brave new world with respect to workload submission to the GPU. In previous version of these series, I have tried to impact the legacy ringbuffer submission path as little as possible (mostly, passing the context around and using the correct ringbuffer when I needed one) but Daniel is afraid (probably with a reason) that these changes and, especially, future ones, will end up breaking older gens. This commit and some others coming next will try to limit the damage by creating an alternative path for workload submission. The first step is here: laying out a new ring init/fini. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 5 + drivers/gpu/drm/i915/intel_lrc.c | 151 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 12 +++ 3 files changed, 168 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 33a54cbf9a2e..9acb2469116a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4741,6 +4741,11 @@ int i915_gem_init(struct drm_device *dev) dev_priv->gt.init_rings = i915_gem_init_rings; dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer; dev_priv->gt.stop_ring = intel_stop_ring_buffer; + } else { + dev_priv->gt.do_execbuf = intel_execlists_submission; + dev_priv->gt.init_rings = intel_logical_rings_init; + dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup; + dev_priv->gt.stop_ring = intel_logical_ring_stop; } i915_gem_init_userptr(dev); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a7a08a85edb3..9c2ff8f11c90 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -91,6 +91,157 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags) +{ + /* TODO */ + return 0; +} + +void intel_logical_ring_stop(struct intel_engine_cs *ring) +{ + /* TODO */ +} + +void intel_logical_ring_cleanup(struct intel_engine_cs *ring) +{ + /* TODO */ +} + +static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring) +{ + /* TODO */ + return 0; +} + +static int logical_render_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + + ring->name = "render ring"; + ring->id = RCS; + ring->mmio_base = RENDER_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_bsd_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[VCS]; + + ring->name = "bsd ring"; + ring->id = VCS; + ring->mmio_base = GEN6_BSD_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_bsd2_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[VCS2]; + + ring->name = "bds2 ring"; + ring->id = VCS2; + ring->mmio_base = GEN8_BSD2_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_blt_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[BCS]; + + ring->name = "blitter ring"; + ring->id = BCS; + ring->mmio_base = BLT_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +static int logical_vebox_ring_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring = &dev_priv->ring[VECS]; + + ring->name = "video enhancement ring"; + ring->id = VECS; + ring->mmio_base = VEBOX_RING_BASE; + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + + return logical_ring_init(dev, ring); +} + +int intel_logical_rings_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = logical_render_ring_init(dev); + if (ret) + return ret; + + if (HAS_BSD(dev)) { + ret = logical_bsd_ring_init(dev); + if (ret) + goto cleanup_render_ring; + } + + if (HAS_BLT(dev)) { + ret = logical_blt_ring_init(dev); + if (ret) + goto cleanup_bsd_ring; + } + + if (HAS_VEBOX(dev)) { + ret = logical_vebox_ring_init(dev); + if (ret) + goto cleanup_blt_ring; + } + + if (HAS_BSD2(dev)) { + ret = logical_bsd2_ring_init(dev); + if (ret) + goto cleanup_vebox_ring; + } + + ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000)); + if (ret) + goto cleanup_bsd2_ring; + + return 0; + +cleanup_bsd2_ring: + intel_logical_ring_cleanup(&dev_priv->ring[VCS2]); +cleanup_vebox_ring: + intel_logical_ring_cleanup(&dev_priv->ring[VECS]); +cleanup_blt_ring: + intel_logical_ring_cleanup(&dev_priv->ring[BCS]); +cleanup_bsd_ring: + intel_logical_ring_cleanup(&dev_priv->ring[VCS]); +cleanup_render_ring: + intel_logical_ring_cleanup(&dev_priv->ring[RCS]); + + return ret; +} + static int populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj, struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 3b93572431e3..bf0eff4e9f08 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,6 +24,11 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Logical Rings */ +void intel_logical_ring_stop(struct intel_engine_cs *ring); +void intel_logical_ring_cleanup(struct intel_engine_cs *ring); +int intel_logical_rings_init(struct drm_device *dev); + /* Logical Ring Contexts */ void intel_lr_context_free(struct intel_context *ctx); int intel_lr_context_deferred_create(struct intel_context *ctx, @@ -31,5 +36,12 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, /* Execlists */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); +int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, + struct intel_engine_cs *ring, + struct intel_context *ctx, + struct drm_i915_gem_execbuffer2 *args, + struct list_head *vmas, + struct drm_i915_gem_object *batch_obj, + u64 exec_start, u32 flags); #endif /* _INTEL_LRC_H_ */ From 48d823878d64f93163f5a949623346748bbce1b4 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:23 +0100 Subject: [PATCH 119/640] drm/i915/bdw: Generic logical ring init and cleanup Allocate and populate the default LRC for every ring, call gen-specific init/cleanup, init/fini the command parser and set the status page (now inside the LRC object). These are things all engines/rings have in common. Stopping the ring before cleanup and initializing the seqnos is left as a TODO task (we need more infrastructure in place before we can achieve this). v2: Check the ringbuffer backing obj for ring_is_initialized, instead of the context backing obj (similar, but not exactly the same). Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 4 -- drivers/gpu/drm/i915/intel_lrc.c | 54 ++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_ringbuffer.c | 17 ++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +-- 4 files changed, 70 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index bcb41002aa13..7a08f3e9e1ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -445,10 +445,6 @@ int i915_gem_context_init(struct drm_device *dev) /* NB: RCS will hold a ref for all rings */ ring->default_context = ctx; - - /* FIXME: we really only want to do this for initialized rings */ - if (i915.enable_execlists) - intel_lr_context_deferred_create(ctx, ring); } DRM_DEBUG_DRIVER("%s context support initialized\n", diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9c2ff8f11c90..ed7a4ff3bbd2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -110,12 +110,60 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { - /* TODO */ + if (!intel_ring_initialized(ring)) + return; + + /* TODO: make sure the ring is stopped */ + ring->preallocated_lazy_request = NULL; + ring->outstanding_lazy_seqno = 0; + + if (ring->cleanup) + ring->cleanup(ring); + + i915_cmd_parser_fini_ring(ring); + + if (ring->status_page.obj) { + kunmap(sg_page(ring->status_page.obj->pages->sgl)); + ring->status_page.obj = NULL; + } } static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring) { - /* TODO */ + int ret; + struct intel_context *dctx = ring->default_context; + struct drm_i915_gem_object *dctx_obj; + + /* Intentionally left blank. */ + ring->buffer = NULL; + + ring->dev = dev; + INIT_LIST_HEAD(&ring->active_list); + INIT_LIST_HEAD(&ring->request_list); + init_waitqueue_head(&ring->irq_queue); + + ret = intel_lr_context_deferred_create(dctx, ring); + if (ret) + return ret; + + /* The status page is offset 0 from the context object in LRCs. */ + dctx_obj = dctx->engine[ring->id].state; + ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj); + ring->status_page.page_addr = kmap(sg_page(dctx_obj->pages->sgl)); + if (ring->status_page.page_addr == NULL) + return -ENOMEM; + ring->status_page.obj = dctx_obj; + + ret = i915_cmd_parser_init_ring(ring); + if (ret) + return ret; + + if (ring->init) { + ret = ring->init(ring); + if (ret) + return ret; + } + return 0; } @@ -399,6 +447,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, int ret; WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL); + if (ctx->engine[ring->id].state) + return 0; context_size = round_up(get_lr_context_size(ring), 4096); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c35f956ed6a0..e4b97f5c5797 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -40,6 +40,23 @@ */ #define CACHELINE_BYTES 64 +bool +intel_ring_initialized(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + + if (!dev) + return false; + + if (i915.enable_execlists) { + struct intel_context *dctx = ring->default_context; + struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf; + + return ringbuf->obj; + } else + return ring->buffer && ring->buffer->obj; +} + static inline int __ring_space(int head, int tail, int size) { int space = head - (tail + I915_RING_FREE_SPACE); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fe9d9d9d3598..fbe54ef6a9a1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -289,11 +289,7 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; -static inline bool -intel_ring_initialized(struct intel_engine_cs *ring) -{ - return ring->buffer && ring->buffer->obj; -} +bool intel_ring_initialized(struct intel_engine_cs *ring); static inline unsigned intel_ring_flag(struct intel_engine_cs *ring) From 9b1136d505b1de5478e11b59ca59cf8ce2a33217 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:24 +0100 Subject: [PATCH 120/640] drm/i915/bdw: GEN-specific logical ring init Logical rings do not need most of the initialization their legacy ringbuffer counterparts do: we just need the pipe control object for the render ring, enable Execlists on the hardware and a few workarounds. v2: Squash with: "drm/i915: Extract pipe control fini & make init outside accesible". Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Make checkpatch happy.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 54 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 34 ++++++++++------ drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ 3 files changed, 78 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ed7a4ff3bbd2..1a1f5f98f05b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -108,6 +108,49 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) /* TODO */ } +static int gen8_init_common_ring(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + I915_WRITE(RING_MODE_GEN7(ring), + _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | + _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); + POSTING_READ(RING_MODE_GEN7(ring)); + DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name); + + memset(&ring->hangcheck, 0, sizeof(ring->hangcheck)); + + return 0; +} + +static int gen8_init_render_ring(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = gen8_init_common_ring(ring); + if (ret) + return ret; + + /* We need to disable the AsyncFlip performance optimisations in order + * to use MI_WAIT_FOR_EVENT within the CS. It should already be + * programmed to '1' on all products. + * + * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv + */ + I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); + + ret = intel_init_pipe_control(ring); + if (ret) + return ret; + + I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); + + return ret; +} + void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -178,6 +221,9 @@ static int logical_render_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + ring->init = gen8_init_render_ring; + ring->cleanup = intel_fini_pipe_control; + return logical_ring_init(dev, ring); } @@ -192,6 +238,8 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -206,6 +254,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -220,6 +270,8 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } @@ -234,6 +286,8 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + ring->init = gen8_init_common_ring; + return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e4b97f5c5797..dab5e7c79036 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -597,8 +597,25 @@ out: return ret; } -static int -init_pipe_control(struct intel_engine_cs *ring) +void +intel_fini_pipe_control(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + + if (ring->scratch.obj == NULL) + return; + + if (INTEL_INFO(dev)->gen >= 5) { + kunmap(sg_page(ring->scratch.obj->pages->sgl)); + i915_gem_object_ggtt_unpin(ring->scratch.obj); + } + + drm_gem_object_unreference(&ring->scratch.obj->base); + ring->scratch.obj = NULL; +} + +int +intel_init_pipe_control(struct intel_engine_cs *ring) { int ret; @@ -673,7 +690,7 @@ static int init_render_ring(struct intel_engine_cs *ring) _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); if (INTEL_INFO(dev)->gen >= 5) { - ret = init_pipe_control(ring); + ret = intel_init_pipe_control(ring); if (ret) return ret; } @@ -708,16 +725,7 @@ static void render_ring_cleanup(struct intel_engine_cs *ring) dev_priv->semaphore_obj = NULL; } - if (ring->scratch.obj == NULL) - return; - - if (INTEL_INFO(dev)->gen >= 5) { - kunmap(sg_page(ring->scratch.obj->pages->sgl)); - i915_gem_object_ggtt_unpin(ring->scratch.obj); - } - - drm_gem_object_unreference(&ring->scratch.obj->base); - ring->scratch.obj = NULL; + intel_fini_pipe_control(ring); } static int gen8_rcs_signal(struct intel_engine_cs *signaller, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fbe54ef6a9a1..677df0d7be48 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -381,6 +381,9 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno); int intel_ring_flush_all_caches(struct intel_engine_cs *ring); int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring); +void intel_fini_pipe_control(struct intel_engine_cs *ring); +int intel_init_pipe_control(struct intel_engine_cs *ring); + int intel_init_render_ring_buffer(struct drm_device *dev); int intel_init_bsd_ring_buffer(struct drm_device *dev); int intel_init_bsd2_ring_buffer(struct drm_device *dev); From f72a113a71ab08c4df8a5f80ab2f8a140feb81f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Aug 2014 09:36:00 +0200 Subject: [PATCH 121/640] drm/radeon: add userptr support v8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds an IOCTL for turning a pointer supplied by userspace into a buffer object. It imposes several restrictions upon the memory being mapped: 1. It must be page aligned (both start/end addresses, i.e ptr and size). 2. It must be normal system memory, not a pointer into another map of IO space (e.g. it must not be a GTT mmapping of another object). 3. The BO is mapped into GTT, so the maximum amount of memory mapped at all times is still the GTT limit. 4. The BO is only mapped readonly for now, so no write support. 5. List of backing pages is only acquired once, so they represent a snapshot of the first use. Exporting and sharing as well as mapping of buffer objects created by this function is forbidden and results in an -EPERM. v2: squash all previous changes into first public version v3: fix tabs, map readonly, don't use MM callback any more v4: set TTM_PAGE_FLAG_SG so that TTM never messes with the pages, pin/unpin pages on bind/unbind instead of populate/unpopulate v5: rebased on 3.17-wip, IOCTL renamed to userptr, reject any unknown flags, better handle READONLY flag, improve permission check v6: fix ptr cast warning, use set_page_dirty/mark_page_accessed on unpin v7: add warning about it's availability in the API definition v8: drop access_ok check, fix VM mapping bits Signed-off-by: Christian König Reviewed-by: Alex Deucher (v4) Reviewed-by: Jérôme Glisse (v4) Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 6 + drivers/gpu/drm/radeon/radeon_cs.c | 25 ++++- drivers/gpu/drm/radeon/radeon_drv.c | 5 +- drivers/gpu/drm/radeon/radeon_gem.c | 68 ++++++++++++ drivers/gpu/drm/radeon/radeon_kms.c | 1 + drivers/gpu/drm/radeon/radeon_object.c | 3 + drivers/gpu/drm/radeon/radeon_prime.c | 10 ++ drivers/gpu/drm/radeon/radeon_ttm.c | 145 +++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_vm.c | 3 + include/uapi/drm/radeon_drm.h | 16 +++ 10 files changed, 279 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 9e1732eb402c..6f38a23a5810 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2138,6 +2138,8 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int radeon_gem_pin_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int radeon_gem_unpin_ioctl(struct drm_device *dev, void *data, @@ -2871,6 +2873,10 @@ extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enabl extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable); extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain); extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo); +extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, + uint32_t flags); +extern bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm); +extern bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm); extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base); extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc); extern int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index ee712c199b25..1321491cf499 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -78,7 +78,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) struct radeon_cs_chunk *chunk; struct radeon_cs_buckets buckets; unsigned i, j; - bool duplicate; + bool duplicate, need_mmap_lock = false; + int r; if (p->chunk_relocs_idx == -1) { return 0; @@ -164,6 +165,19 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs[i].allowed_domains = domain; } + if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) { + uint32_t domain = p->relocs[i].prefered_domains; + if (!(domain & RADEON_GEM_DOMAIN_GTT)) { + DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is " + "allowed for userptr BOs\n"); + return -EINVAL; + } + need_mmap_lock = true; + domain = RADEON_GEM_DOMAIN_GTT; + p->relocs[i].prefered_domains = domain; + p->relocs[i].allowed_domains = domain; + } + p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].handle = r->handle; @@ -176,8 +190,15 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) if (p->cs_flags & RADEON_CS_USE_VM) p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm, &p->validated); + if (need_mmap_lock) + down_read(¤t->mm->mmap_sem); - return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); + r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); + + if (need_mmap_lock) + up_read(¤t->mm->mmap_sem); + + return r; } static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index a773830c6c40..5b18af926527 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -114,6 +114,9 @@ int radeon_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv); void radeon_gem_object_close(struct drm_gem_object *obj, struct drm_file *file_priv); +struct dma_buf *radeon_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags); extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int flags, int *vpos, int *hpos, ktime_t *stime, @@ -568,7 +571,7 @@ static struct drm_driver kms_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, - .gem_prime_export = drm_gem_prime_export, + .gem_prime_export = radeon_gem_prime_export, .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = radeon_gem_prime_pin, .gem_prime_unpin = radeon_gem_prime_unpin, diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index bfd7e1b0ff3f..993ab223b503 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -272,6 +272,65 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, return 0; } +int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct radeon_device *rdev = dev->dev_private; + struct drm_radeon_gem_userptr *args = data; + struct drm_gem_object *gobj; + struct radeon_bo *bo; + uint32_t handle; + int r; + + if (offset_in_page(args->addr | args->size)) + return -EINVAL; + + /* we only support read only mappings for now */ + if (!(args->flags & RADEON_GEM_USERPTR_READONLY)) + return -EACCES; + + /* reject unknown flag values */ + if (args->flags & ~RADEON_GEM_USERPTR_READONLY) + return -EINVAL; + + /* readonly pages not tested on older hardware */ + if (rdev->family < CHIP_R600) + return -EINVAL; + + down_read(&rdev->exclusive_lock); + + /* create a gem object to contain this object in */ + r = radeon_gem_object_create(rdev, args->size, 0, + RADEON_GEM_DOMAIN_CPU, 0, + false, &gobj); + if (r) + goto handle_lockup; + + bo = gem_to_radeon_bo(gobj); + r = radeon_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); + if (r) + goto release_object; + + r = drm_gem_handle_create(filp, gobj, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(gobj); + if (r) + goto handle_lockup; + + args->handle = handle; + up_read(&rdev->exclusive_lock); + return 0; + +release_object: + drm_gem_object_unreference_unlocked(gobj); + +handle_lockup: + up_read(&rdev->exclusive_lock); + r = radeon_gem_handle_lockup(rdev, r); + + return r; +} + int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -315,6 +374,10 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, return -ENOENT; } robj = gem_to_radeon_bo(gobj); + if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) { + drm_gem_object_unreference_unlocked(gobj); + return -EPERM; + } *offset_p = radeon_bo_mmap_offset(robj); drm_gem_object_unreference_unlocked(gobj); return 0; @@ -532,6 +595,11 @@ int radeon_gem_op_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_radeon_bo(gobj); + + r = -EPERM; + if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) + goto out; + r = radeon_bo_reserve(robj, false); if (unlikely(r)) goto out; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index eb7164d07985..8309b11e674d 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -885,5 +885,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(RADEON_GEM_USERPTR, radeon_gem_userptr_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), }; int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 480c87d8edc5..c73c1e320585 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -264,6 +264,9 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset, { int r, i; + if (radeon_ttm_tt_has_userptr(bo->tbo.ttm)) + return -EPERM; + if (bo->pin_count) { bo->pin_count++; if (gpu_addr) diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index f7e48d329db3..bb18bc74b7d7 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -103,3 +103,13 @@ void radeon_gem_prime_unpin(struct drm_gem_object *obj) radeon_bo_unpin(bo); radeon_bo_unreserve(bo); } + +struct dma_buf *radeon_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags) +{ + struct radeon_bo *bo = gem_to_radeon_bo(gobj); + if (radeon_ttm_tt_has_userptr(bo->tbo.ttm)) + return ERR_PTR(-EPERM); + return drm_gem_prime_export(dev, gobj, flags); +} diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 72afe82a95c9..b20933fa35c6 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include "radeon_reg.h" #include "radeon.h" @@ -515,8 +517,92 @@ struct radeon_ttm_tt { struct ttm_dma_tt ttm; struct radeon_device *rdev; u64 offset; + + uint64_t userptr; + struct mm_struct *usermm; + uint32_t userflags; }; +/* prepare the sg table with the user pages */ +static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm) +{ + struct radeon_device *rdev = radeon_get_rdev(ttm->bdev); + struct radeon_ttm_tt *gtt = (void *)ttm; + unsigned pinned = 0, nents; + int r; + + int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY); + enum dma_data_direction direction = write ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + + if (current->mm != gtt->usermm) + return -EPERM; + + do { + unsigned num_pages = ttm->num_pages - pinned; + uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; + struct page **pages = ttm->pages + pinned; + + r = get_user_pages(current, current->mm, userptr, num_pages, + write, 0, pages, NULL); + if (r < 0) + goto release_pages; + + pinned += r; + + } while (pinned < ttm->num_pages); + + r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0, + ttm->num_pages << PAGE_SHIFT, + GFP_KERNEL); + if (r) + goto release_sg; + + r = -ENOMEM; + nents = dma_map_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction); + if (nents != ttm->sg->nents) + goto release_sg; + + drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, + gtt->ttm.dma_address, ttm->num_pages); + + return 0; + +release_sg: + kfree(ttm->sg); + +release_pages: + release_pages(ttm->pages, pinned, 0); + return r; +} + +static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm) +{ + struct radeon_device *rdev = radeon_get_rdev(ttm->bdev); + struct radeon_ttm_tt *gtt = (void *)ttm; + struct scatterlist *sg; + int i; + + int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY); + enum dma_data_direction direction = write ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE; + + /* free the sg table and pages again */ + dma_unmap_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction); + + for_each_sg(ttm->sg->sgl, sg, ttm->sg->nents, i) { + struct page *page = sg_page(sg); + + if (!(gtt->userflags & RADEON_GEM_USERPTR_READONLY)) + set_page_dirty(page); + + mark_page_accessed(page); + page_cache_release(page); + } + + sg_free_table(ttm->sg); +} + static int radeon_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { @@ -525,6 +611,11 @@ static int radeon_ttm_backend_bind(struct ttm_tt *ttm, RADEON_GART_PAGE_WRITE; int r; + if (gtt->userptr) { + radeon_ttm_tt_pin_userptr(ttm); + flags &= ~RADEON_GART_PAGE_WRITE; + } + gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT); if (!ttm->num_pages) { WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", @@ -547,6 +638,10 @@ static int radeon_ttm_backend_unbind(struct ttm_tt *ttm) struct radeon_ttm_tt *gtt = (void *)ttm; radeon_gart_unbind(gtt->rdev, gtt->offset, ttm->num_pages); + + if (gtt->userptr) + radeon_ttm_tt_unpin_userptr(ttm); + return 0; } @@ -603,6 +698,16 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm) if (ttm->state != tt_unpopulated) return 0; + if (gtt->userptr) { + ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL); + if (!ttm->sg) + return -ENOMEM; + + ttm->page_flags |= TTM_PAGE_FLAG_SG; + ttm->state = tt_unbound; + return 0; + } + if (slave && ttm->sg) { drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, gtt->ttm.dma_address, ttm->num_pages); @@ -652,6 +757,12 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm) unsigned i; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); + if (gtt->userptr) { + kfree(ttm->sg); + ttm->page_flags &= ~TTM_PAGE_FLAG_SG; + return; + } + if (slave) return; @@ -680,6 +791,40 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm) ttm_pool_unpopulate(ttm); } +int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, + uint32_t flags) +{ + struct radeon_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL) + return -EINVAL; + + gtt->userptr = addr; + gtt->usermm = current->mm; + gtt->userflags = flags; + return 0; +} + +bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm) +{ + struct radeon_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL) + return false; + + return !!gtt->userptr; +} + +bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm) +{ + struct radeon_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL) + return false; + + return !!(gtt->userflags & RADEON_GEM_USERPTR_READONLY); +} + static struct ttm_bo_driver radeon_bo_driver = { .ttm_tt_create = &radeon_ttm_tt_create, .ttm_tt_populate = &radeon_ttm_tt_populate, diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index ccae4d9dc3de..0e107c5650bf 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -888,6 +888,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev, bo_va->flags &= ~RADEON_VM_PAGE_VALID; bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED; + if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm)) + bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE; + if (mem) { addr = mem->start << PAGE_SHIFT; if (mem->mem_type != TTM_PL_SYSTEM) { diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 509b2d7a41b7..3a9f20930372 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -511,6 +511,7 @@ typedef struct { #define DRM_RADEON_GEM_BUSY 0x2a #define DRM_RADEON_GEM_VA 0x2b #define DRM_RADEON_GEM_OP 0x2c +#define DRM_RADEON_GEM_USERPTR 0x2d #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) @@ -554,6 +555,7 @@ typedef struct { #define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) #define DRM_IOCTL_RADEON_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va) #define DRM_IOCTL_RADEON_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op) +#define DRM_IOCTL_RADEON_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_USERPTR, struct drm_radeon_gem_userptr) typedef struct drm_radeon_init { enum { @@ -808,6 +810,20 @@ struct drm_radeon_gem_create { uint32_t flags; }; +/* + * This is not a reliable API and you should expect it to fail for any + * number of reasons and have fallback path that do not use userptr to + * perform any operation. + */ +#define RADEON_GEM_USERPTR_READONLY (1 << 0) + +struct drm_radeon_gem_userptr { + uint64_t addr; + uint64_t size; + uint32_t flags; + uint32_t handle; +}; + #define RADEON_TILING_MACRO 0x1 #define RADEON_TILING_MICRO 0x2 #define RADEON_TILING_SWAP_16BIT 0x4 From ddd00e33e17a62c5f44377ab42e7562ccfae7bd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Aug 2014 09:36:01 +0200 Subject: [PATCH 122/640] drm/radeon: add userptr flag to limit it to anonymous memory v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid problems with writeback by limiting userptr to anonymous memory. v2: add commit and code comments Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 3 ++- drivers/gpu/drm/radeon/radeon_ttm.c | 10 ++++++++++ include/uapi/drm/radeon_drm.h | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 993ab223b503..032736b429bf 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -290,7 +290,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, return -EACCES; /* reject unknown flag values */ - if (args->flags & ~RADEON_GEM_USERPTR_READONLY) + if (args->flags & ~(RADEON_GEM_USERPTR_READONLY | + RADEON_GEM_USERPTR_ANONONLY)) return -EINVAL; /* readonly pages not tested on older hardware */ diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index b20933fa35c6..12e37b1ddc40 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -538,6 +538,16 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm) if (current->mm != gtt->usermm) return -EPERM; + if (gtt->userflags & RADEON_GEM_USERPTR_ANONONLY) { + /* check that we only pin down anonymous memory + to prevent problems with writeback */ + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; + struct vm_area_struct *vma; + vma = find_vma(gtt->usermm, gtt->userptr); + if (!vma || vma->vm_file || vma->vm_end < end) + return -EPERM; + } + do { unsigned num_pages = ttm->num_pages - pinned; uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 3a9f20930372..9720e1a36848 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -816,6 +816,7 @@ struct drm_radeon_gem_create { * perform any operation. */ #define RADEON_GEM_USERPTR_READONLY (1 << 0) +#define RADEON_GEM_USERPTR_ANONONLY (1 << 1) struct drm_radeon_gem_userptr { uint64_t addr; From 2a84a4476d6e13de72472f6ca4338aed0a8269b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Aug 2014 09:36:02 +0200 Subject: [PATCH 123/640] drm/radeon: add userptr flag to directly validate the BO to GTT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way we test userptr availability at BO creation time instead of first use. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 18 +++++++++++++++++- include/uapi/drm/radeon_drm.h | 1 + 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 032736b429bf..450656027aba 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -291,7 +291,7 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, /* reject unknown flag values */ if (args->flags & ~(RADEON_GEM_USERPTR_READONLY | - RADEON_GEM_USERPTR_ANONONLY)) + RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE)) return -EINVAL; /* readonly pages not tested on older hardware */ @@ -312,6 +312,22 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto release_object; + if (args->flags & RADEON_GEM_USERPTR_VALIDATE) { + down_read(¤t->mm->mmap_sem); + r = radeon_bo_reserve(bo, true); + if (r) { + up_read(¤t->mm->mmap_sem); + goto release_object; + } + + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + radeon_bo_unreserve(bo); + up_read(¤t->mm->mmap_sem); + if (r) + goto release_object; + } + r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(gobj); diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 9720e1a36848..5dc61c2d4c73 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -817,6 +817,7 @@ struct drm_radeon_gem_create { */ #define RADEON_GEM_USERPTR_READONLY (1 << 0) #define RADEON_GEM_USERPTR_ANONONLY (1 << 1) +#define RADEON_GEM_USERPTR_VALIDATE (1 << 2) struct drm_radeon_gem_userptr { uint64_t addr; From 341cb9e426fac32523427c80c67543a16be46605 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Aug 2014 09:36:03 +0200 Subject: [PATCH 124/640] drm/radeon: add userptr flag to register MMU notifier v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Whenever userspace mapping related to our userptr change we wait for it to become idle and unmap it from GTT. v2: rebased, fix mutex unlock in error path v3: improve commit message Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/Kconfig | 1 + drivers/gpu/drm/radeon/Makefile | 2 +- drivers/gpu/drm/radeon/radeon.h | 12 ++ drivers/gpu/drm/radeon/radeon_device.c | 2 + drivers/gpu/drm/radeon/radeon_gem.c | 9 +- drivers/gpu/drm/radeon/radeon_mn.c | 272 +++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_object.c | 1 + include/uapi/drm/radeon_drm.h | 1 + 8 files changed, 298 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/radeon/radeon_mn.c diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index b066bb3ca01a..358b6e8697e9 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -115,6 +115,7 @@ config DRM_RADEON select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE + select MMU_NOTIFIER help Choose this option if you have an ATI Radeon graphics card. There are both PCI and AGP versions. You don't need to choose this to diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 0013ad0db9ef..c7fa1aeb8c3f 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ - ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o + ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o # add async DMA block radeon-y += \ diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 6f38a23a5810..542da8208674 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -487,6 +488,9 @@ struct radeon_bo { struct ttm_bo_kmap_obj dma_buf_vmap; pid_t pid; + + struct radeon_mn *mn; + struct interval_tree_node mn_it; }; #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base) @@ -1725,6 +1729,11 @@ void radeon_test_ring_sync(struct radeon_device *rdev, struct radeon_ring *cpB); void radeon_test_syncing(struct radeon_device *rdev); +/* + * MMU Notifier + */ +int radeon_mn_register(struct radeon_bo *bo, unsigned long addr); +void radeon_mn_unregister(struct radeon_bo *bo); /* * Debugfs @@ -2372,6 +2381,9 @@ struct radeon_device { /* tracking pinned memory */ u64 vram_pin_size; u64 gart_pin_size; + + struct mutex mn_lock; + DECLARE_HASHTABLE(mn_hash, 7); }; bool radeon_is_px(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index c8ea050c8fa4..c58f84f3c6a5 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1270,6 +1270,8 @@ int radeon_device_init(struct radeon_device *rdev, init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->exclusive_lock); init_waitqueue_head(&rdev->irq.vblank_queue); + mutex_init(&rdev->mn_lock); + hash_init(rdev->mn_hash); r = radeon_gem_init(rdev); if (r) return r; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 450656027aba..2a6fbf101cf0 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -291,7 +291,8 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, /* reject unknown flag values */ if (args->flags & ~(RADEON_GEM_USERPTR_READONLY | - RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE)) + RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE | + RADEON_GEM_USERPTR_REGISTER)) return -EINVAL; /* readonly pages not tested on older hardware */ @@ -312,6 +313,12 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, if (r) goto release_object; + if (args->flags & RADEON_GEM_USERPTR_REGISTER) { + r = radeon_mn_register(bo, args->addr); + if (r) + goto release_object; + } + if (args->flags & RADEON_GEM_USERPTR_VALIDATE) { down_read(¤t->mm->mmap_sem); r = radeon_bo_reserve(bo, true); diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c new file mode 100644 index 000000000000..0157bc2f11f8 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -0,0 +1,272 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +/* + * Authors: + * Christian König + */ + +#include +#include +#include +#include +#include + +#include "radeon.h" + +struct radeon_mn { + /* constant after initialisation */ + struct radeon_device *rdev; + struct mm_struct *mm; + struct mmu_notifier mn; + + /* only used on destruction */ + struct work_struct work; + + /* protected by rdev->mn_lock */ + struct hlist_node node; + + /* objects protected by lock */ + struct mutex lock; + struct rb_root objects; +}; + +/** + * radeon_mn_destroy - destroy the rmn + * + * @work: previously sheduled work item + * + * Lazy destroys the notifier from a work item + */ +static void radeon_mn_destroy(struct work_struct *work) +{ + struct radeon_mn *rmn = container_of(work, struct radeon_mn, work); + struct radeon_device *rdev = rmn->rdev; + struct radeon_bo *bo, *next; + + mutex_lock(&rdev->mn_lock); + mutex_lock(&rmn->lock); + hash_del(&rmn->node); + rbtree_postorder_for_each_entry_safe(bo, next, &rmn->objects, mn_it.rb) { + interval_tree_remove(&bo->mn_it, &rmn->objects); + bo->mn = NULL; + } + mutex_unlock(&rmn->lock); + mutex_unlock(&rdev->mn_lock); + mmu_notifier_unregister(&rmn->mn, rmn->mm); + kfree(rmn); +} + +/** + * radeon_mn_release - callback to notify about mm destruction + * + * @mn: our notifier + * @mn: the mm this callback is about + * + * Shedule a work item to lazy destroy our notifier. + */ +static void radeon_mn_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); + INIT_WORK(&rmn->work, radeon_mn_destroy); + schedule_work(&rmn->work); +} + +/** + * radeon_mn_invalidate_range_start - callback to notify about mm change + * + * @mn: our notifier + * @mn: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * We block for all BOs between start and end to be idle and + * unmap them by move them into system domain again. + */ +static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn); + struct interval_tree_node *it; + + /* notification is exclusive, but interval is inclusive */ + end -= 1; + + mutex_lock(&rmn->lock); + + it = interval_tree_iter_first(&rmn->objects, start, end); + while (it) { + struct radeon_bo *bo; + int r; + + bo = container_of(it, struct radeon_bo, mn_it); + it = interval_tree_iter_next(it, start, end); + + r = radeon_bo_reserve(bo, true); + if (r) { + DRM_ERROR("(%d) failed to reserve user bo\n", r); + continue; + } + + if (bo->tbo.sync_obj) { + r = radeon_fence_wait(bo->tbo.sync_obj, false); + if (r) + DRM_ERROR("(%d) failed to wait for user bo\n", r); + } + + radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU); + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); + if (r) + DRM_ERROR("(%d) failed to validate user bo\n", r); + + radeon_bo_unreserve(bo); + } + + mutex_unlock(&rmn->lock); +} + +static const struct mmu_notifier_ops radeon_mn_ops = { + .release = radeon_mn_release, + .invalidate_range_start = radeon_mn_invalidate_range_start, +}; + +/** + * radeon_mn_get - create notifier context + * + * @rdev: radeon device pointer + * + * Creates a notifier context for current->mm. + */ +static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev) +{ + struct mm_struct *mm = current->mm; + struct radeon_mn *rmn; + int r; + + down_write(&mm->mmap_sem); + mutex_lock(&rdev->mn_lock); + + hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm) + if (rmn->mm == mm) + goto release_locks; + + rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); + if (!rmn) { + rmn = ERR_PTR(-ENOMEM); + goto release_locks; + } + + rmn->rdev = rdev; + rmn->mm = mm; + rmn->mn.ops = &radeon_mn_ops; + mutex_init(&rmn->lock); + rmn->objects = RB_ROOT; + + r = __mmu_notifier_register(&rmn->mn, mm); + if (r) + goto free_rmn; + + hash_add(rdev->mn_hash, &rmn->node, (unsigned long)mm); + +release_locks: + mutex_unlock(&rdev->mn_lock); + up_write(&mm->mmap_sem); + + return rmn; + +free_rmn: + mutex_unlock(&rdev->mn_lock); + up_write(&mm->mmap_sem); + kfree(rmn); + + return ERR_PTR(r); +} + +/** + * radeon_mn_register - register a BO for notifier updates + * + * @bo: radeon buffer object + * @addr: userptr addr we should monitor + * + * Registers an MMU notifier for the given BO at the specified address. + * Returns 0 on success, -ERRNO if anything goes wrong. + */ +int radeon_mn_register(struct radeon_bo *bo, unsigned long addr) +{ + unsigned long end = addr + radeon_bo_size(bo) - 1; + struct radeon_device *rdev = bo->rdev; + struct radeon_mn *rmn; + struct interval_tree_node *it; + + rmn = radeon_mn_get(rdev); + if (IS_ERR(rmn)) + return PTR_ERR(rmn); + + mutex_lock(&rmn->lock); + + it = interval_tree_iter_first(&rmn->objects, addr, end); + if (it) { + mutex_unlock(&rmn->lock); + return -EEXIST; + } + + bo->mn = rmn; + bo->mn_it.start = addr; + bo->mn_it.last = end; + interval_tree_insert(&bo->mn_it, &rmn->objects); + + mutex_unlock(&rmn->lock); + + return 0; +} + +/** + * radeon_mn_unregister - unregister a BO for notifier updates + * + * @bo: radeon buffer object + * + * Remove any registration of MMU notifier updates from the buffer object. + */ +void radeon_mn_unregister(struct radeon_bo *bo) +{ + struct radeon_device *rdev = bo->rdev; + struct radeon_mn *rmn; + + mutex_lock(&rdev->mn_lock); + rmn = bo->mn; + if (rmn == NULL) { + mutex_unlock(&rdev->mn_lock); + return; + } + + mutex_lock(&rmn->lock); + interval_tree_remove(&bo->mn_it, &rmn->objects); + bo->mn = NULL; + mutex_unlock(&rmn->lock); + mutex_unlock(&rdev->mn_lock); +} diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index c73c1e320585..287523807989 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -75,6 +75,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) bo = container_of(tbo, struct radeon_bo, tbo); radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1); + radeon_mn_unregister(bo); mutex_lock(&bo->rdev->gem.mutex); list_del_init(&bo->list); diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 5dc61c2d4c73..c77495ffc44f 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -818,6 +818,7 @@ struct drm_radeon_gem_create { #define RADEON_GEM_USERPTR_READONLY (1 << 0) #define RADEON_GEM_USERPTR_ANONONLY (1 << 1) #define RADEON_GEM_USERPTR_VALIDATE (1 << 2) +#define RADEON_GEM_USERPTR_REGISTER (1 << 3) struct drm_radeon_gem_userptr { uint64_t addr; From bd645e4314b95b21146aa6ff893d783de20c4e60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 7 Aug 2014 09:36:04 +0200 Subject: [PATCH 125/640] drm/radeon: allow userptr write access under certain conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It needs to be anonymous memory (no file mappings) and we are requried to install an MMU notifier. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 2a6fbf101cf0..01b58941acd4 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -285,19 +285,24 @@ int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data, if (offset_in_page(args->addr | args->size)) return -EINVAL; - /* we only support read only mappings for now */ - if (!(args->flags & RADEON_GEM_USERPTR_READONLY)) - return -EACCES; - /* reject unknown flag values */ if (args->flags & ~(RADEON_GEM_USERPTR_READONLY | RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE | RADEON_GEM_USERPTR_REGISTER)) return -EINVAL; - /* readonly pages not tested on older hardware */ - if (rdev->family < CHIP_R600) - return -EINVAL; + if (args->flags & RADEON_GEM_USERPTR_READONLY) { + /* readonly pages not tested on older hardware */ + if (rdev->family < CHIP_R600) + return -EINVAL; + + } else if (!(args->flags & RADEON_GEM_USERPTR_ANONONLY) || + !(args->flags & RADEON_GEM_USERPTR_REGISTER)) { + + /* if we want to write to it we must require anonymous + memory and install a MMU notifier */ + return -EACCES; + } down_read(&rdev->exclusive_lock); From e94e37ad19c74b4c2569d556cda9da4a03d4e3f8 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:25 +0100 Subject: [PATCH 126/640] drm/i915/bdw: GEN-specific logical ring set/get seqno No mistery here: the seqno is still retrieved from the engine's HW status page (the one in the default context. For the moment, I see no reason to worry about other context's HWS page). Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1a1f5f98f05b..c9518c6261de 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -151,6 +151,16 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) +{ + return intel_read_status_page(ring, I915_GEM_HWS_INDEX); +} + +static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) +{ + intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); +} + void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -223,6 +233,8 @@ static int logical_render_ring_init(struct drm_device *dev) ring->init = gen8_init_render_ring; ring->cleanup = intel_fini_pipe_control; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -239,6 +251,8 @@ static int logical_bsd_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -255,6 +269,8 @@ static int logical_bsd2_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -271,6 +287,8 @@ static int logical_blt_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } @@ -287,6 +305,8 @@ static int logical_vebox_ring_init(struct drm_device *dev) GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; ring->init = gen8_init_common_ring; + ring->get_seqno = gen8_get_seqno; + ring->set_seqno = gen8_set_seqno; return logical_ring_init(dev, ring); } From 26fbb77445bd402417f42936f68c0da26d33855d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 11 Aug 2014 18:37:37 +0300 Subject: [PATCH 127/640] drm/i915: Make hpd debug messages less cryptic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't print raw numbers, use port_name() and tell the user whether it's long or short without having to figure out what the other magic number means. Signed-off-by: Ville Syrjälä Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 4 +++- drivers/gpu/drm/i915/intel_dp.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f0d24db76e72..36eb1f234608 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1777,7 +1777,9 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev, long_hpd = (dig_hotplug_reg >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT; } - DRM_DEBUG_DRIVER("digital hpd port %d %d\n", port, long_hpd); + DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", + port_name(port), + long_hpd ? "long" : "short"); /* for long HPD pulses we want to have the digital queue happen, but we still want HPD storm detection to function. */ if (long_hpd) { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 3b88255d87dc..def55cdfef25 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4041,7 +4041,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) if (intel_dig_port->base.type != INTEL_OUTPUT_EDP) intel_dig_port->base.type = INTEL_OUTPUT_DISPLAYPORT; - DRM_DEBUG_KMS("got hpd irq on port %d - %s\n", intel_dig_port->port, + DRM_DEBUG_KMS("got hpd irq on port %c - %s\n", + port_name(intel_dig_port->port), long_hpd ? "long" : "short"); if (long_hpd) { From 82e104cc266c6da30a30fc5028b2f0236c669cd7 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:26 +0100 Subject: [PATCH 128/640] drm/i915/bdw: New logical ring submission mechanism Well, new-ish: if all this code looks familiar, that's because it's a clone of the existing submission mechanism (with some modifications here and there to adapt it to LRCs and Execlists). And why did we do this instead of reusing code, one might wonder? Well, there are some fears that the differences are big enough that they will end up breaking all platforms. Also, Execlists offer several advantages, like control over when the GPU is done with a given workload, that can help simplify the submission mechanism, no doubt. I am interested in getting Execlists to work first and foremost, but in the future this parallel submission mechanism will help us to fine tune the mechanism without affecting old gens. v2: Pass the ringbuffer only (whenever possible). Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Appease checkpatch. Again. And drop the legacy sarea gunk that somehow crept in.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 189 ++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.h | 13 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 22 +-- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 + 4 files changed, 217 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c9518c6261de..31025847d680 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -108,6 +108,195 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) /* TODO */ } +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) +{ + intel_logical_ring_advance(ringbuf); + + if (intel_ring_stopped(ringbuf->ring)) + return; + + /* TODO: how to submit a context to the ELSP is not here yet */ +} + +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) +{ + if (ring->outstanding_lazy_seqno) + return 0; + + if (ring->preallocated_lazy_request == NULL) { + struct drm_i915_gem_request *request; + + request = kmalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; + + ring->preallocated_lazy_request = request; + } + + return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); +} + +static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, + int bytes) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_gem_request *request; + u32 seqno = 0; + int ret; + + if (ringbuf->last_retired_head != -1) { + ringbuf->head = ringbuf->last_retired_head; + ringbuf->last_retired_head = -1; + + ringbuf->space = intel_ring_space(ringbuf); + if (ringbuf->space >= bytes) + return 0; + } + + list_for_each_entry(request, &ring->request_list, list) { + if (__intel_ring_space(request->tail, ringbuf->tail, + ringbuf->size) >= bytes) { + seqno = request->seqno; + break; + } + } + + if (seqno == 0) + return -ENOSPC; + + ret = i915_wait_seqno(ring, seqno); + if (ret) + return ret; + + /* TODO: make sure we update the right ringbuffer's last_retired_head + * when retiring requests */ + i915_gem_retire_requests_ring(ring); + ringbuf->head = ringbuf->last_retired_head; + ringbuf->last_retired_head = -1; + + ringbuf->space = intel_ring_space(ringbuf); + return 0; +} + +static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, + int bytes) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long end; + int ret; + + ret = logical_ring_wait_request(ringbuf, bytes); + if (ret != -ENOSPC) + return ret; + + /* Force the context submission in case we have been skipping it */ + intel_logical_ring_advance_and_submit(ringbuf); + + /* With GEM the hangcheck timer should kick us out of the loop, + * leaving it early runs the risk of corrupting GEM state (due + * to running on almost untested codepaths). But on resume + * timers don't work yet, so prevent a complete hang in that + * case by choosing an insanely large timeout. */ + end = jiffies + 60 * HZ; + + do { + ringbuf->head = I915_READ_HEAD(ring); + ringbuf->space = intel_ring_space(ringbuf); + if (ringbuf->space >= bytes) { + ret = 0; + break; + } + + msleep(1); + + if (dev_priv->mm.interruptible && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); + if (ret) + break; + + if (time_after(jiffies, end)) { + ret = -EBUSY; + break; + } + } while (1); + + return ret; +} + +static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf) +{ + uint32_t __iomem *virt; + int rem = ringbuf->size - ringbuf->tail; + + if (ringbuf->space < rem) { + int ret = logical_ring_wait_for_space(ringbuf, rem); + + if (ret) + return ret; + } + + virt = ringbuf->virtual_start + ringbuf->tail; + rem /= 4; + while (rem--) + iowrite32(MI_NOOP, virt++); + + ringbuf->tail = 0; + ringbuf->space = intel_ring_space(ringbuf); + + return 0; +} + +static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) +{ + int ret; + + if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { + ret = logical_ring_wrap_buffer(ringbuf); + if (unlikely(ret)) + return ret; + } + + if (unlikely(ringbuf->space < bytes)) { + ret = logical_ring_wait_for_space(ringbuf, bytes); + if (unlikely(ret)) + return ret; + } + + return 0; +} + +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + ret = i915_gem_check_wedge(&dev_priv->gpu_error, + dev_priv->mm.interruptible); + if (ret) + return ret; + + ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t)); + if (ret) + return ret; + + /* Preallocate the olr before touching the ring */ + ret = logical_ring_alloc_seqno(ring); + if (ret) + return ret; + + ringbuf->space -= num_dwords * sizeof(uint32_t); + return 0; +} + static int gen8_init_common_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index bf0eff4e9f08..4e032875c1fd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,6 +29,19 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_init(struct drm_device *dev); +void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); +static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) +{ + ringbuf->tail &= ringbuf->size - 1; +} +static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, + u32 data) +{ + iowrite32(data, ringbuf->virtual_start + ringbuf->tail); + ringbuf->tail += 4; +} +int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords); + /* Logical Ring Contexts */ void intel_lr_context_free(struct intel_context *ctx); int intel_lr_context_deferred_create(struct intel_context *ctx, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index dab5e7c79036..0bfa018fab20 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -57,7 +57,7 @@ intel_ring_initialized(struct intel_engine_cs *ring) return ring->buffer && ring->buffer->obj; } -static inline int __ring_space(int head, int tail, int size) +int __intel_ring_space(int head, int tail, int size) { int space = head - (tail + I915_RING_FREE_SPACE); if (space < 0) @@ -65,12 +65,13 @@ static inline int __ring_space(int head, int tail, int size) return space; } -static inline int ring_space(struct intel_ringbuffer *ringbuf) +int intel_ring_space(struct intel_ringbuffer *ringbuf) { - return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size); + return __intel_ring_space(ringbuf->head & HEAD_ADDR, + ringbuf->tail, ringbuf->size); } -static bool intel_ring_stopped(struct intel_engine_cs *ring) +bool intel_ring_stopped(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring); @@ -585,7 +586,7 @@ static int init_ring_common(struct intel_engine_cs *ring) else { ringbuf->head = I915_READ_HEAD(ring); ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); ringbuf->last_retired_head = -1; } @@ -1702,13 +1703,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); if (ringbuf->space >= n) return 0; } list_for_each_entry(request, &ring->request_list, list) { - if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) { + if (__intel_ring_space(request->tail, ringbuf->tail, + ringbuf->size) >= n) { seqno = request->seqno; break; } @@ -1725,7 +1727,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n) ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); return 0; } @@ -1754,7 +1756,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) trace_i915_ring_wait_begin(ring); do { ringbuf->head = I915_READ_HEAD(ring); - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); if (ringbuf->space >= n) { ret = 0; break; @@ -1806,7 +1808,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) iowrite32(MI_NOOP, virt++); ringbuf->tail = 0; - ringbuf->space = ring_space(ringbuf); + ringbuf->space = intel_ring_space(ringbuf); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 677df0d7be48..81bad364e36d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -374,6 +374,9 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring) struct intel_ringbuffer *ringbuf = ring->buffer; ringbuf->tail &= ringbuf->size - 1; } +int __intel_ring_space(int head, int tail, int size); +int intel_ring_space(struct intel_ringbuffer *ringbuf); +bool intel_ring_stopped(struct intel_engine_cs *ring); void __intel_ring_advance(struct intel_engine_cs *ring); int __must_check intel_ring_idle(struct intel_engine_cs *ring); From 4da46e1e5bb7e7396fad172cdaffbe496562f3d8 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:27 +0100 Subject: [PATCH 129/640] drm/i915/bdw: GEN-specific logical ring emit request Very similar to the legacy add_request, only modified to account for logical ringbuffer. v2: Use MI_GLOBAL_GTT, as suggested by Brad Volkin. v3: Unify render and non-render in the same function, as noticed by Brad Volkin. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_lrc.c | 31 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 3 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c1d24242a02d..3388afb90a93 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -272,6 +272,7 @@ #define MI_SEMAPHORE_POLL (1<<15) #define MI_SEMAPHORE_SAD_GTE_SDD (1<<12) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) +#define MI_STORE_DWORD_IMM_GEN8 MI_INSTR(0x20, 2) #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) #define MI_STORE_DWORD_INDEX_SHIFT 2 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 31025847d680..94f8b4087642 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -350,6 +350,32 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno) intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); } +static int gen8_emit_request(struct intel_ringbuffer *ringbuf) +{ + struct intel_engine_cs *ring = ringbuf->ring; + u32 cmd; + int ret; + + ret = intel_logical_ring_begin(ringbuf, 6); + if (ret) + return ret; + + cmd = MI_STORE_DWORD_IMM_GEN8; + cmd |= MI_GLOBAL_GTT; + + intel_logical_ring_emit(ringbuf, cmd); + intel_logical_ring_emit(ringbuf, + (ring->status_page.gfx_addr + + (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT))); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno); + intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance_and_submit(ringbuf); + + return 0; +} + void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { if (!intel_ring_initialized(ring)) @@ -424,6 +450,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->cleanup = intel_fini_pipe_control; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -442,6 +469,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -460,6 +488,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -478,6 +507,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } @@ -496,6 +526,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; + ring->emit_request = gen8_emit_request; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 81bad364e36d..467885159a80 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -216,6 +216,9 @@ struct intel_engine_cs { unsigned int num_dwords); } semaphore; + /* Execlists */ + int (*emit_request)(struct intel_ringbuffer *ringbuf); + /** * List of objects currently involved in rendering from the * ringbuffer. From 4712274c362b7730a1c6e01c9a51a6d46f5b7f43 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:28 +0100 Subject: [PATCH 130/640] drm/i915/bdw: GEN-specific logical ring emit flush Same as the legacy-style ring->flush. v2: The BSD invalidate bit still exists in GEN8! Add it for the VCS rings (but still consolidate the blt and bsd ring flushes into one). This was noticed by Brad Volkin. v3: The command for BSD and for other rings is slightly different: get it exactly the same as in gen6_ring_flush + gen6_bsd_ring_flush Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 85 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 7 -- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 +++ 3 files changed, 95 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 94f8b4087642..a88fa6e9360b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -340,6 +340,86 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 unused) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t cmd; + int ret; + + ret = intel_logical_ring_begin(ringbuf, 4); + if (ret) + return ret; + + cmd = MI_FLUSH_DW + 1; + + if (ring == &dev_priv->ring[VCS]) { + if (invalidate_domains & I915_GEM_GPU_DOMAINS) + cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD | + MI_FLUSH_DW_STORE_INDEX | + MI_FLUSH_DW_OP_STOREDW; + } else { + if (invalidate_domains & I915_GEM_DOMAIN_RENDER) + cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX | + MI_FLUSH_DW_OP_STOREDW; + } + + intel_logical_ring_emit(ringbuf, cmd); + intel_logical_ring_emit(ringbuf, + I915_GEM_HWS_SCRATCH_ADDR | + MI_FLUSH_DW_USE_GTT); + intel_logical_ring_emit(ringbuf, 0); /* upper addr */ + intel_logical_ring_emit(ringbuf, 0); /* value */ + intel_logical_ring_advance(ringbuf); + + return 0; +} + +static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 flush_domains) +{ + struct intel_engine_cs *ring = ringbuf->ring; + u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 flags = 0; + int ret; + + flags |= PIPE_CONTROL_CS_STALL; + + if (flush_domains) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + } + + if (invalidate_domains) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + } + + ret = intel_logical_ring_begin(ringbuf, 6); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); + intel_logical_ring_emit(ringbuf, flags); + intel_logical_ring_emit(ringbuf, scratch_addr); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_emit(ringbuf, 0); + intel_logical_ring_advance(ringbuf); + + return 0; +} + static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) { return intel_read_status_page(ring, I915_GEM_HWS_INDEX); @@ -451,6 +531,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush_render; return logical_ring_init(dev, ring); } @@ -470,6 +551,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -489,6 +571,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -508,6 +591,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } @@ -527,6 +611,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; + ring->emit_flush = gen8_emit_flush; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0bfa018fab20..4236014c1cda 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -33,13 +33,6 @@ #include "i915_trace.h" #include "intel_drv.h" -/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, - * but keeps the logic simple. Indeed, the whole purpose of this macro is just - * to give some inclination as to some of the magic values used in the various - * workarounds! - */ -#define CACHELINE_BYTES 64 - bool intel_ring_initialized(struct intel_engine_cs *ring) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 467885159a80..e497837c7724 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,13 @@ #define I915_CMD_HASH_ORDER 9 +/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! + */ +#define CACHELINE_BYTES 64 + /* * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use" * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use" @@ -218,6 +225,9 @@ struct intel_engine_cs { /* Execlists */ int (*emit_request)(struct intel_ringbuffer *ringbuf); + int (*emit_flush)(struct intel_ringbuffer *ringbuf, + u32 invalidate_domains, + u32 flush_domains); /** * List of objects currently involved in rendering from the From 9832b9dae8f9f505c7ed898a043b4f54b54597ed Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:30 +0100 Subject: [PATCH 131/640] drm/i915/bdw: Ring idle and stop with logical rings This is a hard one, since there is no direct hardware ring to control when in Execlists. We reuse intel_ring_idle here, but it should be fine as long as i915_add_request does the ring thing. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index a88fa6e9360b..8a524baa8a6b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -105,7 +105,24 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, void intel_logical_ring_stop(struct intel_engine_cs *ring) { - /* TODO */ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + int ret; + + if (!intel_ring_initialized(ring)) + return; + + ret = intel_ring_idle(ring); + if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error)) + DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", + ring->name, ret); + + /* TODO: Is this correct with Execlists enabled? */ + I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); + if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { + DRM_ERROR("%s :timed out trying to stop ring\n", ring->name); + return; + } + I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); } void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) @@ -458,10 +475,13 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { + struct drm_i915_private *dev_priv = ring->dev->dev_private; + if (!intel_ring_initialized(ring)) return; - /* TODO: make sure the ring is stopped */ + intel_logical_ring_stop(ring); + WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); ring->preallocated_lazy_request = NULL; ring->outstanding_lazy_seqno = 0; From 73d477f6bb17a1f14c4897a4b4a6597fe9a38ad2 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:31 +0100 Subject: [PATCH 132/640] drm/i915/bdw: Interrupts with logical rings We need to attend context switch interrupts from all rings. Also, fixed writing IMR/IER and added HWSTAM at ring init time. Notice that, if added to irq_enable_mask, the context switch interrupts would be incorrectly masked out when the user interrupts are due to no users waiting on a sequence number. Therefore, this commit adds a bitmask of interrupts to be kept unmasked at all times. v2: Disable HWSTAM, as suggested by Damien (nobody listens to these interrupts, anyway). v3: Add new get/put_irq functions. Signed-off-by: Thomas Daniel (v1) Signed-off-by: Oscar Mateo (v2 & v3) Reviewed-by: Damien Lespiau [danvet: Drop the GEN8_ prefix from the context switch interrupt define and move it to its brethren.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 19 ++++++-- drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/intel_lrc.c | 58 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 77 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 36eb1f234608..00957fa0b877 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1647,6 +1647,8 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, notify_ring(dev, &dev_priv->ring[RCS]); if (bcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[BCS]); + if ((rcs | bcs) & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); } else DRM_ERROR("The master control interrupt lied (GT0)!\n"); } @@ -1659,9 +1661,13 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, vcs = tmp >> GEN8_VCS1_IRQ_SHIFT; if (vcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[VCS]); + if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); vcs = tmp >> GEN8_VCS2_IRQ_SHIFT; if (vcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[VCS2]); + if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); } else DRM_ERROR("The master control interrupt lied (GT1)!\n"); } @@ -1685,6 +1691,8 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, vcs = tmp >> GEN8_VECS_IRQ_SHIFT; if (vcs & GT_RENDER_USER_INTERRUPT) notify_ring(dev, &dev_priv->ring[VECS]); + if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) + DRM_DEBUG_DRIVER("TODO: Context switch\n"); } else DRM_ERROR("The master control interrupt lied (GT3)!\n"); } @@ -3788,12 +3796,17 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) /* These are interrupts we'll toggle with the ring mask register */ uint32_t gt_interrupts[] = { GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT | GT_RENDER_L3_PARITY_ERROR_INTERRUPT | - GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT, + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT, GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | - GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT, + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT, 0, - GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT + GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT | + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT }; for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3388afb90a93..f79c20d49d99 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1087,6 +1087,7 @@ enum punit_power_well { #define RING_ACTHD_UDW(base) ((base)+0x5c) #define RING_NOPID(base) ((base)+0x94) #define RING_IMR(base) ((base)+0xa8) +#define RING_HWSTAM(base) ((base)+0x98) #define RING_TIMESTAMP(base) ((base)+0x358) #define TAIL_ADDR 0x001FFFF8 #define HEAD_WRAP_COUNT 0xFFE00000 @@ -1403,6 +1404,7 @@ enum punit_power_well { #define GT_BSD_CS_ERROR_INTERRUPT (1 << 15) #define GT_BSD_USER_INTERRUPT (1 << 12) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1 (1 << 11) /* hsw+; rsvd on snb, ivb, vlv */ +#define GT_CONTEXT_SWITCH_INTERRUPT (1 << 8) #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT (1 << 5) /* !snb */ #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT (1 << 4) #define GT_RENDER_CS_MASTER_ERROR_INTERRUPT (1 << 3) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8a524baa8a6b..009a8b5c088e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -319,6 +319,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring) struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; + I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask)); + I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff); + I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); @@ -357,6 +360,39 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + + if (!dev->irq_enabled) + return false; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + if (ring->irq_refcount++ == 0) { + I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask)); + POSTING_READ(RING_IMR(ring->mmio_base)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + + return true; +} + +static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + if (--ring->irq_refcount == 0) { + I915_WRITE_IMR(ring, ~ring->irq_keep_mask); + POSTING_READ(RING_IMR(ring->mmio_base)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); +} + static int gen8_emit_flush(struct intel_ringbuffer *ringbuf, u32 invalidate_domains, u32 unused) @@ -545,6 +581,10 @@ static int logical_render_ring_init(struct drm_device *dev) ring->mmio_base = RENDER_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT; + if (HAS_L3_DPF(dev)) + ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; ring->init = gen8_init_render_ring; ring->cleanup = intel_fini_pipe_control; @@ -552,6 +592,8 @@ static int logical_render_ring_init(struct drm_device *dev) ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush_render; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -566,12 +608,16 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->mmio_base = GEN6_BSD_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -586,12 +632,16 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->mmio_base = GEN8_BSD2_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -606,12 +656,16 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->mmio_base = BLT_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } @@ -626,12 +680,16 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->mmio_base = VEBOX_RING_BASE; ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + ring->irq_keep_mask = + GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT; ring->init = gen8_init_common_ring; ring->get_seqno = gen8_get_seqno; ring->set_seqno = gen8_set_seqno; ring->emit_request = gen8_emit_request; ring->emit_flush = gen8_emit_flush; + ring->irq_get = gen8_logical_ring_get_irq; + ring->irq_put = gen8_logical_ring_put_irq; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index e497837c7724..cb529ee10c8f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -224,6 +224,7 @@ struct intel_engine_cs { } semaphore; /* Execlists */ + u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ int (*emit_request)(struct intel_ringbuffer *ringbuf); int (*emit_flush)(struct intel_ringbuffer *ringbuf, u32 invalidate_domains, From 156485852684b511be28a83c78fece8b27ef7c26 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:32 +0100 Subject: [PATCH 133/640] drm/i915/bdw: GEN-specific logical ring emit batchbuffer start Dispatch_execbuffer's evil twin. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Ditch the check for aliasing ppgtt. It'll break soon and execlists requires full ppgtt anyway.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 27 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 009a8b5c088e..e0d4ef2a5c30 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -360,6 +360,28 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) return ret; } +static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, + u64 offset, unsigned flags) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + bool ppgtt = !(flags & I915_DISPATCH_SECURE); + int ret; + + ret = intel_logical_ring_begin(ringbuf, 4); + if (ret) + return ret; + + /* FIXME(BDW): Address space and security selectors. */ + intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); + intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_advance(ringbuf); + + return 0; +} + static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; @@ -594,6 +616,7 @@ static int logical_render_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush_render; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -618,6 +641,7 @@ static int logical_bsd_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -642,6 +666,7 @@ static int logical_bsd2_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -666,6 +691,7 @@ static int logical_blt_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } @@ -690,6 +716,7 @@ static int logical_vebox_ring_init(struct drm_device *dev) ring->emit_flush = gen8_emit_flush; ring->irq_get = gen8_logical_ring_get_irq; ring->irq_put = gen8_logical_ring_put_irq; + ring->emit_bb_start = gen8_emit_bb_start; return logical_ring_init(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index cb529ee10c8f..24437da91f77 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -229,6 +229,8 @@ struct intel_engine_cs { int (*emit_flush)(struct intel_ringbuffer *ringbuf, u32 invalidate_domains, u32 flush_domains); + int (*emit_bb_start)(struct intel_ringbuffer *ringbuf, + u64 offset, unsigned flags); /** * List of objects currently involved in rendering from the From ba8b7ccb196b07c1c553450e8e7b44a7a938e58a Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:33 +0100 Subject: [PATCH 134/640] drm/i915/bdw: Workload submission mechanism for Execlists This is what i915_gem_do_execbuffer calls when it wants to execute some worload in an Execlists world. v2: Check arguments before doing stuff in intel_execlists_submission. Also, get rel_constants parsing right. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Drop the chipset flush, that's pre-gen6. And appease checkpatch a bit .... again!] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 6 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 130 ++++++++++++++++++++- 3 files changed, 135 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9198f1c96470..53cc4c083d0d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2266,6 +2266,12 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void i915_gem_execbuffer_move_to_active(struct list_head *vmas, + struct intel_engine_cs *ring); +void i915_gem_execbuffer_retire_commands(struct drm_device *dev, + struct drm_file *file, + struct intel_engine_cs *ring, + struct drm_i915_gem_object *obj); int i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 26b38b3ae4f3..7e04a4825ed0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -942,7 +942,7 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } -static void +void i915_gem_execbuffer_move_to_active(struct list_head *vmas, struct intel_engine_cs *ring) { @@ -983,7 +983,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, } } -static void +void i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e0d4ef2a5c30..e1a298f23f50 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -91,6 +91,55 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) +{ + struct intel_engine_cs *ring = ringbuf->ring; + uint32_t flush_domains; + int ret; + + flush_domains = 0; + if (ring->gpu_caches_dirty) + flush_domains = I915_GEM_GPU_DOMAINS; + + ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains); + if (ret) + return ret; + + ring->gpu_caches_dirty = false; + return 0; +} + +static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, + struct list_head *vmas) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct i915_vma *vma; + uint32_t flush_domains = 0; + bool flush_chipset = false; + int ret; + + list_for_each_entry(vma, vmas, exec_list) { + struct drm_i915_gem_object *obj = vma->obj; + + ret = i915_gem_object_sync(obj, ring); + if (ret) + return ret; + + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) + flush_chipset |= i915_gem_clflush_object(obj, false); + + flush_domains |= obj->base.write_domain; + } + + if (flush_domains & I915_GEM_DOMAIN_GTT) + wmb(); + + /* Unconditionally invalidate gpu caches and ensure that we do flush + * any residual writes from the previous batch. + */ + return logical_ring_invalidate_all_caches(ringbuf); +} + int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, struct intel_context *ctx, @@ -99,7 +148,84 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_object *batch_obj, u64 exec_start, u32 flags) { - /* TODO */ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf; + int instp_mode; + u32 instp_mask; + int ret; + + instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; + instp_mask = I915_EXEC_CONSTANTS_MASK; + switch (instp_mode) { + case I915_EXEC_CONSTANTS_REL_GENERAL: + case I915_EXEC_CONSTANTS_ABSOLUTE: + case I915_EXEC_CONSTANTS_REL_SURFACE: + if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) { + DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); + return -EINVAL; + } + + if (instp_mode != dev_priv->relative_constants_mode) { + if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { + DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); + return -EINVAL; + } + + /* The HW changed the meaning on this bit on gen6 */ + instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; + } + break; + default: + DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + return -EINVAL; + } + + if (args->num_cliprects != 0) { + DRM_DEBUG("clip rectangles are only valid on pre-gen5\n"); + return -EINVAL; + } else { + if (args->DR4 == 0xffffffff) { + DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); + args->DR4 = 0; + } + + if (args->DR1 || args->DR4 || args->cliprects_ptr) { + DRM_DEBUG("0 cliprects but dirt in cliprects fields\n"); + return -EINVAL; + } + } + + if (args->flags & I915_EXEC_GEN7_SOL_RESET) { + DRM_DEBUG("sol reset is gen7 only\n"); + return -EINVAL; + } + + ret = execlists_move_to_gpu(ringbuf, vmas); + if (ret) + return ret; + + if (ring == &dev_priv->ring[RCS] && + instp_mode != dev_priv->relative_constants_mode) { + ret = intel_logical_ring_begin(ringbuf, 4); + if (ret) + return ret; + + intel_logical_ring_emit(ringbuf, MI_NOOP); + intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1)); + intel_logical_ring_emit(ringbuf, INSTPM); + intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode); + intel_logical_ring_advance(ringbuf); + + dev_priv->relative_constants_mode = instp_mode; + } + + ret = ring->emit_bb_start(ringbuf, exec_start, flags); + if (ret) + return ret; + + i915_gem_execbuffer_move_to_active(vmas, ring); + i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); + return 0; } @@ -363,8 +489,6 @@ static int gen8_init_render_ring(struct intel_engine_cs *ring) static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, u64 offset, unsigned flags) { - struct intel_engine_cs *ring = ringbuf->ring; - struct drm_i915_private *dev_priv = ring->dev->dev_private; bool ppgtt = !(flags & I915_DISPATCH_SECURE); int ret; From 14bf993e83e1d6924f4bf4506120a15c4b255e58 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:34 +0100 Subject: [PATCH 135/640] drm/i915/bdw: Always use MMIO flips with Execlists The normal flip function places things in the ring in the legacy way, so we either fix that or force MMIO flips always as we do in this patch. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch. Fucking again.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 ++ drivers/gpu/drm/i915/intel_lrc.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 245cf4128314..1bd1aa21a8e9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9536,6 +9536,8 @@ static bool use_mmio_flip(struct intel_engine_cs *ring, return false; else if (i915.use_mmio_flip > 0) return true; + else if (i915.enable_execlists) + return true; else return ring != obj->ring; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e1a298f23f50..6f1b64e01a05 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -85,7 +85,8 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists if (enable_execlists == 0) return 0; - if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev)) + if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) && + i915.use_mmio_flip >= 0) return 1; return 0; From b9d06dd9d1dd3672b391e6387d62aa8dc4e377bd Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Wed, 6 Aug 2014 15:04:44 +0200 Subject: [PATCH 136/640] drm/i915: vma/ppgtt lifetime rules VMAs should take a reference of the address space they use. Now, when the fd is closed, it will release the ref that the context was holding, but it will still be referenced by any vmas that are still active. ppgtt_release() should then only be called when the last thing referencing it releases the ref, and it can just call the base cleanup and free the ppgtt. Note that with this we will extend the lifetime of ppgtts which contain shared objects. But all the non-shared objects will get removed as soon as they drop of the active list and for the shared ones the shrinker can eventually reap them. Since we currently can't evict ppgtt pagetables either I don't think that temporary leak is important. Signed-off-by: Michel Thierry [danvet: Add note about potential ppgtt leak with this approach.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 8 ++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 23 +++-------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++++ 4 files changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 53cc4c083d0d..38b1849a450a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2547,7 +2547,9 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); /* i915_gem_context.c */ #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) +#define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); +void ppgtt_release(struct kref *kref); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9acb2469116a..63aee412b258 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4476,12 +4476,20 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, void i915_gem_vma_destroy(struct i915_vma *vma) { + struct i915_address_space *vm = NULL; + struct i915_hw_ppgtt *ppgtt = NULL; WARN_ON(vma->node.allocated); /* Keep the vma as a placeholder in the execbuffer reservation lists */ if (!list_empty(&vma->exec_list)) return; + vm = vma->vm; + ppgtt = vm_to_ppgtt(vm); + + if (ppgtt) + kref_put(&ppgtt->ref, ppgtt_release); + list_del(&vma->vma_link); kfree(vma); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 7a08f3e9e1ae..a509a4bca991 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -108,30 +108,13 @@ static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) return; } - /* - * Make sure vmas are unbound before we take down the drm_mm - * - * FIXME: Proper refcounting should take care of this, this shouldn't be - * needed at all. - */ - if (!list_empty(&vm->active_list)) { - struct i915_vma *vma; - - list_for_each_entry(vma, &vm->active_list, mm_list) - if (WARN_ON(list_empty(&vma->vma_link) || - list_is_singular(&vma->vma_link))) - break; - - i915_gem_evict_vm(&ppgtt->base, true); - } else { - i915_gem_retire_requests(dev); - i915_gem_evict_vm(&ppgtt->base, false); - } + /* vmas should already be unbound */ + WARN_ON(!list_empty(&vm->active_list)); ppgtt->base.cleanup(&ppgtt->base); } -static void ppgtt_release(struct kref *kref) +void ppgtt_release(struct kref *kref) { struct i915_hw_ppgtt *ppgtt = container_of(kref, struct i915_hw_ppgtt, ref); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b4b7cfd226b7..76dd3b428483 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2148,10 +2148,15 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) { struct i915_vma *vma; + struct i915_hw_ppgtt *ppgtt = NULL; vma = i915_gem_obj_to_vma(obj, vm); if (!vma) vma = __i915_gem_vma_create(obj, vm); + ppgtt = vm_to_ppgtt(vm); + if (ppgtt) + kref_get(&ppgtt->ref); + return vma; } From ee960be7bb09b201926cb37eaa82fb7da605ea7c Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:45 +0200 Subject: [PATCH 137/640] drm/i915: Some cleanups for the ppgtt lifetime handling So when reviewing Michel's patch I've noticed a few things and cleaned them up: - The early checks in ppgtt_release are now redundant: The inactive list should always be empty now, so we can ditch these checks. Even for the aliasing ppgtt (though that's a different confusion) since we tear that down after all the objects are gone. - The ppgtt handling functions are splattered all over. Consolidate them in i915_gem_gtt.c, give them OCD prefixes and add wrappers for get/put. - There was a bit a confusion in ppgtt_release about whether it cares about the active or inactive list. It should care about them both, so augment the WARNINGs to check for both. There's still create_vm_for_ctx left to do, put that is blocked on the removal of ppgtt->ctx. Once that's done we can rename it to i915_ppgtt_create and move it to its siblings for handling ppgtts. v2: Move the ppgtt checks into the inline get/put functions as suggested by Chris. v3: Inline the now redundant ppgtt local variable. Cc: Michel Thierry Cc: Chris Wilson Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 5 +--- drivers/gpu/drm/i915/i915_gem_context.c | 36 +++---------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 20 ++++++++++---- drivers/gpu/drm/i915/i915_gem_gtt.h | 14 +++++++++- 5 files changed, 33 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 38b1849a450a..101fc637eb46 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2549,7 +2549,6 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) #define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); -void ppgtt_release(struct kref *kref); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 63aee412b258..8061d45eaa80 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4477,7 +4477,6 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, void i915_gem_vma_destroy(struct i915_vma *vma) { struct i915_address_space *vm = NULL; - struct i915_hw_ppgtt *ppgtt = NULL; WARN_ON(vma->node.allocated); /* Keep the vma as a placeholder in the execbuffer reservation lists */ @@ -4485,10 +4484,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma) return; vm = vma->vm; - ppgtt = vm_to_ppgtt(vm); - if (ppgtt) - kref_put(&ppgtt->ref, ppgtt_release); + i915_ppgtt_put(vm_to_ppgtt(vm)); list_del(&vma->vma_link); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a509a4bca991..dc43d0263a01 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -96,33 +96,6 @@ #define GEN6_CONTEXT_ALIGN (64<<10) #define GEN7_CONTEXT_ALIGN 4096 -static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt) -{ - struct drm_device *dev = ppgtt->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct i915_address_space *vm = &ppgtt->base; - - if (ppgtt == dev_priv->mm.aliasing_ppgtt || - (list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) { - ppgtt->base.cleanup(&ppgtt->base); - return; - } - - /* vmas should already be unbound */ - WARN_ON(!list_empty(&vm->active_list)); - - ppgtt->base.cleanup(&ppgtt->base); -} - -void ppgtt_release(struct kref *kref) -{ - struct i915_hw_ppgtt *ppgtt = - container_of(kref, struct i915_hw_ppgtt, ref); - - do_ppgtt_cleanup(ppgtt); - kfree(ppgtt); -} - static size_t get_context_alignment(struct drm_device *dev) { if (IS_GEN6(dev)) @@ -174,8 +147,7 @@ void i915_gem_context_free(struct kref *ctx_ref) ppgtt = ctx_to_ppgtt(ctx); } - if (ppgtt) - kref_put(&ppgtt->ref, ppgtt_release); + i915_ppgtt_put(ppgtt); if (ctx->legacy_hw_ctx.rcs_state) drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base); list_del(&ctx->link); @@ -222,7 +194,7 @@ create_vm_for_ctx(struct drm_device *dev, struct intel_context *ctx) if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_gem_init_ppgtt(dev, ppgtt); + ret = i915_ppgtt_init(dev, ppgtt); if (ret) { kfree(ppgtt); return ERR_PTR(ret); @@ -234,7 +206,7 @@ create_vm_for_ctx(struct drm_device *dev, struct intel_context *ctx) static struct intel_context * __create_hw_context(struct drm_device *dev, - struct drm_i915_file_private *file_priv) + struct drm_i915_file_private *file_priv) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_context *ctx; @@ -342,7 +314,7 @@ i915_gem_create_context(struct drm_device *dev, /* For platforms which only have aliasing PPGTT, we fake the * address space and refcounting. */ ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; - kref_get(&dev_priv->mm.aliasing_ppgtt->ref); + i915_ppgtt_get(dev_priv->mm.aliasing_ppgtt); } else ctx->vm = &dev_priv->gtt.base; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 76dd3b428483..6ffa12b72538 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1180,7 +1180,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return 0; } -int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) { struct drm_i915_private *dev_priv = dev->dev_private; int ret = 0; @@ -1211,6 +1211,19 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) return ret; } +void i915_ppgtt_release(struct kref *kref) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(kref, struct i915_hw_ppgtt, ref); + + /* vmas should already be unbound */ + WARN_ON(!list_empty(&ppgtt->base.active_list)); + WARN_ON(!list_empty(&ppgtt->base.inactive_list)); + + ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); +} + static void ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, @@ -2148,15 +2161,12 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm) { struct i915_vma *vma; - struct i915_hw_ppgtt *ppgtt = NULL; vma = i915_gem_obj_to_vma(obj, vm); if (!vma) vma = __i915_gem_vma_create(obj, vm); - ppgtt = vm_to_ppgtt(vm); - if (ppgtt) - kref_get(&ppgtt->ref); + i915_ppgtt_get(vm_to_ppgtt(vm)); return vma; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 666c938a51e3..c6beb528f955 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -272,7 +272,19 @@ void i915_gem_init_global_gtt(struct drm_device *dev); void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); -int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); + +int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); +void i915_ppgtt_release(struct kref *kref); +static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) +{ + if (ppgtt) + kref_get(&ppgtt->ref); +} +static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt) +{ + if (ppgtt) + kref_put(&ppgtt->ref, i915_ppgtt_release); +} void i915_check_and_clear_faults(struct drm_device *dev); void i915_gem_suspend_gtt_mappings(struct drm_device *dev); From 4d884705dababd7d0f3f12796bc7b45e84962596 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:47 +0200 Subject: [PATCH 138/640] drm/i915: Track file_priv, not ctx in the ppgtt structure Hardware contexts reference a ppgtt, not the other way round. And the only user of this (in debugfs) actually only cares about which file the ppgtt is associated with. So give it what it wants. While at it give the ppgtt create function a proper name&place. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 22 +--------------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +++++- 4 files changed, 28 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3b7decbeeed3..1c3a9943a742 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -333,7 +333,7 @@ static int per_file_stats(int id, void *ptr, void *data) } ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base); - if (ppgtt->ctx && ppgtt->ctx->file_priv != stats->file_priv) + if (ppgtt->file_priv != stats->file_priv) continue; if (obj->ring) /* XXX per-vma statistic */ diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index dc43d0263a01..90665872734d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -184,26 +184,6 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) return obj; } -static struct i915_hw_ppgtt * -create_vm_for_ctx(struct drm_device *dev, struct intel_context *ctx) -{ - struct i915_hw_ppgtt *ppgtt; - int ret; - - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return ERR_PTR(-ENOMEM); - - ret = i915_ppgtt_init(dev, ppgtt); - if (ret) { - kfree(ppgtt); - return ERR_PTR(ret); - } - - ppgtt->ctx = ctx; - return ppgtt; -} - static struct intel_context * __create_hw_context(struct drm_device *dev, struct drm_i915_file_private *file_priv) @@ -290,7 +270,7 @@ i915_gem_create_context(struct drm_device *dev, } if (create_vm) { - struct i915_hw_ppgtt *ppgtt = create_vm_for_ctx(dev, ctx); + struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); if (IS_ERR_OR_NULL(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6ffa12b72538..a5715faba65f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1211,6 +1211,27 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) return ret; } +struct i915_hw_ppgtt * +i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) +{ + struct i915_hw_ppgtt *ppgtt; + int ret; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return ERR_PTR(-ENOMEM); + + ret = i915_ppgtt_init(dev, ppgtt); + if (ret) { + kfree(ppgtt); + return ERR_PTR(ret); + } + + ppgtt->file_priv = fpriv; + + return ppgtt; +} + void i915_ppgtt_release(struct kref *kref) { struct i915_hw_ppgtt *ppgtt = diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index c6beb528f955..90ff45246b62 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -34,6 +34,8 @@ #ifndef __I915_GEM_GTT_H__ #define __I915_GEM_GTT_H__ +struct drm_i915_file_private; + typedef uint32_t gen6_gtt_pte_t; typedef uint64_t gen8_gtt_pte_t; typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; @@ -258,7 +260,7 @@ struct i915_hw_ppgtt { dma_addr_t *gen8_pt_dma_addr[4]; }; - struct intel_context *ctx; + struct drm_i915_file_private *file_priv; int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, @@ -275,6 +277,8 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); void i915_ppgtt_release(struct kref *kref); +struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, + struct drm_i915_file_private *fpriv); static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) { if (ppgtt) From 841cd7737557785c0f215b0984c06aaaaa882302 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:48 +0200 Subject: [PATCH 139/640] drm/i915: Only refcount ppgtt if it actually is one This essentially unbreaks non-ppgtt operation where we'd scribble over random memory. While at it give the vm_to_ppgtt function a proper prefix and make it a bit more paranoid. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 10 +++++++++- drivers/gpu/drm/i915/i915_gem.c | 3 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 ++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 101fc637eb46..454badf31dfd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2512,6 +2512,15 @@ static inline bool i915_is_ggtt(struct i915_address_space *vm) return vm == ggtt; } +static inline struct i915_hw_ppgtt * +i915_vm_to_ppgtt(struct i915_address_space *vm) +{ + WARN_ON(i915_is_ggtt(vm)); + + return container_of(vm, struct i915_hw_ppgtt, base); +} + + static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) { return i915_gem_obj_bound(obj, obj_to_ggtt(obj)); @@ -2547,7 +2556,6 @@ void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); /* i915_gem_context.c */ #define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) -#define vm_to_ppgtt(vm) container_of(vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8061d45eaa80..e3e30cd474be 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4485,7 +4485,8 @@ void i915_gem_vma_destroy(struct i915_vma *vma) vm = vma->vm; - i915_ppgtt_put(vm_to_ppgtt(vm)); + if (!i915_is_ggtt(vm)) + i915_ppgtt_put(i915_vm_to_ppgtt(vm)); list_del(&vma->vma_link); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a5715faba65f..48d8f4a21c3f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2187,7 +2187,8 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, if (!vma) vma = __i915_gem_vma_create(obj, vm); - i915_ppgtt_get(vm_to_ppgtt(vm)); + if (!i915_is_ggtt(vm)) + i915_ppgtt_get(i915_vm_to_ppgtt(vm)); return vma; } From 5dc383b05a05d05e964172d882603cd171040c5f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:49 +0200 Subject: [PATCH 140/640] drm/i915: Add proper prefix to obj_to_ggtt Stuff in headers really aught to have this. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 11 ++++++----- drivers/gpu/drm/i915/i915_gem.c | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 454badf31dfd..2db22732c7ae 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2503,7 +2503,7 @@ static inline bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) { } /* Some GGTT VM helpers */ -#define obj_to_ggtt(obj) \ +#define i915_obj_to_ggtt(obj) \ (&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base) static inline bool i915_is_ggtt(struct i915_address_space *vm) { @@ -2523,19 +2523,19 @@ i915_vm_to_ppgtt(struct i915_address_space *vm) static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) { - return i915_gem_obj_bound(obj, obj_to_ggtt(obj)); + return i915_gem_obj_bound(obj, i915_obj_to_ggtt(obj)); } static inline unsigned long i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *obj) { - return i915_gem_obj_offset(obj, obj_to_ggtt(obj)); + return i915_gem_obj_offset(obj, i915_obj_to_ggtt(obj)); } static inline unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj) { - return i915_gem_obj_size(obj, obj_to_ggtt(obj)); + return i915_gem_obj_size(obj, i915_obj_to_ggtt(obj)); } static inline int __must_check @@ -2543,7 +2543,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj, uint32_t alignment, unsigned flags) { - return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, flags | PIN_GLOBAL); + return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), + alignment, flags | PIN_GLOBAL); } static inline int diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e3e30cd474be..c9d1396781e2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5252,7 +5252,7 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) struct i915_vma *vma; vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link); - if (vma->vm != obj_to_ggtt(obj)) + if (vma->vm != i915_obj_to_ggtt(obj)) return NULL; return vma; From 6c5566a82c6fb1da9e13a294f23d4cd85a08cb30 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:50 +0200 Subject: [PATCH 141/640] drm/i915: Allow i915_gem_setup_global_gtt to fail We already needs this just as a safety check in case the preallocation reservation dance fails. But we definitely need this to be able to move tha aliasing ppgtt setup back out of the context code to this place, where it belongs. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- drivers/gpu/drm/i915/i915_gem_gtt.c | 16 ++++++++++------ drivers/gpu/drm/i915/i915_gem_gtt.h | 4 ++-- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c9d1396781e2..c8404a439502 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4754,7 +4754,12 @@ int i915_gem_init(struct drm_device *dev) dev_priv->gt.stop_ring = intel_logical_ring_stop; } - i915_gem_init_userptr(dev); + ret = i915_gem_init_userptr(dev); + if (ret) { + mutex_unlock(&dev->struct_mutex); + return ret; + } + i915_gem_init_global_gtt(dev); ret = i915_gem_context_init(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 48d8f4a21c3f..d228f839ca4f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1698,10 +1698,10 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, } } -void i915_gem_setup_global_gtt(struct drm_device *dev, - unsigned long start, - unsigned long mappable_end, - unsigned long end) +int i915_gem_setup_global_gtt(struct drm_device *dev, + unsigned long start, + unsigned long mappable_end, + unsigned long end) { /* Let GEM Manage all of the aperture. * @@ -1734,8 +1734,10 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, WARN_ON(i915_gem_obj_ggtt_bound(obj)); ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node); - if (ret) - DRM_DEBUG_KMS("Reservation failed\n"); + if (ret) { + DRM_DEBUG_KMS("Reservation failed: %i\n", ret); + return ret; + } obj->has_global_gtt_mapping = 1; } @@ -1752,6 +1754,8 @@ void i915_gem_setup_global_gtt(struct drm_device *dev, /* And finally clear the reserved guard page */ ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); + + return 0; } void i915_gem_init_global_gtt(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 90ff45246b62..0eb0dddff76b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -271,8 +271,8 @@ struct i915_hw_ppgtt { int i915_gem_gtt_init(struct drm_device *dev); void i915_gem_init_global_gtt(struct drm_device *dev); -void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, - unsigned long mappable_end, unsigned long end); +int i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, + unsigned long mappable_end, unsigned long end); int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); From 896ab1a5d54269b463a24194c2e4a369103b46d8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:51 +0200 Subject: [PATCH 142/640] drm/i915: Fix up checks for aliasing ppgtt A subsequent patch will no longer initialize the aliasing ppgtt if we have full ppgtt enabled, since we simply don't need that any more. Unfortunately a few places check for the aliasing ppgtt instead of checking for ppgtt in general. Fix them up. One special case are the gtt offset and size macros, which have some code to remap the aliasing ppgtt to the global gtt. The aliasing ppgtt is _not_ a logical address space, so passing that in as the vm is plain and simple a bug. So just WARN about it and carry on - we have a gracefully fall-through anyway if we can't find the vma. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 +--- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 8 ++------ drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +--- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index dea99d92fb4a..c45856bcc8b9 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -842,8 +842,6 @@ finish: */ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - if (!ring->needs_cmd_parser) return false; @@ -852,7 +850,7 @@ bool i915_needs_cmd_parser(struct intel_engine_cs *ring) * disabled. That will cause all of the parser's PPGTT checks to * fail. For now, disable parsing when PPGTT is off. */ - if (!dev_priv->mm.aliasing_ppgtt) + if (USES_PPGTT(ring->dev)) return false; return (i915.enable_cmd_parser == 1); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 1763fbf34e1d..895f9f2f35ea 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -999,7 +999,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = HAS_WT(dev); break; case I915_PARAM_HAS_ALIASING_PPGTT: - value = dev_priv->mm.aliasing_ppgtt || USES_FULL_PPGTT(dev); + value = USES_PPGTT(dev); break; case I915_PARAM_HAS_WAIT_TIMEOUT: value = 1; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c8404a439502..6a7795097017 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5106,9 +5106,7 @@ unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o, struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; - if (!dev_priv->mm.aliasing_ppgtt || - vm == &dev_priv->mm.aliasing_ppgtt->base) - vm = &dev_priv->gtt.base; + WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); list_for_each_entry(vma, &o->vma_list, vma_link) { if (vma->vm == vm) @@ -5149,9 +5147,7 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o, struct drm_i915_private *dev_priv = o->base.dev->dev_private; struct i915_vma *vma; - if (!dev_priv->mm.aliasing_ppgtt || - vm == &dev_priv->mm.aliasing_ppgtt->base) - vm = &dev_priv->gtt.base; + WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); BUG_ON(list_empty(&o->vma_list)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 4236014c1cda..13543f8528c2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2006,9 +2006,7 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, u64 offset, u32 len, unsigned flags) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL && - !(flags & I915_DISPATCH_SECURE); + bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE); int ret; ret = intel_ring_begin(ring, 4); From 82460d97246a993aa49e88bf9b4154cce60f8da8 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 20:19:53 +0200 Subject: [PATCH 143/640] drm/i915: Rework ppgtt init to no require an aliasing ppgtt Currently we abuse the aliasing ppgtt to set up the ppgtt support in general. Which is a bit backwards since with full ppgtt we don't ever need the aliasing ppgtt. So untangle this and separate the ppgtt init from the aliasing ppgtt. While at it drag it out of the context enabling (which just does a switch to the default context). Note that we still have the differentiation between synchronous and asynchronous ppgtt setup, but that will soon vanish. So also correctly wire up the return value handling to be prepared for when ->switch_mm drops the synchronous parameter and could start to fail. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 8 +++ drivers/gpu/drm/i915/i915_gem_context.c | 7 --- drivers/gpu/drm/i915/i915_gem_gtt.c | 84 ++++++++++--------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 4 files changed, 42 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6a7795097017..6c2f0b886eb0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4719,6 +4719,14 @@ i915_gem_init_hw(struct drm_device *dev) if (ret && ret != -EIO) { DRM_ERROR("Context enable failed %d\n", ret); i915_gem_cleanup_ringbuffer(dev); + + return ret; + } + + ret = i915_ppgtt_init_hw(dev); + if (ret && ret != -EIO) { + DRM_ERROR("PPGTT enable failed %d\n", ret); + i915_gem_cleanup_ringbuffer(dev); } return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 90665872734d..eece059a8447 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -436,13 +436,6 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv) struct intel_engine_cs *ring; int ret, i; - /* This is the only place the aliasing PPGTT gets enabled, which means - * it has to happen before we bail on reset */ - if (dev_priv->mm.aliasing_ppgtt) { - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - ppgtt->enable(ppgtt); - } - /* FIXME: We should make this work, even in reset */ if (i915_reset_in_progress(&dev_priv->gpu_error)) return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d228f839ca4f..b7dcf72126f9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -67,7 +67,6 @@ static void ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); static void ppgtt_unbind_vma(struct i915_vma *vma); -static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt); static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, enum i915_cache_level level, @@ -604,7 +603,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) kunmap_atomic(pd_vaddr); } - ppgtt->enable = gen8_ppgtt_enable; ppgtt->switch_mm = gen8_mm_switch; ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; @@ -825,39 +823,20 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, return 0; } -static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static void gen8_ppgtt_enable(struct drm_device *dev) { - struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; - int j, ret; + int j; for_each_ring(ring, dev_priv, j) { I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - - /* We promise to do a switch later with FULL PPGTT. If this is - * aliasing, this is the one and only switch we'll do */ - if (USES_FULL_PPGTT(dev)) - continue; - - ret = ppgtt->switch_mm(ppgtt, ring, true); - if (ret) - goto err_out; } - - return 0; - -err_out: - for_each_ring(ring, dev_priv, j) - I915_WRITE(RING_MODE_GEN7(ring), - _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE)); - return ret; } -static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static void gen7_ppgtt_enable(struct drm_device *dev) { - struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; uint32_t ecochk, ecobits; @@ -876,31 +855,16 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) I915_WRITE(GAM_ECOCHK, ecochk); for_each_ring(ring, dev_priv, i) { - int ret; /* GFX_MODE is per-ring on gen7+ */ I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - - /* We promise to do a switch later with FULL PPGTT. If this is - * aliasing, this is the one and only switch we'll do */ - if (USES_FULL_PPGTT(dev)) - continue; - - ret = ppgtt->switch_mm(ppgtt, ring, true); - if (ret) - return ret; } - - return 0; } -static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) +static void gen6_ppgtt_enable(struct drm_device *dev) { - struct drm_device *dev = ppgtt->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_engine_cs *ring; uint32_t ecochk, gab_ctl, ecobits; - int i; ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | @@ -913,14 +877,6 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt) I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); - - for_each_ring(ring, dev_priv, i) { - int ret = ppgtt->switch_mm(ppgtt, ring, true); - if (ret) - return ret; - } - - return 0; } /* PPGTT support for Sandybdrige/Gen6 and later */ @@ -1140,13 +1096,10 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode; if (IS_GEN6(dev)) { - ppgtt->enable = gen6_ppgtt_enable; ppgtt->switch_mm = gen6_mm_switch; } else if (IS_HASWELL(dev)) { - ppgtt->enable = gen7_ppgtt_enable; ppgtt->switch_mm = hsw_mm_switch; } else if (IS_GEN7(dev)) { - ppgtt->enable = gen7_ppgtt_enable; ppgtt->switch_mm = gen7_mm_switch; } else BUG(); @@ -1211,6 +1164,35 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) return ret; } +int i915_ppgtt_init_hw(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + int i, ret = 0; + + if (!USES_PPGTT(dev)) + return 0; + + if (IS_GEN6(dev)) + gen6_ppgtt_enable(dev); + else if (IS_GEN7(dev)) + gen7_ppgtt_enable(dev); + else if (INTEL_INFO(dev)->gen >= 8) + gen8_ppgtt_enable(dev); + else + WARN_ON(1); + + if (ppgtt) { + for_each_ring(ring, dev_priv, i) { + ret = ppgtt->switch_mm(ppgtt, ring, true); + if (ret != 0) + return ret; + } + } + + return ret; +} struct i915_hw_ppgtt * i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 0eb0dddff76b..98fbb7309ea5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -276,6 +276,7 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); +int i915_ppgtt_init_hw(struct drm_device *dev); void i915_ppgtt_release(struct kref *kref); struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv); From fa76da3499f1789f0e37d3bbcdc320bdf47c89ca Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 20:19:54 +0200 Subject: [PATCH 144/640] drm/i915: Initialize the aliasing ppgtt as part of global gtt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stuffing this into the context setup code doesn't make a lot of sense. Also reusing the real ppgtt setup code makes even less sense since the aliasing ppgtt isn't a real address space. Leaving all that stuff unitialized will make sure that we catch any abusers promptly. This is also a prep work to clean up the context->ppgtt link. v2: Fix up the logic fail, I've fumbled it so badly to completely disable ppgtt on gen6. Spotted by Ville and Michel. Also move around the pde write into the gen6 init function, since otherwise it won't work at all. v3: Only initialize the aliasing ppgtt when we actually enable it. Cc: "Thierry, Michel" Cc: Ville Syrjälä Reviewed-by: Michel Thierry [danvet: Squash in fixup from Fengguang Wu.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 13 +------- drivers/gpu/drm/i915/i915_gem_gtt.c | 42 ++++++++++++++++++------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index eece059a8447..7093f5df9ee7 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -279,17 +279,6 @@ i915_gem_create_context(struct drm_device *dev, goto err_unpin; } else ctx->vm = &ppgtt->base; - - /* This case is reserved for the global default context and - * should only happen once. */ - if (is_global_default_ctx) { - if (WARN_ON(dev_priv->mm.aliasing_ppgtt)) { - ret = -EEXIST; - goto err_unpin; - } - - dev_priv->mm.aliasing_ppgtt = ppgtt; - } } else if (USES_PPGTT(dev)) { /* For platforms which only have aliasing PPGTT, we fake the * address space and refcounting. */ @@ -368,7 +357,7 @@ int i915_gem_context_init(struct drm_device *dev) } } - ctx = i915_gem_create_context(dev, NULL, USES_PPGTT(dev)); + ctx = i915_gem_create_context(dev, NULL, USES_FULL_PPGTT(dev)); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", PTR_ERR(ctx)); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b7dcf72126f9..b033ff770ac4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1130,35 +1130,38 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); + gen6_write_pdes(ppgtt); + DRM_DEBUG("Adding PPGTT at offset %x\n", + ppgtt->pd_offset << 10); + return 0; } -int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) { struct drm_i915_private *dev_priv = dev->dev_private; - int ret = 0; ppgtt->base.dev = dev; ppgtt->base.scratch = dev_priv->gtt.base.scratch; if (INTEL_INFO(dev)->gen < 8) - ret = gen6_ppgtt_init(ppgtt); + return gen6_ppgtt_init(ppgtt); else if (IS_GEN8(dev)) - ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); + return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); else BUG(); +} +int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int ret = 0; - if (!ret) { - struct drm_i915_private *dev_priv = dev->dev_private; + ret = __hw_ppgtt_init(dev, ppgtt); + if (ret == 0) { kref_init(&ppgtt->ref); drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total); i915_init_vm(dev_priv, &ppgtt->base); - if (INTEL_INFO(dev)->gen < 8) { - gen6_write_pdes(ppgtt); - DRM_DEBUG("Adding PPGTT at offset %x\n", - ppgtt->pd_offset << 10); - } } return ret; @@ -1699,6 +1702,7 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, struct drm_mm_node *entry; struct drm_i915_gem_object *obj; unsigned long hole_start, hole_end; + int ret; BUG_ON(mappable_end > end); @@ -1710,7 +1714,7 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, /* Mark any preallocated objects as occupied */ list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm); - int ret; + DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n", i915_gem_obj_ggtt_offset(obj), obj->base.size); @@ -1737,6 +1741,20 @@ int i915_gem_setup_global_gtt(struct drm_device *dev, /* And finally clear the reserved guard page */ ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true); + if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) { + struct i915_hw_ppgtt *ppgtt; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return -ENOMEM; + + ret = __hw_ppgtt_init(dev, ppgtt); + if (ret != 0) + return ret; + + dev_priv->mm.aliasing_ppgtt = ppgtt; + } + return 0; } From ae6c4806927b8b0781ecc187aa16b10c820fc430 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:53 +0200 Subject: [PATCH 145/640] drm/i915: Only track real ppgtt for a context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's a bit a confusion since we track the global gtt, the aliasing and real ppgtt in the ctx->vm pointer. And not all callers really bother to check for the different cases and just presume that it points to a real ppgtt. Now looking closely we don't actually need ->vm to always point at an address space - the only place that cares actually has fixup code already to decide whether to look at the per-proces or the global address space. So switch to just tracking the ppgtt directly and ditch all the extraneous code. v2: Fixup the ppgtt debugfs file to not oops on a NULL ctx->ppgtt. Also drop the early exit - without aliasing ppgtt we want to dump all the ppgtts of the contexts if we have full ppgtt. v3: Actually git add the compile fix. Reviewed-by: Michel Thierry Cc: "Thierry, Michel" Cc: Ville Syrjälä OTC-Jira: VIZ-3724 [danvet: Resolve conflicts with execlist patches while applying.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 11 ++++++-- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_gem_context.c | 33 +++++++--------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 10 +++++-- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 6 files changed, 30 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1c3a9943a742..d42db6bc34e0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1821,7 +1821,13 @@ static int per_file_ctx(int id, void *ptr, void *data) { struct intel_context *ctx = ptr; struct seq_file *m = data; - struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; + + if (!ppgtt) { + seq_printf(m, " no ppgtt for context %d\n", + ctx->user_handle); + return 0; + } if (i915_gem_context_is_default(ctx)) seq_puts(m, " default context:\n"); @@ -1881,8 +1887,7 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset); ppgtt->debug_dump(ppgtt, m); - } else - return; + } list_for_each_entry_reverse(file, &dev->filelist, lhead) { struct drm_i915_file_private *file_priv = file->driver_priv; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2db22732c7ae..eb99a109c0bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -624,7 +624,7 @@ struct intel_context { uint8_t remap_slice; struct drm_i915_file_private *file_priv; struct i915_ctx_hang_stats hang_stats; - struct i915_address_space *vm; + struct i915_hw_ppgtt *ppgtt; /* Legacy ring buffer submission */ struct { @@ -2556,7 +2556,6 @@ i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj) void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj); /* i915_gem_context.c */ -#define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base) int __must_check i915_gem_context_init(struct drm_device *dev); void i915_gem_context_fini(struct drm_device *dev); void i915_gem_context_reset(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 7093f5df9ee7..206bf2d6c554 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -135,19 +135,13 @@ static int get_context_size(struct drm_device *dev) void i915_gem_context_free(struct kref *ctx_ref) { struct intel_context *ctx = container_of(ctx_ref, - typeof(*ctx), ref); - struct i915_hw_ppgtt *ppgtt = NULL; + typeof(*ctx), ref); - if (i915.enable_execlists) { - ppgtt = ctx_to_ppgtt(ctx); + if (i915.enable_execlists) intel_lr_context_free(ctx); - } else if (ctx->legacy_hw_ctx.rcs_state) { - /* We refcount even the aliasing PPGTT to keep the code symmetric */ - if (USES_PPGTT(ctx->legacy_hw_ctx.rcs_state->base.dev)) - ppgtt = ctx_to_ppgtt(ctx); - } - i915_ppgtt_put(ppgtt); + i915_ppgtt_put(ctx->ppgtt); + if (ctx->legacy_hw_ctx.rcs_state) drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base); list_del(&ctx->link); @@ -243,7 +237,6 @@ i915_gem_create_context(struct drm_device *dev, bool create_vm) { const bool is_global_default_ctx = file_priv == NULL; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_context *ctx; int ret = 0; @@ -277,15 +270,10 @@ i915_gem_create_context(struct drm_device *dev, PTR_ERR(ppgtt)); ret = PTR_ERR(ppgtt); goto err_unpin; - } else - ctx->vm = &ppgtt->base; - } else if (USES_PPGTT(dev)) { - /* For platforms which only have aliasing PPGTT, we fake the - * address space and refcounting. */ - ctx->vm = &dev_priv->mm.aliasing_ppgtt->base; - i915_ppgtt_get(dev_priv->mm.aliasing_ppgtt); - } else - ctx->vm = &dev_priv->gtt.base; + } + + ctx->ppgtt = ppgtt; + } return ctx; @@ -543,7 +531,6 @@ static int do_switch(struct intel_engine_cs *ring, { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct intel_context *from = ring->last_context; - struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to); u32 hw_flags = 0; bool uninitialized = false; int ret, i; @@ -571,8 +558,8 @@ static int do_switch(struct intel_engine_cs *ring, */ from = ring->last_context; - if (USES_FULL_PPGTT(ring->dev)) { - ret = ppgtt->switch_mm(ppgtt, ring, false); + if (to->ppgtt) { + ret = to->ppgtt->switch_mm(to->ppgtt, ring, false); if (ret) goto unpin_out; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7e04a4825ed0..1a0611bb576b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1315,8 +1315,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, i915_gem_context_reference(ctx); - vm = ctx->vm; - if (!USES_FULL_PPGTT(dev)) + if (ctx->ppgtt) + vm = &ctx->ppgtt->base; + else vm = &dev_priv->gtt.base; eb = eb_create(args); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index eab41f9390f8..fc11ac6b0373 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -967,6 +967,12 @@ static void i915_gem_record_rings(struct drm_device *dev, request = i915_gem_find_active_request(ring); if (request) { + struct i915_address_space *vm; + + vm = request->ctx && request->ctx->ppgtt ? + &request->ctx->ppgtt->base : + &dev_priv->gtt.base; + /* We need to copy these to an anonymous buffer * as the simplest method to avoid being overwritten * by userspace. @@ -974,9 +980,7 @@ static void i915_gem_record_rings(struct drm_device *dev, error->ring[i].batchbuffer = i915_error_object_create(dev_priv, request->batch_obj, - request->ctx ? - request->ctx->vm : - &dev_priv->gtt.base); + vm); if (HAS_BROKEN_CS_TLB(dev_priv->dev) && ring->scratch.obj) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6f1b64e01a05..6b5f416b5c0d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -904,7 +904,7 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf) { struct drm_i915_gem_object *ring_obj = ringbuf->obj; - struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx); + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; struct page *page; uint32_t *reg_state; int ret; From d624d86e1e3b69cadb2dad42588e71e9a3b6d70a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:54 +0200 Subject: [PATCH 146/640] drm/i915: Drop create_vm argument to i915_gem_create_context Now that all the flow is streamlined the rule is simple: We create a new ppgtt for a new context when we have full ppgtt enabled. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 206bf2d6c554..9683e62ec61a 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -233,8 +233,7 @@ err_out: */ static struct intel_context * i915_gem_create_context(struct drm_device *dev, - struct drm_i915_file_private *file_priv, - bool create_vm) + struct drm_i915_file_private *file_priv) { const bool is_global_default_ctx = file_priv == NULL; struct intel_context *ctx; @@ -262,7 +261,7 @@ i915_gem_create_context(struct drm_device *dev, } } - if (create_vm) { + if (USES_FULL_PPGTT(dev)) { struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv); if (IS_ERR_OR_NULL(ppgtt)) { @@ -345,7 +344,7 @@ int i915_gem_context_init(struct drm_device *dev) } } - ctx = i915_gem_create_context(dev, NULL, USES_FULL_PPGTT(dev)); + ctx = i915_gem_create_context(dev, NULL); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", PTR_ERR(ctx)); @@ -444,7 +443,7 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file) idr_init(&file_priv->context_idr); mutex_lock(&dev->struct_mutex); - ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); + ctx = i915_gem_create_context(dev, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) { @@ -702,7 +701,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev)); + ctx = i915_gem_create_context(dev, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); From 19dd120ceee085dbac70b1b01bd09d599cf87bd0 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:55 +0200 Subject: [PATCH 147/640] drm/i915: Extract common cleanup into i915_ppgtt_release Address space cleanup isn't really a job for the low-level cleanup callbacks. Without this change we can't reuse the low-level cleanup callback for the aliasing ppgtt cleanup. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b033ff770ac4..c1e2d9a9f9e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -391,9 +391,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - list_del(&vm->global_link); - drm_mm_takedown(&vm->mm); - gen8_ppgtt_unmap_pages(ppgtt); gen8_ppgtt_free(ppgtt); } @@ -974,8 +971,6 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - list_del(&vm->global_link); - drm_mm_takedown(&ppgtt->base.mm); drm_mm_remove_node(&ppgtt->node); gen6_ppgtt_unmap_pages(ppgtt); @@ -1226,6 +1221,9 @@ void i915_ppgtt_release(struct kref *kref) WARN_ON(!list_empty(&ppgtt->base.active_list)); WARN_ON(!list_empty(&ppgtt->base.inactive_list)); + list_del(&ppgtt->base.global_link); + drm_mm_takedown(&ppgtt->base.mm); + ppgtt->base.cleanup(&ppgtt->base); kfree(ppgtt); } From 90d0a0e8d0e64c92c4a6147f3c7cdc7c544d6b1a Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:56 +0200 Subject: [PATCH 148/640] drm/i915: Extract commmon global gtt cleanup code We want to move the aliasing ppgtt cleanup back into the global gtt cleanup code for symmetry, but first we need to create such a place. Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 20 ++++++++++++-------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 895f9f2f35ea..c9af48503f76 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1821,7 +1821,7 @@ out_mtrrfree: arch_phys_wc_del(dev_priv->gtt.mtrr); io_mapping_free(dev_priv->gtt.mappable); out_gtt: - dev_priv->gtt.base.cleanup(&dev_priv->gtt.base); + i915_global_gtt_cleanup(dev); out_regs: intel_uncore_fini(dev); pci_iounmap(dev->pdev, dev_priv->regs); @@ -1920,7 +1920,7 @@ int i915_driver_unload(struct drm_device *dev) destroy_workqueue(dev_priv->wq); pm_qos_remove_request(&dev_priv->pm_qos); - dev_priv->gtt.base.cleanup(&dev_priv->gtt.base); + i915_global_gtt_cleanup(dev); intel_uncore_fini(dev); if (dev_priv->regs != NULL) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c1e2d9a9f9e2..8d94e3582045 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1767,6 +1767,18 @@ void i915_gem_init_global_gtt(struct drm_device *dev) i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); } +void i915_global_gtt_cleanup(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_address_space *vm = &dev_priv->gtt.base; + + if (drm_mm_initialized(&vm->mm)) { + drm_mm_takedown(&vm->mm); + list_del(&vm->global_link); + } + + vm->cleanup(vm); +} static int setup_scratch_page(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -2035,10 +2047,6 @@ static void gen6_gmch_remove(struct i915_address_space *vm) struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base); - if (drm_mm_initialized(&vm->mm)) { - drm_mm_takedown(&vm->mm); - list_del(&vm->global_link); - } iounmap(gtt->gsm); teardown_scratch_page(vm->dev); } @@ -2071,10 +2079,6 @@ static int i915_gmch_probe(struct drm_device *dev, static void i915_gmch_remove(struct i915_address_space *vm) { - if (drm_mm_initialized(&vm->mm)) { - drm_mm_takedown(&vm->mm); - list_del(&vm->global_link); - } intel_gmch_remove(); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 98fbb7309ea5..6280648d4805 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -273,6 +273,7 @@ int i915_gem_gtt_init(struct drm_device *dev); void i915_gem_init_global_gtt(struct drm_device *dev); int i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); +void i915_global_gtt_cleanup(struct drm_device *dev); int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt); From 70e32544aa4027b4c27226da32eb3866e7bbbcdc Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 6 Aug 2014 15:04:57 +0200 Subject: [PATCH 149/640] drm/i915: Cleanup aliasging ppgtt alongside the global gtt Also remove related WARN_ONs which seem to have been hit since a rather long time. But apperently no one noticed since our module reload is already WARNING-infested :( Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 4 ---- drivers/gpu/drm/i915/i915_gem_gtt.c | 7 +++++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index c9af48503f76..04dd6112865e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1388,7 +1388,6 @@ cleanup_gem: i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); mutex_unlock(&dev->struct_mutex); - WARN_ON(dev_priv->mm.aliasing_ppgtt); cleanup_irq: drm_irq_uninstall(dev); cleanup_gem_stolen: @@ -1901,7 +1900,6 @@ int i915_driver_unload(struct drm_device *dev) mutex_lock(&dev->struct_mutex); i915_gem_cleanup_ringbuffer(dev); i915_gem_context_fini(dev); - WARN_ON(dev_priv->mm.aliasing_ppgtt); mutex_unlock(&dev->struct_mutex); i915_gem_cleanup_stolen(dev); @@ -1909,8 +1907,6 @@ int i915_driver_unload(struct drm_device *dev) i915_free_hws(dev); } - WARN_ON(!list_empty(&dev_priv->vm_list)); - drm_vblank_cleanup(dev); intel_teardown_gmbus(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8d94e3582045..d97b280861ee 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1772,6 +1772,12 @@ void i915_global_gtt_cleanup(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct i915_address_space *vm = &dev_priv->gtt.base; + if (dev_priv->mm.aliasing_ppgtt) { + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + + ppgtt->base.cleanup(&ppgtt->base); + } + if (drm_mm_initialized(&vm->mm)) { drm_mm_takedown(&vm->mm); list_del(&vm->global_link); @@ -1779,6 +1785,7 @@ void i915_global_gtt_cleanup(struct drm_device *dev) vm->cleanup(vm); } + static int setup_scratch_page(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; From a08a42ad441e113f87308e0844049cb881f1ac1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Aug 2014 19:39:52 +0300 Subject: [PATCH 150/640] drm/i915: Don't try to enable cursor from setplane when crtc is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure the cursor gets fully clipped when enabling it on a disabled crtc via setplane. This will prevent the lower level code from attempting to enable the cursor in hardware. Cc: Paulo Zanoni Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1bd1aa21a8e9..a93d5ffd3863 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11743,8 +11743,8 @@ intel_cursor_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, }; const struct drm_rect clip = { /* integer pixels */ - .x2 = intel_crtc->config.pipe_src_w, - .y2 = intel_crtc->config.pipe_src_h, + .x2 = intel_crtc->active ? intel_crtc->config.pipe_src_w : 0, + .y2 = intel_crtc->active ? intel_crtc->config.pipe_src_h : 0, }; bool visible; int ret; From d7ce484eeec43079ad842f1d351f53998ed6bb30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Aug 2014 19:39:53 +0300 Subject: [PATCH 151/640] drm/i915: Move CURSIZE setup to i845_update_cursor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CURSIZE register exists on 845/865 only, so move it to i845_update_cursor(). Changes to cursor size must be done only when the cursor is disabled, so do the write just before enabling the cursor. Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a93d5ffd3863..cd3dc3b72798 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8070,6 +8070,7 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base) CURSOR_GAMMA_ENABLE | CURSOR_FORMAT_ARGB); if (intel_crtc->cursor_cntl != cntl) { + I915_WRITE(CURSIZE, (64 << 12) | 64); I915_WRITE(_CURACNTR, cntl); POSTING_READ(_CURACNTR); intel_crtc->cursor_cntl = cntl; @@ -8219,7 +8220,6 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, uint32_t width, uint32_t height) { struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; unsigned old_width; @@ -8292,9 +8292,6 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, addr = obj->phys_handle->busaddr; } - if (IS_GEN2(dev)) - I915_WRITE(CURSIZE, (height << 12) | width); - finish: if (intel_crtc->cursor_bo) { if (!INTEL_INFO(dev)->cursor_needs_physical) From 8ac5466926daef2406f7b25e9a272567cb81adb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 12 Aug 2014 19:39:54 +0300 Subject: [PATCH 152/640] drm/i915: Unify ivb_update_cursor() and i9xx_update_cursor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since commit 5efb3e2838536832c9b6872512e6b6daf592cee9 Author: Ville Syrjälä Date: Wed Apr 9 13:28:53 2014 +0300 drm/i915/chv: Add cursor pipe offsets the only difference between i9xx_update_cursor() and ivb_update_cursor() was the hsw+ pipe csc handling. Let's unify them and we can rid outselves of some duplicated code. Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 41 +--------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index cd3dc3b72798..b3fb127131f0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8104,43 +8104,6 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base) } cntl |= pipe << 28; /* Connect to correct pipe */ } - if (intel_crtc->cursor_cntl != cntl) { - I915_WRITE(CURCNTR(pipe), cntl); - POSTING_READ(CURCNTR(pipe)); - intel_crtc->cursor_cntl = cntl; - } - - /* and commit changes on next vblank */ - I915_WRITE(CURBASE(pipe), base); - POSTING_READ(CURBASE(pipe)); -} - -static void ivb_update_cursor(struct drm_crtc *crtc, u32 base) -{ - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - uint32_t cntl; - - cntl = 0; - if (base) { - cntl = MCURSOR_GAMMA_ENABLE; - switch (intel_crtc->cursor_width) { - case 64: - cntl |= CURSOR_MODE_64_ARGB_AX; - break; - case 128: - cntl |= CURSOR_MODE_128_ARGB_AX; - break; - case 256: - cntl |= CURSOR_MODE_256_ARGB_AX; - break; - default: - WARN_ON(1); - return; - } - } if (IS_HASWELL(dev) || IS_BROADWELL(dev)) cntl |= CURSOR_PIPE_CSC_ENABLE; @@ -8199,9 +8162,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, I915_WRITE(CURPOS(pipe), pos); - if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev) || IS_BROADWELL(dev)) - ivb_update_cursor(crtc, base); - else if (IS_845G(dev) || IS_I865G(dev)) + if (IS_845G(dev) || IS_I865G(dev)) i845_update_cursor(crtc, base); else i9xx_update_cursor(crtc, base); From dc41c154ffc30afb7ee7e891140dead26fce5c39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 13 Aug 2014 11:57:05 +0300 Subject: [PATCH 153/640] drm/i915: Add support for variable cursor size on 845/865 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 845/865 support different cursor sizes as well, albeit a bit differently than later platforms. Add the necessary code to make them work. Untested due to lack of hardware. v2: Warn but accept invalid stride (Chris) Rewrite the cursor size checks for other platforms (Chris) v3: More polish and magic to the cursor size checks (Chris) v4: Moar polish and a comment (Chris) Reviewed-by: Chris Wilson Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 3 +- drivers/gpu/drm/i915/intel_display.c | 113 +++++++++++++++++++++------ drivers/gpu/drm/i915/intel_drv.h | 1 + 3 files changed, 92 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f79c20d49d99..203062e93452 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4128,7 +4128,8 @@ enum punit_power_well { /* Old style CUR*CNTR flags (desktop 8xx) */ #define CURSOR_ENABLE 0x80000000 #define CURSOR_GAMMA_ENABLE 0x40000000 -#define CURSOR_STRIDE_MASK 0x30000000 +#define CURSOR_STRIDE_SHIFT 28 +#define CURSOR_STRIDE(x) ((ffs(x)-9) << CURSOR_STRIDE_SHIFT) /* 256,512,1k,2k */ #define CURSOR_PIPE_CSC_ENABLE (1<<24) #define CURSOR_FORMAT_SHIFT 24 #define CURSOR_FORMAT_MASK (0x07 << CURSOR_FORMAT_SHIFT) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b3fb127131f0..fd15601f6360 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8047,30 +8047,55 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base) struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t cntl; + uint32_t cntl = 0, size = 0; - if (base != intel_crtc->cursor_base) { - /* On these chipsets we can only modify the base whilst - * the cursor is disabled. - */ - if (intel_crtc->cursor_cntl) { - I915_WRITE(_CURACNTR, 0); - POSTING_READ(_CURACNTR); - intel_crtc->cursor_cntl = 0; + if (base) { + unsigned int width = intel_crtc->cursor_width; + unsigned int height = intel_crtc->cursor_height; + unsigned int stride = roundup_pow_of_two(width) * 4; + + switch (stride) { + default: + WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n", + width, stride); + stride = 256; + /* fallthrough */ + case 256: + case 512: + case 1024: + case 2048: + break; } - I915_WRITE(_CURABASE, base); - POSTING_READ(_CURABASE); + cntl |= CURSOR_ENABLE | + CURSOR_GAMMA_ENABLE | + CURSOR_FORMAT_ARGB | + CURSOR_STRIDE(stride); + + size = (height << 12) | width; + } + + if (intel_crtc->cursor_cntl != 0 && + (intel_crtc->cursor_base != base || + intel_crtc->cursor_size != size || + intel_crtc->cursor_cntl != cntl)) { + /* On these chipsets we can only modify the base/size/stride + * whilst the cursor is disabled. + */ + I915_WRITE(_CURACNTR, 0); + POSTING_READ(_CURACNTR); + intel_crtc->cursor_cntl = 0; + } + + if (intel_crtc->cursor_base != base) + I915_WRITE(_CURABASE, base); + + if (intel_crtc->cursor_size != size) { + I915_WRITE(CURSIZE, size); + intel_crtc->cursor_size = size; } - /* XXX width must be 64, stride 256 => 0x00 << 28 */ - cntl = 0; - if (base) - cntl = (CURSOR_ENABLE | - CURSOR_GAMMA_ENABLE | - CURSOR_FORMAT_ARGB); if (intel_crtc->cursor_cntl != cntl) { - I915_WRITE(CURSIZE, (64 << 12) | 64); I915_WRITE(_CURACNTR, cntl); POSTING_READ(_CURACNTR); intel_crtc->cursor_cntl = cntl; @@ -8169,6 +8194,43 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, intel_crtc->cursor_base = base; } +static bool cursor_size_ok(struct drm_device *dev, + uint32_t width, uint32_t height) +{ + if (width == 0 || height == 0) + return false; + + /* + * 845g/865g are special in that they are only limited by + * the width of their cursors, the height is arbitrary up to + * the precision of the register. Everything else requires + * square cursors, limited to a few power-of-two sizes. + */ + if (IS_845G(dev) || IS_I865G(dev)) { + if ((width & 63) != 0) + return false; + + if (width > (IS_845G(dev) ? 64 : 512)) + return false; + + if (height > 1023) + return false; + } else { + switch (width | height) { + case 256: + case 128: + if (IS_GEN2(dev)) + return false; + case 64: + break; + default: + return false; + } + } + + return true; +} + /* * intel_crtc_cursor_set_obj - Set cursor to specified GEM object * @@ -8183,7 +8245,7 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; - unsigned old_width; + unsigned old_width, stride; uint32_t addr; int ret; @@ -8197,14 +8259,13 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc, } /* Check for which cursor types we support */ - if (!((width == 64 && height == 64) || - (width == 128 && height == 128 && !IS_GEN2(dev)) || - (width == 256 && height == 256 && !IS_GEN2(dev)))) { + if (!cursor_size_ok(dev, width, height)) { DRM_DEBUG("Cursor dimension not supported\n"); return -EINVAL; } - if (obj->base.size < width * height * 4) { + stride = roundup_pow_of_two(width) * 4; + if (obj->base.size < stride * height) { DRM_DEBUG_KMS("buffer is too small\n"); ret = -ENOMEM; goto fail; @@ -11797,6 +11858,7 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->cursor_base = ~0; intel_crtc->cursor_cntl = ~0; + intel_crtc->cursor_size = ~0; BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); @@ -12585,7 +12647,10 @@ void intel_modeset_init(struct drm_device *dev) dev->mode_config.max_height = 8192; } - if (IS_GEN2(dev)) { + if (IS_845G(dev) || IS_I865G(dev)) { + dev->mode_config.cursor_width = IS_845G(dev) ? 64 : 512; + dev->mode_config.cursor_height = 1023; + } else if (IS_GEN2(dev)) { dev->mode_config.cursor_width = GEN2_CURSOR_WIDTH; dev->mode_config.cursor_height = GEN2_CURSOR_HEIGHT; } else { diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 666ca8a044ea..579b1d40f934 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -405,6 +405,7 @@ struct intel_crtc { uint32_t cursor_addr; int16_t cursor_width, cursor_height; uint32_t cursor_cntl; + uint32_t cursor_size; uint32_t cursor_base; struct intel_plane_config plane_config; From 7312e2ddec1ffe4511a85a2814df44e79ded3c1d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Aug 2014 12:14:12 +0100 Subject: [PATCH 154/640] drm/i915: Replace __I915__ with typesafe variant Ville pointed out the GCCism __builtin_types_compatible_p() that we could use to replace our heavily casted presumption __I915__ macro that was based on comparing struct sizes. Signed-off-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 --- drivers/gpu/drm/i915/i915_drv.h | 12 ++++++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 04dd6112865e..db56b26a08c9 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1595,9 +1595,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!drm_core_check_feature(dev, DRIVER_MODESET) && !dev->agp) return -EINVAL; - /* For the ugly agnostic INTEL_INFO macro */ - BUILD_BUG_ON(sizeof(*dev_priv) == sizeof(*dev)); - dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); if (dev_priv == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb99a109c0bc..2af0071efb38 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2004,8 +2004,16 @@ struct drm_i915_cmd_table { }; /* Note that the (struct drm_i915_private *) cast is just to shut up gcc. */ -#define __I915__(p) ((sizeof(*(p)) == sizeof(struct drm_i915_private)) ? \ - (struct drm_i915_private *)(p) : to_i915(p)) +#define __I915__(p) ({ \ + struct drm_i915_private *__p; \ + if (__builtin_types_compatible_p(typeof(*p), struct drm_i915_private)) \ + __p = (struct drm_i915_private *)p; \ + else if (__builtin_types_compatible_p(typeof(*p), struct drm_device)) \ + __p = to_i915((struct drm_device *)p); \ + else \ + BUILD_BUG(); \ + __p; \ +}) #define INTEL_INFO(p) (&__I915__(p)->info) #define INTEL_DEVID(p) (INTEL_INFO(p)->device_id) From 82e3b8c130f046b7dd1e7898c10e40edb52fee6d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 Aug 2014 13:09:46 +0100 Subject: [PATCH 155/640] drm/i915: Localise the fbdev console lock frobbing Rather than take and release the console_lock() around a non-existent DRM_I915_FBDEV, move the lock acquisation into the callee where it will be compiled out by the config option entirely. This includes moving the deferred fb_set_suspend() dance and encapsulating it entirely within intel_fbdev.c. v2: Use an integral work item so that we can explicitly flush the work upon suspend/unload. Signed-off-by: Chris Wilson Cc: Daniel Vetter [danvet: Add the flush_work in fbdev_fini per the mailing list discussion. And s/BUG_ON/WARN_ON/ because.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 -- drivers/gpu/drm/i915/i915_drv.c | 28 ++----------------- drivers/gpu/drm/i915/i915_drv.h | 9 +----- drivers/gpu/drm/i915/intel_drv.h | 4 +-- drivers/gpu/drm/i915/intel_fbdev.c | 44 +++++++++++++++++++++++++++++- 5 files changed, 48 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index db56b26a08c9..3f676f904f7e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1350,8 +1350,6 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_irq; - INIT_WORK(&dev_priv->console_resume_work, intel_console_resume); - intel_modeset_gem_init(dev); /* Always safe in the mode setting case. */ @@ -1864,7 +1862,6 @@ int i915_driver_unload(struct drm_device *dev) if (drm_core_check_feature(dev, DRIVER_MODESET)) { intel_fbdev_fini(dev); intel_modeset_cleanup(dev); - cancel_work_sync(&dev_priv->console_resume_work); /* * free the memory space allocated for the child device diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ec96f9a9724c..01de97776d81 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -558,9 +558,7 @@ static int i915_drm_freeze(struct drm_device *dev) intel_uncore_forcewake_reset(dev, false); intel_opregion_fini(dev); - console_lock(); - intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED); - console_unlock(); + intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED, true); dev_priv->suspend_count++; @@ -599,18 +597,6 @@ int i915_suspend(struct drm_device *dev, pm_message_t state) return 0; } -void intel_console_resume(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, struct drm_i915_private, - console_resume_work); - struct drm_device *dev = dev_priv->dev; - - console_lock(); - intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING); - console_unlock(); -} - static int i915_drm_thaw_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -681,17 +667,7 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) intel_opregion_init(dev); - /* - * The console lock can be pretty contented on resume due - * to all the printk activity. Try to keep it out of the hot - * path of resume if possible. - */ - if (console_trylock()) { - intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING); - console_unlock(); - } else { - schedule_work(&dev_priv->console_resume_work); - } + intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING, false); mutex_lock(&dev_priv->modeset_restore_lock); dev_priv->modeset_restore = MODESET_DONE; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2af0071efb38..541fb6f295bb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1586,14 +1586,9 @@ struct drm_i915_private { #ifdef CONFIG_DRM_I915_FBDEV /* list of fbdev register on this device */ struct intel_fbdev *fbdev; + struct work_struct fbdev_suspend_work; #endif - /* - * The console may be contended at resume, but we don't - * want it to block on it. - */ - struct work_struct console_resume_work; - struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; @@ -2225,8 +2220,6 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); -extern void intel_console_resume(struct work_struct *work); - /* i915_irq.c */ void i915_queue_hangcheck(struct drm_device *dev); __printf(3, 4) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 579b1d40f934..4ab0d928b819 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -948,7 +948,7 @@ void intel_dvo_init(struct drm_device *dev); extern int intel_fbdev_init(struct drm_device *dev); extern void intel_fbdev_initial_config(struct drm_device *dev); extern void intel_fbdev_fini(struct drm_device *dev); -extern void intel_fbdev_set_suspend(struct drm_device *dev, int state); +extern void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous); extern void intel_fbdev_output_poll_changed(struct drm_device *dev); extern void intel_fbdev_restore_mode(struct drm_device *dev); #else @@ -965,7 +965,7 @@ static inline void intel_fbdev_fini(struct drm_device *dev) { } -static inline void intel_fbdev_set_suspend(struct drm_device *dev, int state) +static inline void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) { } diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index f475414671d8..cf052a39558d 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -636,6 +637,15 @@ out: return false; } +static void intel_fbdev_suspend_worker(struct work_struct *work) +{ + intel_fbdev_set_suspend(container_of(work, + struct drm_i915_private, + fbdev_suspend_work)->dev, + FBINFO_STATE_RUNNING, + true); +} + int intel_fbdev_init(struct drm_device *dev) { struct intel_fbdev *ifbdev; @@ -662,6 +672,8 @@ int intel_fbdev_init(struct drm_device *dev) } dev_priv->fbdev = ifbdev; + INIT_WORK(&dev_priv->fbdev_suspend_work, intel_fbdev_suspend_worker); + drm_fb_helper_single_add_all_connectors(&ifbdev->helper); return 0; @@ -682,12 +694,14 @@ void intel_fbdev_fini(struct drm_device *dev) if (!dev_priv->fbdev) return; + flush_work(&dev_priv->fbdev_suspend_work); + intel_fbdev_destroy(dev, dev_priv->fbdev); kfree(dev_priv->fbdev); dev_priv->fbdev = NULL; } -void intel_fbdev_set_suspend(struct drm_device *dev, int state) +void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_fbdev *ifbdev = dev_priv->fbdev; @@ -698,6 +712,33 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state) info = ifbdev->helper.fbdev; + if (synchronous) { + /* Flush any pending work to turn the console on, and then + * wait to turn it off. It must be synchronous as we are + * about to suspend or unload the driver. + * + * Note that from within the work-handler, we cannot flush + * ourselves, so only flush outstanding work upon suspend! + */ + if (state != FBINFO_STATE_RUNNING) + flush_work(&dev_priv->fbdev_suspend_work); + console_lock(); + } else { + /* + * The console lock can be pretty contented on resume due + * to all the printk activity. Try to keep it out of the hot + * path of resume if possible. + */ + WARN_ON(state != FBINFO_STATE_RUNNING); + if (!console_trylock()) { + /* Don't block our own workqueue as this can + * be run in parallel with other i915.ko tasks. + */ + schedule_work(&dev_priv->fbdev_suspend_work); + return; + } + } + /* On resume from hibernation: If the object is shmemfs backed, it has * been restored from swap. If the object is stolen however, it will be * full of whatever garbage was left in there. @@ -706,6 +747,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state) memset_io(info->screen_base, 0, info->screen_size); fb_set_suspend(info, state); + console_unlock(); } void intel_fbdev_output_poll_changed(struct drm_device *dev) From ebc3282409ae4d1e90c2f9608665cc4d8fbf7e73 Mon Sep 17 00:00:00 2001 From: Sagar Kamble Date: Wed, 13 Aug 2014 23:07:05 +0530 Subject: [PATCH 156/640] drm/i915: Created common handler for platform specific suspend/resume With this change, intel_runtime_suspend and intel_runtime_resume functions become completely platform agnostic. Platform specific suspend/resume changes are moved to intel_suspend_complete and intel_resume_prepare. Cc: Imre Deak Cc: Paulo Zanoni Cc: Daniel Vetter Cc: Jani Nikula Cc: Goel, Akash Signed-off-by: Sagar Kamble Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 76 +++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 01de97776d81..b06e975dba39 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -494,6 +494,10 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) return true; } + +static int intel_suspend_complete(struct drm_i915_private *dev_priv); +static int intel_resume_prepare(struct drm_i915_private *dev_priv); + static int i915_drm_freeze(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -959,14 +963,14 @@ static int i915_pm_poweroff(struct device *dev) return i915_drm_freeze(drm_dev); } -static int hsw_runtime_suspend(struct drm_i915_private *dev_priv) +static int hsw_suspend_complete(struct drm_i915_private *dev_priv) { hsw_enable_pc8(dev_priv); return 0; } -static int snb_runtime_resume(struct drm_i915_private *dev_priv) +static int snb_resume_prepare(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; @@ -975,7 +979,7 @@ static int snb_runtime_resume(struct drm_i915_private *dev_priv) return 0; } -static int hsw_runtime_resume(struct drm_i915_private *dev_priv) +static int hsw_resume_prepare(struct drm_i915_private *dev_priv) { hsw_disable_pc8(dev_priv); @@ -1271,7 +1275,7 @@ static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv) I915_WRITE(VLV_GTLC_PW_STATUS, VLV_GTLC_ALLOWWAKEERR); } -static int vlv_runtime_suspend(struct drm_i915_private *dev_priv) +static int vlv_suspend_complete(struct drm_i915_private *dev_priv) { u32 mask; int err; @@ -1311,7 +1315,7 @@ err1: return err; } -static int vlv_runtime_resume(struct drm_i915_private *dev_priv) +static int vlv_resume_prepare(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; int err; @@ -1389,17 +1393,7 @@ static int intel_runtime_suspend(struct device *device) cancel_work_sync(&dev_priv->rps.work); intel_runtime_pm_disable_interrupts(dev); - if (IS_GEN6(dev)) { - ret = 0; - } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { - ret = hsw_runtime_suspend(dev_priv); - } else if (IS_VALLEYVIEW(dev)) { - ret = vlv_runtime_suspend(dev_priv); - } else { - ret = -ENODEV; - WARN_ON(1); - } - + ret = intel_suspend_complete(dev_priv); if (ret) { DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret); intel_runtime_pm_restore_interrupts(dev); @@ -1437,17 +1431,7 @@ static int intel_runtime_resume(struct device *device) intel_opregion_notify_adapter(dev, PCI_D0); dev_priv->pm.suspended = false; - if (IS_GEN6(dev)) { - ret = snb_runtime_resume(dev_priv); - } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { - ret = hsw_runtime_resume(dev_priv); - } else if (IS_VALLEYVIEW(dev)) { - ret = vlv_runtime_resume(dev_priv); - } else { - WARN_ON(1); - ret = -ENODEV; - } - + ret = intel_resume_prepare(dev_priv); /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). @@ -1466,6 +1450,44 @@ static int intel_runtime_resume(struct device *device) return ret; } +static int intel_suspend_complete(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + int ret; + + if (IS_GEN6(dev)) { + ret = 0; + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + ret = hsw_suspend_complete(dev_priv); + } else if (IS_VALLEYVIEW(dev)) { + ret = vlv_suspend_complete(dev_priv); + } else { + ret = -ENODEV; + WARN_ON(1); + } + + return ret; +} + +static int intel_resume_prepare(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + int ret; + + if (IS_GEN6(dev)) { + ret = snb_resume_prepare(dev_priv); + } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + ret = hsw_resume_prepare(dev_priv); + } else if (IS_VALLEYVIEW(dev)) { + ret = vlv_resume_prepare(dev_priv); + } else { + WARN_ON(1); + ret = -ENODEV; + } + + return ret; +} + static const struct dev_pm_ops i915_pm_ops = { .suspend = i915_pm_suspend, .suspend_late = i915_pm_suspend_late, From 016970beb05da6285c2f3ed2bee1c676cb75972e Mon Sep 17 00:00:00 2001 From: Sagar Kamble Date: Wed, 13 Aug 2014 23:07:06 +0530 Subject: [PATCH 157/640] drm/i915: Sharing platform specific sequence between runtime and system suspend/ resume paths On VLV, post S0i3 during i915_drm_thaw following issue is observed during ring initialization. [ 335.604039] [drm:stop_ring] ERROR render ring :timed out trying to stop ring [ 336.607340] [drm:stop_ring] ERROR render ring :timed out trying to stop ring [ 336.607345] [drm:init_ring_common] ERROR failed to set render ring head to zero ctl 00000000 head 00000000 tail 00000000 start 00000000 [ 337.610645] [drm:stop_ring] ERROR bsd ring :timed out trying to stop ring [ 338.613952] [drm:stop_ring] ERROR bsd ring :timed out trying to stop ring [ 338.613956] [drm:init_ring_common] ERROR failed to set bsd ring head to zero ctl 00000000 head 00000000 tail 00000000 start 00000000 [ 339.617256] [drm:stop_ring] ERROR render ring :timed out trying to stop ring [ 339.617258] -----------[ cut here ]----------- [ 339.617267] WARNING: CPU: 0 PID: 6 at drivers/gpu/drm/i915/intel_ringbuffer.c:1666 intel_cleanup_ring+0xe6/0xf0() [ 339.617396] --[ end trace 5ef5ed1a3c92e2a6 ]-- [ 339.617428] [drm:__i915_drm_thaw] ERROR failed to re-initialize GPU, declaring wedged! This is happening since wake is not enabled and Gunit registers are not restored. For this system suspend/resume paths need to follow save/restore and additional platform specific setup in suspend_complete and resume_prepare. suspend_complete is shared unconditionaly for VLV, HSW, BDW. resume_prepare for HSW and BDW has pc8 disabling which is needed during thaw_early so sharing uncondtionally. For VLV and SNB runtime resume specific sequence exists. Cc: Imre Deak Cc: Paulo Zanoni Cc: Daniel Vetter Cc: Jani Nikula Cc: Goel, Akash Signed-off-by: Sagar Kamble Reviewed-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 63 ++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b06e975dba39..2f112853c36f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -496,7 +496,8 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) static int intel_suspend_complete(struct drm_i915_private *dev_priv); -static int intel_resume_prepare(struct drm_i915_private *dev_priv); +static int intel_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume); static int i915_drm_freeze(struct drm_device *dev) { @@ -604,15 +605,17 @@ int i915_suspend(struct drm_device *dev, pm_message_t state) static int i915_drm_thaw_early(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; + int ret; - if (IS_HASWELL(dev) || IS_BROADWELL(dev)) - hsw_disable_pc8(dev_priv); + ret = intel_resume_prepare(dev_priv, false); + if (ret) + DRM_ERROR("Resume prepare failed: %d,Continuing resume\n", ret); intel_uncore_early_sanitize(dev, true); intel_uncore_sanitize(dev); intel_power_domains_init_hw(dev_priv); - return 0; + return ret; } static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings) @@ -888,6 +891,7 @@ static int i915_pm_suspend_late(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct drm_device *drm_dev = pci_get_drvdata(pdev); struct drm_i915_private *dev_priv = drm_dev->dev_private; + int ret; /* * We have a suspedn ordering issue with the snd-hda driver also @@ -901,13 +905,16 @@ static int i915_pm_suspend_late(struct device *dev) if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - if (IS_HASWELL(drm_dev) || IS_BROADWELL(drm_dev)) - hsw_enable_pc8(dev_priv); + ret = intel_suspend_complete(dev_priv); - pci_disable_device(pdev); - pci_set_power_state(pdev, PCI_D3hot); + if (ret) + DRM_ERROR("Suspend complete failed: %d\n", ret); + else { + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3hot); + } - return 0; + return ret; } static int i915_pm_resume_early(struct device *dev) @@ -970,16 +977,19 @@ static int hsw_suspend_complete(struct drm_i915_private *dev_priv) return 0; } -static int snb_resume_prepare(struct drm_i915_private *dev_priv) +static int snb_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { struct drm_device *dev = dev_priv->dev; - intel_init_pch_refclk(dev); + if (rpm_resume) + intel_init_pch_refclk(dev); return 0; } -static int hsw_resume_prepare(struct drm_i915_private *dev_priv) +static int hsw_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { hsw_disable_pc8(dev_priv); @@ -1315,7 +1325,8 @@ err1: return err; } -static int vlv_resume_prepare(struct drm_i915_private *dev_priv) +static int vlv_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { struct drm_device *dev = dev_priv->dev; int err; @@ -1340,8 +1351,10 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv) vlv_check_no_gt_access(dev_priv); - intel_init_clock_gating(dev); - i915_gem_restore_fences(dev); + if (rpm_resume) { + intel_init_clock_gating(dev); + i915_gem_restore_fences(dev); + } return ret; } @@ -1431,7 +1444,7 @@ static int intel_runtime_resume(struct device *device) intel_opregion_notify_adapter(dev, PCI_D0); dev_priv->pm.suspended = false; - ret = intel_resume_prepare(dev_priv); + ret = intel_resume_prepare(dev_priv, true); /* * No point of rolling back things in case of an error, as the best * we can do is to hope that things will still work (and disable RPM). @@ -1450,6 +1463,10 @@ static int intel_runtime_resume(struct device *device) return ret; } +/* + * This function implements common functionality of runtime and system + * suspend sequence. + */ static int intel_suspend_complete(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; @@ -1469,17 +1486,23 @@ static int intel_suspend_complete(struct drm_i915_private *dev_priv) return ret; } -static int intel_resume_prepare(struct drm_i915_private *dev_priv) +/* + * This function implements common functionality of runtime and system + * resume sequence. Variable rpm_resume used for implementing different + * code paths. + */ +static int intel_resume_prepare(struct drm_i915_private *dev_priv, + bool rpm_resume) { struct drm_device *dev = dev_priv->dev; int ret; if (IS_GEN6(dev)) { - ret = snb_resume_prepare(dev_priv); + ret = snb_resume_prepare(dev_priv, rpm_resume); } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { - ret = hsw_resume_prepare(dev_priv); + ret = hsw_resume_prepare(dev_priv, rpm_resume); } else if (IS_VALLEYVIEW(dev)) { - ret = vlv_resume_prepare(dev_priv); + ret = vlv_resume_prepare(dev_priv, rpm_resume); } else { WARN_ON(1); ret = -ENODEV; From 3a448734902359113b0c7c3454ce4cd56dc1e61f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Aug 2014 20:05:47 +0100 Subject: [PATCH 158/640] drm/i915: Print captured bo for all VM in error state The current error state harks back to the era of just a single VM. For full-ppgtt, we capture every bo on every VM. It behoves us to then print every bo for every VM, which we currently fail to do and so miss vital information in the error state. v2: Use the vma address rather than -1! Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gpu_error.c | 82 +++++++++++++++++++-------- 2 files changed, 59 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 541fb6f295bb..ed52ac744105 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -396,6 +396,7 @@ struct drm_i915_error_state { pid_t pid; char comm[TASK_COMM_LEN]; } ring[I915_NUM_RINGS]; + struct drm_i915_error_buffer { u32 size; u32 name; @@ -414,6 +415,7 @@ struct drm_i915_error_state { } **active_bo, **pinned_bo; u32 *active_bo_count, *pinned_bo_count; + u32 vm_count; }; struct intel_connector; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index fc11ac6b0373..35e70d5d6282 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -192,10 +192,10 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, struct drm_i915_error_buffer *err, int count) { - err_printf(m, "%s [%d]:\n", name, count); + err_printf(m, " %s [%d]:\n", name, count); while (count--) { - err_printf(m, " %08x %8u %02x %02x %x %x", + err_printf(m, " %08x %8u %02x %02x %x %x", err->gtt_offset, err->size, err->read_domains, @@ -393,15 +393,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, i915_ring_error_state(m, dev, &error->ring[i]); } - if (error->active_bo) - print_error_buffers(m, "Active", - error->active_bo[0], - error->active_bo_count[0]); + for (i = 0; i < error->vm_count; i++) { + err_printf(m, "vm[%d]\n", i); + + print_error_buffers(m, "Active", + error->active_bo[i], + error->active_bo_count[i]); - if (error->pinned_bo) print_error_buffers(m, "Pinned", - error->pinned_bo[0], - error->pinned_bo_count[0]); + error->pinned_bo[i], + error->pinned_bo_count[i]); + } for (i = 0; i < ARRAY_SIZE(error->ring); i++) { obj = error->ring[i].batchbuffer; @@ -644,13 +646,15 @@ unwind: (src)->base.size>>PAGE_SHIFT) static void capture_bo(struct drm_i915_error_buffer *err, - struct drm_i915_gem_object *obj) + struct i915_vma *vma) { + struct drm_i915_gem_object *obj = vma->obj; + err->size = obj->base.size; err->name = obj->base.name; err->rseqno = obj->last_read_seqno; err->wseqno = obj->last_write_seqno; - err->gtt_offset = i915_gem_obj_ggtt_offset(obj); + err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; err->fence_reg = obj->fence_reg; @@ -674,7 +678,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err, int i = 0; list_for_each_entry(vma, head, mm_list) { - capture_bo(err++, vma->obj); + capture_bo(err++, vma); if (++i == count) break; } @@ -683,21 +687,27 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err, } static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, - int count, struct list_head *head) + int count, struct list_head *head, + struct i915_address_space *vm) { struct drm_i915_gem_object *obj; - int i = 0; + struct drm_i915_error_buffer * const first = err; + struct drm_i915_error_buffer * const last = err + count; list_for_each_entry(obj, head, global_list) { - if (!i915_gem_obj_is_pinned(obj)) - continue; + struct i915_vma *vma; - capture_bo(err++, obj); - if (++i == count) + if (err == last) break; + + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->vm == vm && vma->pin_count > 0) { + capture_bo(err++, vma); + break; + } } - return i; + return err - first; } /* Generate a semi-unique error code. The code is not meant to have meaning, The @@ -1053,9 +1063,14 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, list_for_each_entry(vma, &vm->active_list, mm_list) i++; error->active_bo_count[ndx] = i; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) - if (i915_gem_obj_is_pinned(obj)) - i++; + + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + list_for_each_entry(vma, &obj->vma_list, vma_link) + if (vma->vm == vm && vma->pin_count > 0) { + i++; + break; + } + } error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx]; if (i) { @@ -1074,7 +1089,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, error->pinned_bo_count[ndx] = capture_pinned_bo(pinned_bo, error->pinned_bo_count[ndx], - &dev_priv->mm.bound_list); + &dev_priv->mm.bound_list, vm); error->active_bo[ndx] = active_bo; error->pinned_bo[ndx] = pinned_bo; } @@ -1095,8 +1110,25 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv, error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count), GFP_ATOMIC); - list_for_each_entry(vm, &dev_priv->vm_list, global_link) - i915_gem_capture_vm(dev_priv, error, vm, i++); + if (error->active_bo == NULL || + error->pinned_bo == NULL || + error->active_bo_count == NULL || + error->pinned_bo_count == NULL) { + kfree(error->active_bo); + kfree(error->active_bo_count); + kfree(error->pinned_bo); + kfree(error->pinned_bo_count); + + error->active_bo = NULL; + error->active_bo_count = NULL; + error->pinned_bo = NULL; + error->pinned_bo_count = NULL; + } else { + list_for_each_entry(vm, &dev_priv->vm_list, global_link) + i915_gem_capture_vm(dev_priv, error, vm, i++); + + error->vm_count = cnt; + } } /* Capture all registers which don't fit into another category. */ From 582d67f0b19afc2299bc8977aba835d8d25bb591 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:16 +0100 Subject: [PATCH 159/640] drm/i915: Add temporary ring->ctx backpointer The execlist patches have a bit a convoluted and long history and due to that have the actual submission still misplaced deeply burried in the low-level ringbuffer handling code. This design goes back to the legacy ringbuffer code with its tricky lazy request and simple work submissiion using ring tail writes. For that reason they need a ring->ctx backpointer. The goal is to unburry that code and move it up into a level where the full execlist context is available so that we can ditch this backpointer. Until that's done make it really obvious that there's work still to be done. Cc: Oscar Mateo Cc: Thomas Daniel Acked-by: Thomas Daniel Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 2 ++ drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6b5f416b5c0d..c2352d1b23fa 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1086,6 +1086,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx, } ringbuf->ring = ring; + ringbuf->FIXME_lrc_ctx = ctx; + ringbuf->size = 32 * PAGE_SIZE; ringbuf->effective_size = ringbuf->size; ringbuf->head = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 24437da91f77..26785ca72530 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -99,6 +99,13 @@ struct intel_ringbuffer { struct intel_engine_cs *ring; + /* + * FIXME: This backpointer is an artifact of the history of how the + * execlist patches came into being. It will get removed once the basic + * code has landed. + */ + struct intel_context *FIXME_lrc_ctx; + u32 head; u32 tail; int space; From 295ee85316aedfe1878306d71b5e9c7d4498fb1b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 30 Jul 2014 14:23:44 +0200 Subject: [PATCH 160/640] drm: Docbook fixes Bunch of small leftovers spotted by looking at the make htmldocs output. I've left out dp mst, there's too much amiss there. v2: Also add the missing parameter docbook in the dp mst code - Dave Airlie correctly pointed out that we don't actually want kerneldoc for the missing structure members in header files. Cc: Dave Airlie Reviewed-by: Matt Roper Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 5 +++-- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + drivers/gpu/drm/drm_edid.c | 2 +- drivers/gpu/drm/drm_modeset_lock.c | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 285e62a134b2..f09b75212081 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -3512,9 +3512,10 @@ EXPORT_SYMBOL(drm_property_create_enum); * @flags: flags specifying the property type * @name: name of the property * @props: enumeration lists with property bitflags - * @num_values: number of pre-defined values + * @num_props: size of the @props array + * @supported_bits: bitmask of all supported enumeration values * - * This creates a new generic drm property which can then be attached to a drm + * This creates a new bitmask drm property which can then be attached to a drm * object with drm_object_attach_property. The returned property object must be * freed with drm_property_destroy. * diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index ac3c2738db94..352f5d6c763c 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2071,6 +2071,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) * drm_dp_mst_hpd_irq() - MST hotplug IRQ notify * @mgr: manager to notify irq for. * @esi: 4 bytes from SINK_COUNT_ESI + * @handled: whether the hpd interrupt was consumed or not * * This should be called from the driver when it detects a short IRQ, * along with the value of the DEVICE_SERVICE_IRQ_VECTOR_ESI0. The diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 1dbf3bc4c6a3..f905c63c0f68 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3433,10 +3433,10 @@ EXPORT_SYMBOL(drm_rgb_quant_range_selectable); /** * drm_assign_hdmi_deep_color_info - detect whether monitor supports * hdmi deep color modes and update drm_display_info if so. - * * @edid: monitor EDID information * @info: Updated with maximum supported deep color bpc and color format * if deep color supported. + * @connector: DRM connector, used only for debug output * * Parse the CEA extension according to CEA-861-B. * Return true if HDMI deep color supported, false if not or unknown. diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 5280b64a0230..8749fc06570e 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -199,7 +199,7 @@ EXPORT_SYMBOL(drm_modeset_lock_crtc); /** * drm_modeset_legacy_acquire_ctx - find acquire ctx for legacy ioctls - * crtc: drm crtc + * @crtc: drm crtc * * Legacy ioctl operations like cursor updates or page flips only have per-crtc * locking, and store the acquire ctx in the corresponding crtc. All other From c11cda52193dfa459dfea38f00b19bc9325fa922 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 8 Aug 2014 18:50:18 +0100 Subject: [PATCH 161/640] drm: Don't return 0 for a value used as a denominator Static analysis will be unhappy if a function can theoretically return 0 and we're trying to divide by that value. Mark that case that cannot occur as a BUG() instead. Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_dp_mst_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 352f5d6c763c..b3adf1445020 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1772,7 +1772,7 @@ static int drm_dp_get_vc_payload_bw(int dp_link_bw, int dp_link_count) case DP_LINK_BW_5_4: return 10 * dp_link_count; } - return 0; + BUG(); } /** From 14f476fa24e81d0beea1aa14d763102958518d60 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 8 Aug 2014 19:15:20 +0100 Subject: [PATCH 162/640] drm: Use the type of the array element when reallocating Static analysers find it 'suspicious', that we're trying to allocate memory for elements of size sizeof(struct drm_fb_helper_connector) when the array is defined as struct drm_fb_helper_connector **. Use sizeof(struct drm_fb_helper_connector *) instead. Note that the structure being defined as: struct drm_fb_helper_connector { struct drm_connector *connector; }; This was still doing the right thing, but may not in the future if additional fields are added. Cc: Todd Previte Cc: Dave Airlie Signed-off-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_fb_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 7b7b9565188f..6019392b19cc 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -126,7 +126,7 @@ int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper, struct drm_ WARN_ON(!mutex_is_locked(&fb_helper->dev->mode_config.mutex)); if (fb_helper->connector_count + 1 > fb_helper->connector_info_alloc_count) { - temp = krealloc(fb_helper->connector_info, sizeof(struct drm_fb_helper_connector) * (fb_helper->connector_count + 1), GFP_KERNEL); + temp = krealloc(fb_helper->connector_info, sizeof(struct drm_fb_helper_connector *) * (fb_helper->connector_count + 1), GFP_KERNEL); if (!temp) return -ENOMEM; From 48e29f5535b9eb506c44bd8f41bd9348fd219435 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:29 +0100 Subject: [PATCH 163/640] drm/i915/bdw: Emission of requests with logical rings On a previous iteration of this patch, I created an Execlists version of __i915_add_request and asbtracted it away as a vfunc. Daniel Vetter wondered then why that was needed: "with the clean split in command submission I expect every function to know wether it'll submit to an lrc (everything in intel_lrc.c) or wether it'll submit to a legacy ring (existing code), so I don't see a need for an add_request vfunc." The honest, hairy truth is that this patch is the glue keeping the whole logical ring puzzle together: - i915_add_request is used by intel_ring_idle, which in turn is used by i915_gpu_idle, which in turn is used in several places inside the eviction and gtt codes. - Also, it is used by i915_gem_check_olr, which is littered all over i915_gem.c - ... If I were to duplicate all the code that directly or indirectly uses __i915_add_request, I'll end up creating a separate driver. To show the differences between the existing legacy version and the new Execlists one, this time I have special-cased __i915_add_request instead of adding an add_request vfunc. I hope this helps to untangle this Gordian knot. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Adjust to ringbuf->FIXME_lrc_ctx per the discussion with Thomas Daniel.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 72 +++++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_lrc.c | 30 +++++++++++-- drivers/gpu/drm/i915/intel_lrc.h | 1 + 3 files changed, 80 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6c2f0b886eb0..32fa1e9eb844 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2311,10 +2311,21 @@ int __i915_add_request(struct intel_engine_cs *ring, { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct drm_i915_gem_request *request; + struct intel_ringbuffer *ringbuf; u32 request_ring_position, request_start; int ret; - request_start = intel_ring_get_tail(ring->buffer); + request = ring->preallocated_lazy_request; + if (WARN_ON(request == NULL)) + return -ENOMEM; + + if (i915.enable_execlists) { + struct intel_context *ctx = request->ctx; + ringbuf = ctx->engine[ring->id].ringbuf; + } else + ringbuf = ring->buffer; + + request_start = intel_ring_get_tail(ringbuf); /* * Emit any outstanding flushes - execbuf can fail to emit the flush * after having emitted the batchbuffer command. Hence we need to fix @@ -2322,24 +2333,32 @@ int __i915_add_request(struct intel_engine_cs *ring, * is that the flush _must_ happen before the next request, no matter * what. */ - ret = intel_ring_flush_all_caches(ring); - if (ret) - return ret; - - request = ring->preallocated_lazy_request; - if (WARN_ON(request == NULL)) - return -ENOMEM; + if (i915.enable_execlists) { + ret = logical_ring_flush_all_caches(ringbuf); + if (ret) + return ret; + } else { + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; + } /* Record the position of the start of the request so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the * position of the head. */ - request_ring_position = intel_ring_get_tail(ring->buffer); + request_ring_position = intel_ring_get_tail(ringbuf); - ret = ring->add_request(ring); - if (ret) - return ret; + if (i915.enable_execlists) { + ret = ring->emit_request(ringbuf); + if (ret) + return ret; + } else { + ret = ring->add_request(ring); + if (ret) + return ret; + } request->seqno = intel_ring_get_seqno(ring); request->ring = ring; @@ -2354,12 +2373,14 @@ int __i915_add_request(struct intel_engine_cs *ring, */ request->batch_obj = obj; - /* Hold a reference to the current context so that we can inspect - * it later in case a hangcheck error event fires. - */ - request->ctx = ring->last_context; - if (request->ctx) - i915_gem_context_reference(request->ctx); + if (!i915.enable_execlists) { + /* Hold a reference to the current context so that we can inspect + * it later in case a hangcheck error event fires. + */ + request->ctx = ring->last_context; + if (request->ctx) + i915_gem_context_reference(request->ctx); + } request->emitted_jiffies = jiffies; list_add_tail(&request->list, &ring->request_list); @@ -2614,6 +2635,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; + struct intel_ringbuffer *ringbuf; request = list_first_entry(&ring->request_list, struct drm_i915_gem_request, @@ -2623,12 +2645,24 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) break; trace_i915_gem_request_retire(ring, request->seqno); + + /* This is one of the few common intersection points + * between legacy ringbuffer submission and execlists: + * we need to tell them apart in order to find the correct + * ringbuffer to which the request belongs to. + */ + if (i915.enable_execlists) { + struct intel_context *ctx = request->ctx; + ringbuf = ctx->engine[ring->id].ringbuf; + } else + ringbuf = ring->buffer; + /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position * of tail of the request to update the last known position * of the GPU head. */ - ring->buffer->last_retired_head = request->tail; + ringbuf->last_retired_head = request->tail; i915_gem_free_request(request); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c2352d1b23fa..cd6ddd80e54c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -252,6 +252,22 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring) I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); } +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) +{ + struct intel_engine_cs *ring = ringbuf->ring; + int ret; + + if (!ring->gpu_caches_dirty) + return 0; + + ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + + ring->gpu_caches_dirty = false; + return 0; +} + void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) { intel_logical_ring_advance(ringbuf); @@ -262,7 +278,8 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) /* TODO: how to submit a context to the ELSP is not here yet */ } -static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) +static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, + struct intel_context *ctx) { if (ring->outstanding_lazy_seqno) return 0; @@ -274,6 +291,13 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring) if (request == NULL) return -ENOMEM; + /* Hold a reference to the context this request belongs to + * (we will need it when the time comes to emit/retire the + * request). + */ + request->ctx = ctx; + i915_gem_context_reference(request->ctx); + ring->preallocated_lazy_request = request; } @@ -312,8 +336,6 @@ static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf, if (ret) return ret; - /* TODO: make sure we update the right ringbuffer's last_retired_head - * when retiring requests */ i915_gem_retire_requests_ring(ring); ringbuf->head = ringbuf->last_retired_head; ringbuf->last_retired_head = -1; @@ -433,7 +455,7 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) return ret; /* Preallocate the olr before touching the ring */ - ret = logical_ring_alloc_seqno(ring); + ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 4e032875c1fd..460e1af15600 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -29,6 +29,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); int intel_logical_rings_init(struct drm_device *dev); +int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf); void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) { From 84b790f80e5153d8d54074aa4eae49ff3070f2f1 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 24 Jul 2014 17:04:36 +0100 Subject: [PATCH 164/640] drm/i915/bdw: Implement context switching (somewhat) A context switch occurs by submitting a context descriptor to the ExecList Submission Port. Given that we can now initialize a context, it's possible to begin implementing the context switch by creating the descriptor and submitting it to ELSP (actually two, since the ELSP has two ports). The context object must be mapped in the GGTT, which means it must exist in the 0-4GB graphics VA range. Signed-off-by: Ben Widawsky v2: This code has changed quite a lot in various rebases. Of particular importance is that now we use the globally unique Submission ID to send to the hardware. Also, context pages are now pinned unconditionally to GGTT, so there is no need to bind them. v3: Use LRCA[31:12] as hwCtxId[19:0]. This guarantees that the HW context ID we submit to the ELSP is globally unique and != 0 (Bspec requirements of the software use-only bits of the Context ID in the Context Descriptor Format) without the hassle of the previous submission Id construction. Also, re-add the ELSP porting read (it was dropped somewhere during the rebases). v4: - Squash with "drm/i915/bdw: Add forcewake lock around ELSP writes" (BSPEC says: "SW must set Force Wakeup bit to prevent GT from entering C6 while ELSP writes are in progress") as noted by Thomas Daniel (thomas.daniel@intel.com). - Rename functions and use an execlists/intel_execlists_ namespace. - The BUG_ON only checked that the LRCA was <32 bits, but it didn't make sure that it was properly aligned. Spotted by Alistair Mcaulay . v5: - Improved source code comments as suggested by Chris Wilson. - No need to abstract submit_ctx away, as pointed by Brad Volkin. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch. Sigh.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 116 ++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_lrc.h | 1 + 2 files changed, 115 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index cd6ddd80e54c..aa81fd41b9c1 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -47,6 +47,7 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 #define RING_ELSP(ring) ((ring)->mmio_base+0x230) +#define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) #define CTX_LRI_HEADER_0 0x01 @@ -78,6 +79,26 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 +#define GEN8_CTX_VALID (1<<0) +#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) +#define GEN8_CTX_FORCE_RESTORE (1<<2) +#define GEN8_CTX_L3LLC_COHERENT (1<<5) +#define GEN8_CTX_PRIVILEGE (1<<8) +enum { + ADVANCED_CONTEXT = 0, + LEGACY_CONTEXT, + ADVANCED_AD_CONTEXT, + LEGACY_64B_CONTEXT +}; +#define GEN8_CTX_MODE_SHIFT 3 +enum { + FAULT_AND_HANG = 0, + FAULT_AND_HALT, /* Debug only */ + FAULT_AND_STREAM, + FAULT_AND_CONTINUE /* Unsupported */ +}; +#define GEN8_CTX_ID_SHIFT 32 + int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -92,6 +113,93 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) +{ + u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj); + + /* LRCA is required to be 4K aligned so the more significant 20 bits + * are globally unique */ + return lrca >> 12; +} + +static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj) +{ + uint64_t desc; + uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj); + BUG_ON(lrca & 0xFFFFFFFF00000FFFULL); + + desc = GEN8_CTX_VALID; + desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT; + desc |= GEN8_CTX_L3LLC_COHERENT; + desc |= GEN8_CTX_PRIVILEGE; + desc |= lrca; + desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers */ + /* desc |= GEN8_CTX_FORCE_RESTORE; */ + + return desc; +} + +static void execlists_elsp_write(struct intel_engine_cs *ring, + struct drm_i915_gem_object *ctx_obj0, + struct drm_i915_gem_object *ctx_obj1) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + uint64_t temp = 0; + uint32_t desc[4]; + + /* XXX: You must always write both descriptors in the order below. */ + if (ctx_obj1) + temp = execlists_ctx_descriptor(ctx_obj1); + else + temp = 0; + desc[1] = (u32)(temp >> 32); + desc[0] = (u32)temp; + + temp = execlists_ctx_descriptor(ctx_obj0); + desc[3] = (u32)(temp >> 32); + desc[2] = (u32)temp; + + /* Set Force Wakeup bit to prevent GT from entering C6 while + * ELSP writes are in progress */ + gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + + I915_WRITE(RING_ELSP(ring), desc[1]); + I915_WRITE(RING_ELSP(ring), desc[0]); + I915_WRITE(RING_ELSP(ring), desc[3]); + /* The context is automatically loaded after the following */ + I915_WRITE(RING_ELSP(ring), desc[2]); + + /* ELSP is a wo register, so use another nearby reg for posting instead */ + POSTING_READ(RING_EXECLIST_STATUS(ring)); + + gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); +} + +static int execlists_submit_context(struct intel_engine_cs *ring, + struct intel_context *to0, u32 tail0, + struct intel_context *to1, u32 tail1) +{ + struct drm_i915_gem_object *ctx_obj0; + struct drm_i915_gem_object *ctx_obj1 = NULL; + + ctx_obj0 = to0->engine[ring->id].state; + BUG_ON(!ctx_obj0); + BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0)); + + if (to1) { + ctx_obj1 = to1->engine[ring->id].state; + BUG_ON(!ctx_obj1); + BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1)); + } + + execlists_elsp_write(ring, ctx_obj0, ctx_obj1); + + return 0; +} + static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) { struct intel_engine_cs *ring = ringbuf->ring; @@ -270,12 +378,16 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) { + struct intel_engine_cs *ring = ringbuf->ring; + struct intel_context *ctx = ringbuf->FIXME_lrc_ctx; + intel_logical_ring_advance(ringbuf); - if (intel_ring_stopped(ringbuf->ring)) + if (intel_ring_stopped(ring)) return; - /* TODO: how to submit a context to the ELSP is not here yet */ + /* FIXME: too cheeky, we don't even check if the ELSP is ready */ + execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0); } static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 460e1af15600..69605b158235 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -57,5 +57,6 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct list_head *vmas, struct drm_i915_gem_object *batch_obj, u64 exec_start, u32 flags); +u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); #endif /* _INTEL_LRC_H_ */ From ae1250b9da308acd16554365d125b4afb795b825 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:37 +0100 Subject: [PATCH 165/640] drm/i915/bdw: Write the tail pointer, LRC style Each logical ring context has the tail pointer in the context object, so update it before submission. v2: New namespace. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index aa81fd41b9c1..26bc063f137b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -178,6 +178,21 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); } +static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail) +{ + struct page *page; + uint32_t *reg_state; + + page = i915_gem_object_get_page(ctx_obj, 1); + reg_state = kmap_atomic(page); + + reg_state[CTX_RING_TAIL+1] = tail; + + kunmap_atomic(reg_state); + + return 0; +} + static int execlists_submit_context(struct intel_engine_cs *ring, struct intel_context *to0, u32 tail0, struct intel_context *to1, u32 tail1) @@ -189,10 +204,14 @@ static int execlists_submit_context(struct intel_engine_cs *ring, BUG_ON(!ctx_obj0); BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0)); + execlists_ctx_write_tail(ctx_obj0, tail0); + if (to1) { ctx_obj1 = to1->engine[ring->id].state; BUG_ON(!ctx_obj1); BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1)); + + execlists_ctx_write_tail(ctx_obj1, tail1); } execlists_elsp_write(ring, ctx_obj0, ctx_obj1); From acdd884a2e1b873995c120d5eabd8cab77f48f20 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Thu, 24 Jul 2014 17:04:38 +0100 Subject: [PATCH 166/640] drm/i915/bdw: Two-stage execlist submit process Context switch (and execlist submission) should happen only when other contexts are not active, otherwise pre-emption occurs. To assure this, we place context switch requests in a queue and those request are later consumed when the right context switch interrupt is received (still TODO). v2: Use a spinlock, do not remove the requests on unqueue (wait for context switch completion). Signed-off-by: Thomas Daniel v3: Several rebases and code changes. Use unique ID. v4: - Move the queue/lock init to the late ring initialization. - Damien's kmalloc review comments: check return, use sizeof(*req), do not cast. v5: - Do not reuse drm_i915_gem_request. Instead, create our own. - New namespace. Signed-off-by: Michel Thierry (v1) Signed-off-by: Oscar Mateo (v2-v5) Reviewed-by: Damien Lespiau [davnet: Checkpatch + wash-up s/BUG_ON/WARN_ON/.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 72 +++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_lrc.h | 8 +++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 26bc063f137b..e81f5f6c49b9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -126,7 +126,8 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj) { uint64_t desc; uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj); - BUG_ON(lrca & 0xFFFFFFFF00000FFFULL); + + WARN_ON(lrca & 0xFFFFFFFF00000FFFULL); desc = GEN8_CTX_VALID; desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT; @@ -202,14 +203,14 @@ static int execlists_submit_context(struct intel_engine_cs *ring, ctx_obj0 = to0->engine[ring->id].state; BUG_ON(!ctx_obj0); - BUG_ON(!i915_gem_obj_is_pinned(ctx_obj0)); + WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0)); execlists_ctx_write_tail(ctx_obj0, tail0); if (to1) { ctx_obj1 = to1->engine[ring->id].state; BUG_ON(!ctx_obj1); - BUG_ON(!i915_gem_obj_is_pinned(ctx_obj1)); + WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1)); execlists_ctx_write_tail(ctx_obj1, tail1); } @@ -219,6 +220,65 @@ static int execlists_submit_context(struct intel_engine_cs *ring, return 0; } +static void execlists_context_unqueue(struct intel_engine_cs *ring) +{ + struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL; + struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL; + + if (list_empty(&ring->execlist_queue)) + return; + + /* Try to read in pairs */ + list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue, + execlist_link) { + if (!req0) { + req0 = cursor; + } else if (req0->ctx == cursor->ctx) { + /* Same ctx: ignore first request, as second request + * will update tail past first request's workload */ + list_del(&req0->execlist_link); + i915_gem_context_unreference(req0->ctx); + kfree(req0); + req0 = cursor; + } else { + req1 = cursor; + break; + } + } + + WARN_ON(execlists_submit_context(ring, req0->ctx, req0->tail, + req1 ? req1->ctx : NULL, + req1 ? req1->tail : 0)); +} + +static int execlists_context_queue(struct intel_engine_cs *ring, + struct intel_context *to, + u32 tail) +{ + struct intel_ctx_submit_request *req = NULL; + unsigned long flags; + bool was_empty; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (req == NULL) + return -ENOMEM; + req->ctx = to; + i915_gem_context_reference(req->ctx); + req->ring = ring; + req->tail = tail; + + spin_lock_irqsave(&ring->execlist_lock, flags); + + was_empty = list_empty(&ring->execlist_queue); + list_add_tail(&req->execlist_link, &ring->execlist_queue); + if (was_empty) + execlists_context_unqueue(ring); + + spin_unlock_irqrestore(&ring->execlist_lock, flags); + + return 0; +} + static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf) { struct intel_engine_cs *ring = ringbuf->ring; @@ -405,8 +465,7 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) if (intel_ring_stopped(ring)) return; - /* FIXME: too cheeky, we don't even check if the ELSP is ready */ - execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0); + execlists_context_queue(ring, ctx, ringbuf->tail); } static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, @@ -846,6 +905,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin INIT_LIST_HEAD(&ring->request_list); init_waitqueue_head(&ring->irq_queue); + INIT_LIST_HEAD(&ring->execlist_queue); + spin_lock_init(&ring->execlist_lock); + ret = intel_lr_context_deferred_create(dctx, ring); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 69605b158235..3c389b3a2b75 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -59,4 +59,12 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, u64 exec_start, u32 flags); u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); +struct intel_ctx_submit_request { + struct intel_context *ctx; + struct intel_engine_cs *ring; + u32 tail; + + struct list_head execlist_link; +}; + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 26785ca72530..670262dabb6c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -231,6 +231,8 @@ struct intel_engine_cs { } semaphore; /* Execlists */ + spinlock_t execlist_lock; + struct list_head execlist_queue; u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ int (*emit_request)(struct intel_ringbuffer *ringbuf); int (*emit_flush)(struct intel_ringbuffer *ringbuf, From e981e7b17f2b41970e7e2367d4225e0bb3310667 Mon Sep 17 00:00:00 2001 From: Thomas Daniel Date: Thu, 24 Jul 2014 17:04:39 +0100 Subject: [PATCH 167/640] drm/i915/bdw: Handle context switch events Handle all context status events in the context status buffer on every context switch interrupt. We only remove work from the execlist queue after a context status buffer reports that it has completed and we only attempt to schedule new contexts on interrupt when a previously submitted context completes (unless no contexts are queued, which means the GPU is free). We canot call intel_runtime_pm_get() in an interrupt (or with a spinlock grabbed, FWIW), because it might sleep, which is not a nice thing to do. Instead, do the runtime_pm get/put together with the create/destroy request, and handle the forcewake get/put directly. Signed-off-by: Thomas Daniel v2: Unreferencing the context when we are freeing the request might free the backing bo, which requires the struct_mutex to be grabbed, so defer unreferencing and freeing to a bottom half. v3: - Ack the interrupt inmediately, before trying to handle it (fix for missing interrupts by Bob Beckett ). - Update the Context Status Buffer Read Pointer, just in case (spotted by Damien Lespiau). v4: New namespace and multiple rebase changes. v5: Squash with "drm/i915/bdw: Do not call intel_runtime_pm_get() in an interrupt", as suggested by Daniel. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch ...] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_irq.c | 35 +++++-- drivers/gpu/drm/i915/intel_lrc.c | 133 ++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_lrc.h | 3 + drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 155 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 00957fa0b877..f5d6795887d2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1632,6 +1632,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, struct drm_i915_private *dev_priv, u32 master_ctl) { + struct intel_engine_cs *ring; u32 rcs, bcs, vcs; uint32_t tmp = 0; irqreturn_t ret = IRQ_NONE; @@ -1641,14 +1642,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(0), tmp); ret = IRQ_HANDLED; + rcs = tmp >> GEN8_RCS_IRQ_SHIFT; - bcs = tmp >> GEN8_BCS_IRQ_SHIFT; + ring = &dev_priv->ring[RCS]; if (rcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[RCS]); + notify_ring(dev, ring); + if (rcs & GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); + + bcs = tmp >> GEN8_BCS_IRQ_SHIFT; + ring = &dev_priv->ring[BCS]; if (bcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[BCS]); - if ((rcs | bcs) & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + notify_ring(dev, ring); + if (bcs & GT_CONTEXT_SWITCH_INTERRUPT) + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR("The master control interrupt lied (GT0)!\n"); } @@ -1658,16 +1665,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(1), tmp); ret = IRQ_HANDLED; + vcs = tmp >> GEN8_VCS1_IRQ_SHIFT; + ring = &dev_priv->ring[VCS]; if (vcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[VCS]); + notify_ring(dev, ring); if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + intel_execlists_handle_ctx_events(ring); + vcs = tmp >> GEN8_VCS2_IRQ_SHIFT; + ring = &dev_priv->ring[VCS2]; if (vcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[VCS2]); + notify_ring(dev, ring); if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR("The master control interrupt lied (GT1)!\n"); } @@ -1688,11 +1699,13 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, if (tmp) { I915_WRITE(GEN8_GT_IIR(3), tmp); ret = IRQ_HANDLED; + vcs = tmp >> GEN8_VECS_IRQ_SHIFT; + ring = &dev_priv->ring[VECS]; if (vcs & GT_RENDER_USER_INTERRUPT) - notify_ring(dev, &dev_priv->ring[VECS]); + notify_ring(dev, ring); if (vcs & GT_CONTEXT_SWITCH_INTERRUPT) - DRM_DEBUG_DRIVER("TODO: Context switch\n"); + intel_execlists_handle_ctx_events(ring); } else DRM_ERROR("The master control interrupt lied (GT3)!\n"); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e81f5f6c49b9..22f6a7c0cb18 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -49,6 +49,22 @@ #define RING_ELSP(ring) ((ring)->mmio_base+0x230) #define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) +#define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) +#define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) + +#define RING_EXECLIST_QFULL (1 << 0x2) +#define RING_EXECLIST1_VALID (1 << 0x3) +#define RING_EXECLIST0_VALID (1 << 0x4) +#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) +#define RING_EXECLIST1_ACTIVE (1 << 0x11) +#define RING_EXECLIST0_ACTIVE (1 << 0x12) + +#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) +#define GEN8_CTX_STATUS_PREEMPTED (1 << 1) +#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) +#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) +#define GEN8_CTX_STATUS_COMPLETE (1 << 4) +#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) #define CTX_LRI_HEADER_0 0x01 #define CTX_CONTEXT_CONTROL 0x02 @@ -150,6 +166,7 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, struct drm_i915_private *dev_priv = ring->dev->dev_private; uint64_t temp = 0; uint32_t desc[4]; + unsigned long flags; /* XXX: You must always write both descriptors in the order below. */ if (ctx_obj1) @@ -163,9 +180,17 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, desc[3] = (u32)(temp >> 32); desc[2] = (u32)temp; - /* Set Force Wakeup bit to prevent GT from entering C6 while - * ELSP writes are in progress */ - gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + /* Set Force Wakeup bit to prevent GT from entering C6 while ELSP writes + * are in progress. + * + * The other problem is that we can't just call gen6_gt_force_wake_get() + * because that function calls intel_runtime_pm_get(), which might sleep. + * Instead, we do the runtime_pm_get/put when creating/destroying requests. + */ + spin_lock_irqsave(&dev_priv->uncore.lock, flags); + if (dev_priv->uncore.forcewake_count++ == 0) + dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); I915_WRITE(RING_ELSP(ring), desc[1]); I915_WRITE(RING_ELSP(ring), desc[0]); @@ -176,7 +201,11 @@ static void execlists_elsp_write(struct intel_engine_cs *ring, /* ELSP is a wo register, so use another nearby reg for posting instead */ POSTING_READ(RING_EXECLIST_STATUS(ring)); - gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + /* Release Force Wakeup (see the big comment above). */ + spin_lock_irqsave(&dev_priv->uncore.lock, flags); + if (--dev_priv->uncore.forcewake_count == 0) + dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL); + spin_unlock_irqrestore(&dev_priv->uncore.lock, flags); } static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail) @@ -224,6 +253,9 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) { struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL; struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + + assert_spin_locked(&ring->execlist_lock); if (list_empty(&ring->execlist_queue)) return; @@ -237,8 +269,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) /* Same ctx: ignore first request, as second request * will update tail past first request's workload */ list_del(&req0->execlist_link); - i915_gem_context_unreference(req0->ctx); - kfree(req0); + queue_work(dev_priv->wq, &req0->work); req0 = cursor; } else { req1 = cursor; @@ -251,11 +282,97 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) req1 ? req1->tail : 0)); } +static bool execlists_check_remove_request(struct intel_engine_cs *ring, + u32 request_id) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct intel_ctx_submit_request *head_req; + + assert_spin_locked(&ring->execlist_lock); + + head_req = list_first_entry_or_null(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + + if (head_req != NULL) { + struct drm_i915_gem_object *ctx_obj = + head_req->ctx->engine[ring->id].state; + if (intel_execlists_ctx_id(ctx_obj) == request_id) { + list_del(&head_req->execlist_link); + queue_work(dev_priv->wq, &head_req->work); + return true; + } + } + + return false; +} + +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + u32 status_pointer; + u8 read_pointer; + u8 write_pointer; + u32 status; + u32 status_id; + u32 submit_contexts = 0; + + status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring)); + + read_pointer = ring->next_context_status_buffer; + write_pointer = status_pointer & 0x07; + if (read_pointer > write_pointer) + write_pointer += 6; + + spin_lock(&ring->execlist_lock); + + while (read_pointer < write_pointer) { + read_pointer++; + status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + + (read_pointer % 6) * 8); + status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + + (read_pointer % 6) * 8 + 4); + + if (status & GEN8_CTX_STATUS_COMPLETE) { + if (execlists_check_remove_request(ring, status_id)) + submit_contexts++; + } + } + + if (submit_contexts != 0) + execlists_context_unqueue(ring); + + spin_unlock(&ring->execlist_lock); + + WARN(submit_contexts > 2, "More than two context complete events?\n"); + ring->next_context_status_buffer = write_pointer % 6; + + I915_WRITE(RING_CONTEXT_STATUS_PTR(ring), + ((u32)ring->next_context_status_buffer & 0x07) << 8); +} + +static void execlists_free_request_task(struct work_struct *work) +{ + struct intel_ctx_submit_request *req = + container_of(work, struct intel_ctx_submit_request, work); + struct drm_device *dev = req->ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + intel_runtime_pm_put(dev_priv); + + mutex_lock(&dev->struct_mutex); + i915_gem_context_unreference(req->ctx); + mutex_unlock(&dev->struct_mutex); + + kfree(req); +} + static int execlists_context_queue(struct intel_engine_cs *ring, struct intel_context *to, u32 tail) { struct intel_ctx_submit_request *req = NULL; + struct drm_i915_private *dev_priv = ring->dev->dev_private; unsigned long flags; bool was_empty; @@ -266,6 +383,9 @@ static int execlists_context_queue(struct intel_engine_cs *ring, i915_gem_context_reference(req->ctx); req->ring = ring; req->tail = tail; + INIT_WORK(&req->work, execlists_free_request_task); + + intel_runtime_pm_get(dev_priv); spin_lock_irqsave(&ring->execlist_lock, flags); @@ -907,6 +1027,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin INIT_LIST_HEAD(&ring->execlist_queue); spin_lock_init(&ring->execlist_lock); + ring->next_context_status_buffer = 0; ret = intel_lr_context_deferred_create(dctx, ring); if (ret) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 3c389b3a2b75..a3f135cf439e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -65,6 +65,9 @@ struct intel_ctx_submit_request { u32 tail; struct list_head execlist_link; + struct work_struct work; }; +void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 670262dabb6c..9cbf7b0ebc99 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -233,6 +233,7 @@ struct intel_engine_cs { /* Execlists */ spinlock_t execlist_lock; struct list_head execlist_queue; + u8 next_context_status_buffer; u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ int (*emit_request)(struct intel_ringbuffer *ringbuf); int (*emit_flush)(struct intel_ringbuffer *ringbuf, From e1fee72c2ea2e9c0c6e6743d32a6832f21337d6c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:40 +0100 Subject: [PATCH 168/640] drm/i915/bdw: Avoid non-lite-restore preemptions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the current Execlists feeding mechanism, full preemption is not supported yet: only lite-restores are allowed (this is: the GPU simply samples a new tail pointer for the context currently in execution). But we have identified an scenario in which a full preemption occurs: 1) We submit two contexts for execution (A & B). 2) The GPU finishes with the first one (A), switches to the second one (B) and informs us. 3) We submit B again (hoping to cause a lite restore) together with C, but in the time we spend writing to the ELSP, the GPU finishes B. 4) The GPU start executing B again (since we told it so). 5) We receive a B finished interrupt and, mistakenly, we submit C (again) and D, causing a full preemption of B. The race is avoided by keeping track of how many times a context has been submitted to the hardware and by better discriminating the received context switch interrupts: in the example, when we have submitted B twice, we won´t submit C and D as soon as we receive the notification that B is completed because we were expecting to get a LITE_RESTORE and we didn´t, so we know a second completion will be received shortly. Without this explicit checking, somehow, the batch buffer execution order gets messed with. This can be verified with the IGT test I sent together with the series. I don´t know the exact mechanism by which the pre-emption messes with the execution order but, since other people is working on the Scheduler + Preemption on Execlists, I didn´t try to fix it. In these series, only Lite Restores are supported (other kind of preemptions WARN). v2: elsp_submitted belongs in the new intel_ctx_submit_request. Several rebase changes. v3: Clarify how the race is avoided, as requested by Daniel. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Align function parameters ...] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_lrc.h | 2 ++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 22f6a7c0cb18..0f1b6b2b0f0e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -268,6 +268,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) } else if (req0->ctx == cursor->ctx) { /* Same ctx: ignore first request, as second request * will update tail past first request's workload */ + cursor->elsp_submitted = req0->elsp_submitted; list_del(&req0->execlist_link); queue_work(dev_priv->wq, &req0->work); req0 = cursor; @@ -277,9 +278,15 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) } } + WARN_ON(req1 && req1->elsp_submitted); + WARN_ON(execlists_submit_context(ring, req0->ctx, req0->tail, req1 ? req1->ctx : NULL, req1 ? req1->tail : 0)); + + req0->elsp_submitted++; + if (req1) + req1->elsp_submitted++; } static bool execlists_check_remove_request(struct intel_engine_cs *ring, @@ -298,9 +305,14 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, struct drm_i915_gem_object *ctx_obj = head_req->ctx->engine[ring->id].state; if (intel_execlists_ctx_id(ctx_obj) == request_id) { - list_del(&head_req->execlist_link); - queue_work(dev_priv->wq, &head_req->work); - return true; + WARN(head_req->elsp_submitted == 0, + "Never submitted head request\n"); + + if (--head_req->elsp_submitted <= 0) { + list_del(&head_req->execlist_link); + queue_work(dev_priv->wq, &head_req->work); + return true; + } } } @@ -333,7 +345,16 @@ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + (read_pointer % 6) * 8 + 4); - if (status & GEN8_CTX_STATUS_COMPLETE) { + if (status & GEN8_CTX_STATUS_PREEMPTED) { + if (status & GEN8_CTX_STATUS_LITE_RESTORE) { + if (execlists_check_remove_request(ring, status_id)) + WARN(1, "Lite Restored request removed from queue\n"); + } else + WARN(1, "Preemption without Lite Restore\n"); + } + + if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) || + (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) { if (execlists_check_remove_request(ring, status_id)) submit_contexts++; } diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index a3f135cf439e..331c6c2ba376 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -66,6 +66,8 @@ struct intel_ctx_submit_request { struct list_head execlist_link; struct work_struct work; + + int elsp_submitted; }; void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring); From f1ad5a1fd4127b3a5e21b8f5ef7f1921a5d3063e Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:41 +0100 Subject: [PATCH 169/640] drm/i915/bdw: Help out the ctx switch interrupt handler If we receive a storm of requests for the same context (see gem_storedw_loop_*) we might end up iterating over too many elements in interrupt time, looking for contexts to squash together. Instead, share the burden by giving more intelligence to the queue function. At most, the interrupt will iterate over three elements. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_lrc.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0f1b6b2b0f0e..6f6c5a931faf 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -392,10 +392,10 @@ static int execlists_context_queue(struct intel_engine_cs *ring, struct intel_context *to, u32 tail) { - struct intel_ctx_submit_request *req = NULL; + struct intel_ctx_submit_request *req = NULL, *cursor; struct drm_i915_private *dev_priv = ring->dev->dev_private; unsigned long flags; - bool was_empty; + int num_elements = 0; req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) @@ -410,9 +410,27 @@ static int execlists_context_queue(struct intel_engine_cs *ring, spin_lock_irqsave(&ring->execlist_lock, flags); - was_empty = list_empty(&ring->execlist_queue); + list_for_each_entry(cursor, &ring->execlist_queue, execlist_link) + if (++num_elements > 2) + break; + + if (num_elements > 2) { + struct intel_ctx_submit_request *tail_req; + + tail_req = list_last_entry(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + + if (to == tail_req->ctx) { + WARN(tail_req->elsp_submitted != 0, + "More than 2 already-submitted reqs queued\n"); + list_del(&tail_req->execlist_link); + queue_work(dev_priv->wq, &tail_req->work); + } + } + list_add_tail(&req->execlist_link, &ring->execlist_queue); - if (was_empty) + if (num_elements == 0) execlists_context_unqueue(ring); spin_unlock_irqrestore(&ring->execlist_lock, flags); From 4ed91096881449012b14b1e879f40b4a37533e0e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 8 Aug 2014 20:27:01 +0200 Subject: [PATCH 170/640] drm/i915: Track cursor changes as frontbuffer tracking flushes We treat other plane updates in the same fashion. Spotted because Rodrigo kept reporting a bug in the PSR code where the frontbuffer was eternally stuck with a dirty cursor bit set. The psr testcase should have caught this, but that i-g-t is kaputt. Rodrigo is signed up to fix that. Cc: Rodrigo Vivi Tested-by-and-Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fd15601f6360..b2e4eac7b70b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11782,6 +11782,10 @@ intel_cursor_plane_update(struct drm_plane *plane, struct drm_crtc *crtc, return intel_crtc_cursor_set_obj(crtc, obj, crtc_w, crtc_h); } else { intel_crtc_update_cursor(crtc, visible); + + intel_frontbuffer_flip(crtc->dev, + INTEL_FRONTBUFFER_CURSOR(intel_crtc->pipe)); + return 0; } } From ba07975f0fe5bf95107d71d0df0405c16f5c3266 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:30 -0700 Subject: [PATCH 171/640] gpu: ipu-v3: Add functions to set CSI/IC source muxes Adds two new functions, ipu_set_csi_src_mux() and ipu_set_ic_src_mux(), that select the inputs to the CSI and IC respectively. Both muxes are programmed in the IPU_CONF register. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 51 +++++++++++++++++++++++++++++++++ include/video/imx-ipu-v3.h | 6 ++++ 2 files changed, 57 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 5978e7aab8ed..cae543115856 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -382,6 +382,57 @@ static int ipu_memory_reset(struct ipu_soc *ipu) return 0; } +/* + * Set the source mux for the given CSI. Selects either parallel or + * MIPI CSI2 sources. + */ +void ipu_set_csi_src_mux(struct ipu_soc *ipu, int csi_id, bool mipi_csi2) +{ + unsigned long flags; + u32 val, mask; + + mask = (csi_id == 1) ? IPU_CONF_CSI1_DATA_SOURCE : + IPU_CONF_CSI0_DATA_SOURCE; + + spin_lock_irqsave(&ipu->lock, flags); + + val = ipu_cm_read(ipu, IPU_CONF); + if (mipi_csi2) + val |= mask; + else + val &= ~mask; + ipu_cm_write(ipu, val, IPU_CONF); + + spin_unlock_irqrestore(&ipu->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_set_csi_src_mux); + +/* + * Set the source mux for the IC. Selects either CSI[01] or the VDI. + */ +void ipu_set_ic_src_mux(struct ipu_soc *ipu, int csi_id, bool vdi) +{ + unsigned long flags; + u32 val; + + spin_lock_irqsave(&ipu->lock, flags); + + val = ipu_cm_read(ipu, IPU_CONF); + if (vdi) { + val |= IPU_CONF_IC_INPUT; + } else { + val &= ~IPU_CONF_IC_INPUT; + if (csi_id == 1) + val |= IPU_CONF_CSI_SEL; + else + val &= ~IPU_CONF_CSI_SEL; + } + ipu_cm_write(ipu, val, IPU_CONF); + + spin_unlock_irqrestore(&ipu->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_set_ic_src_mux); + struct ipu_devtype { const char *name; unsigned long cm_ofs; diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index ef64b66b18df..f80fe13b0d4d 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -92,6 +92,12 @@ int ipu_idmac_channel_irq(struct ipu_soc *ipu, struct ipuv3_channel *channel, #define IPU_IRQ_VSYNC_PRE_0 (448 + 14) #define IPU_IRQ_VSYNC_PRE_1 (448 + 15) +/* + * IPU Common functions + */ +void ipu_set_csi_src_mux(struct ipu_soc *ipu, int csi_id, bool mipi_csi2); +void ipu_set_ic_src_mux(struct ipu_soc *ipu, int csi_id, bool vdi); + /* * IPU Image DMA Controller (idmac) functions */ From c2d670fd3b16304124162bef99313eaa289f2bc3 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:31 -0700 Subject: [PATCH 172/640] gpu: ipu-v3: Rename and add IDMAC channels Rename the ENC/VF/PP rotation channel names, to be more consistent with the convention that *_MEM is write-to-memory channels and MEM_* is read-from-memory channels. Also add the channels who's source and destination is the IC. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-prv.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h index 0a7b2adaba39..1a5c55c05fe8 100644 --- a/drivers/gpu/ipu-v3/ipu-prv.h +++ b/drivers/gpu/ipu-v3/ipu-prv.h @@ -28,17 +28,25 @@ struct ipu_soc; #define IPUV3_CHANNEL_CSI1 1 #define IPUV3_CHANNEL_CSI2 2 #define IPUV3_CHANNEL_CSI3 3 +#define IPUV3_CHANNEL_VDI_MEM_IC_VF 5 +#define IPUV3_CHANNEL_MEM_IC_PP 11 +#define IPUV3_CHANNEL_MEM_IC_PRP_VF 12 +#define IPUV3_CHANNEL_G_MEM_IC_PRP_VF 14 +#define IPUV3_CHANNEL_G_MEM_IC_PP 15 +#define IPUV3_CHANNEL_IC_PRP_ENC_MEM 20 +#define IPUV3_CHANNEL_IC_PRP_VF_MEM 21 +#define IPUV3_CHANNEL_IC_PP_MEM 22 #define IPUV3_CHANNEL_MEM_BG_SYNC 23 #define IPUV3_CHANNEL_MEM_FG_SYNC 27 #define IPUV3_CHANNEL_MEM_DC_SYNC 28 #define IPUV3_CHANNEL_MEM_FG_SYNC_ALPHA 31 #define IPUV3_CHANNEL_MEM_DC_ASYNC 41 -#define IPUV3_CHANNEL_ROT_ENC_MEM 45 -#define IPUV3_CHANNEL_ROT_VF_MEM 46 -#define IPUV3_CHANNEL_ROT_PP_MEM 47 -#define IPUV3_CHANNEL_ROT_ENC_MEM_OUT 48 -#define IPUV3_CHANNEL_ROT_VF_MEM_OUT 49 -#define IPUV3_CHANNEL_ROT_PP_MEM_OUT 50 +#define IPUV3_CHANNEL_MEM_ROT_ENC 45 +#define IPUV3_CHANNEL_MEM_ROT_VF 46 +#define IPUV3_CHANNEL_MEM_ROT_PP 47 +#define IPUV3_CHANNEL_ROT_ENC_MEM 48 +#define IPUV3_CHANNEL_ROT_VF_MEM 49 +#define IPUV3_CHANNEL_ROT_PP_MEM 50 #define IPUV3_CHANNEL_MEM_BG_SYNC_ALPHA 51 #define IPU_MCU_T_DEFAULT 8 From b7c71823f11158340b9d61325d3c44124650dc4e Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Fri, 15 Aug 2014 12:01:31 +0100 Subject: [PATCH 173/640] drm/i915/bdw: Don't write PDP in the legacy way when using LRCs This is mostly for correctness so that we know we are running the LR context correctly (this is, the PDPs are contained inside the context object). v2: Move the check to inside the enable PPGTT function. The switch happens in two places: the legacy context switch (that we won't hit when Execlists are enabled) and the PPGTT enable, which unfortunately we need. This would look much nicer if the ppgtt->enable was part of the ring init, where it logically belongs. v3: Move the check to the start of the enable PPGTT function. None of the legacy PPGTT enabling is required when using LRCs as the PPGTT is enabled in the context descriptor and the PDPs are written in the LRC. v4: Clarify comment based on review feedback. Signed-off-by: Oscar Mateo Signed-off-by: Thomas Daniel Reviewed-by: Damien Lespiau [danvet: Resolve conflicts with ppgtt_enable rework.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d97b280861ee..4db237065610 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -826,6 +826,12 @@ static void gen8_ppgtt_enable(struct drm_device *dev) struct intel_engine_cs *ring; int j; + /* In the case of execlists, PPGTT is enabled by the context descriptor + * and the PDPs are contained within the context itself. We don't + * need to do anything here. */ + if (i915.enable_execlists) + return; + for_each_ring(ring, dev_priv, j) { I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); From cc9130be805d955f0e06642e57741dd9df1fbc86 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:42 +0100 Subject: [PATCH 174/640] drm/i915/bdw: Make sure gpu reset still works with Execlists If we reset a ring after a hang, we have to make sure that we clear out all queued Execlists requests. v2: The ring is, at this point, already being correctly re-programmed for Execlists, and the hangcheck counters cleared. v3: Daniel suggests to drop the "if (execlists)" because the Execlists queue should be empty in legacy mode (which is true, if we do the INIT_LIST_HEAD). v4: Do the pending intel_runtime_pm_put Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 32fa1e9eb844..aa4103bdd352 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2551,6 +2551,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, i915_gem_free_request(request); } + while (!list_empty(&ring->execlist_queue)) { + struct intel_ctx_submit_request *submit_req; + + submit_req = list_first_entry(&ring->execlist_queue, + struct intel_ctx_submit_request, + execlist_link); + list_del(&submit_req->execlist_link); + intel_runtime_pm_put(dev_priv); + i915_gem_context_unreference(submit_req->ctx); + kfree(submit_req); + } + /* These may not have been flush before the reset, do so now */ kfree(ring->preallocated_lazy_request); ring->preallocated_lazy_request = NULL; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 13543f8528c2..4fb1ec95ec08 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1612,6 +1612,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); + INIT_LIST_HEAD(&ring->execlist_queue); ringbuf->size = 32 * PAGE_SIZE; ringbuf->ring = ring; memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); From 71386ef9008817feebd863e46d8711ebe9e7cbbb Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:44 +0100 Subject: [PATCH 175/640] drm/i915/bdw: Disable semaphores for Execlists Up until recently, semaphores weren't enabled in BDW so we didn't care about them. But then Rodrigo came and enabled them: commit 521e62e49a42661a4ee0102644517dbe2f100a23 Author: Rodrigo Vivi drm/i915: Enable semaphores on BDW So now we have to explicitly disable them for Execlists until both features play nicely. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2f112853c36f..117f5c16df74 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -481,6 +481,10 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) if (i915.semaphores >= 0) return i915.semaphores; + /* TODO: make semaphores and Execlists play nicely together */ + if (i915.enable_execlists) + return false; + /* Until we get further testing... */ if (IS_GEN8(dev)) return false; From 4ba70e448be91f52032595678c306e4aee2fae5c Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 7 Aug 2014 13:23:20 +0100 Subject: [PATCH 176/640] drm/i915/bdw: Display execlists info in debugfs v2: Warn and return if LRCs are not enabled. v3: Grab the Execlists spinlock (noticed by Daniel Vetter). Signed-off-by: Oscar Mateo v4: Lock the struct mutex for atomic state capture Signed-off-by: Thomas Daniel Reviewed-by: Damien Lespiau [danvet: Checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 81 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 6 --- drivers/gpu/drm/i915/intel_lrc.h | 7 +++ 3 files changed, 88 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d42db6bc34e0..68335813ef4c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1721,6 +1721,86 @@ static int i915_context_status(struct seq_file *m, void *unused) return 0; } +static int i915_execlists(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + u32 status_pointer; + u8 read_pointer; + u8 write_pointer; + u32 status; + u32 ctx_id; + struct list_head *cursor; + int ring_id, i; + int ret; + + if (!i915.enable_execlists) { + seq_puts(m, "Logical Ring Contexts are disabled\n"); + return 0; + } + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + for_each_ring(ring, dev_priv, ring_id) { + struct intel_ctx_submit_request *head_req = NULL; + int count = 0; + unsigned long flags; + + seq_printf(m, "%s\n", ring->name); + + status = I915_READ(RING_EXECLIST_STATUS(ring)); + ctx_id = I915_READ(RING_EXECLIST_STATUS(ring) + 4); + seq_printf(m, "\tExeclist status: 0x%08X, context: %u\n", + status, ctx_id); + + status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring)); + seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer); + + read_pointer = ring->next_context_status_buffer; + write_pointer = status_pointer & 0x07; + if (read_pointer > write_pointer) + write_pointer += 6; + seq_printf(m, "\tRead pointer: 0x%08X, write pointer 0x%08X\n", + read_pointer, write_pointer); + + for (i = 0; i < 6; i++) { + status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i); + ctx_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i + 4); + + seq_printf(m, "\tStatus buffer %d: 0x%08X, context: %u\n", + i, status, ctx_id); + } + + spin_lock_irqsave(&ring->execlist_lock, flags); + list_for_each(cursor, &ring->execlist_queue) + count++; + head_req = list_first_entry_or_null(&ring->execlist_queue, + struct intel_ctx_submit_request, execlist_link); + spin_unlock_irqrestore(&ring->execlist_lock, flags); + + seq_printf(m, "\t%d requests in queue\n", count); + if (head_req) { + struct drm_i915_gem_object *ctx_obj; + + ctx_obj = head_req->ctx->engine[ring_id].state; + seq_printf(m, "\tHead request id: %u\n", + intel_execlists_ctx_id(ctx_obj)); + seq_printf(m, "\tHead request tail: %u\n", + head_req->tail); + } + + seq_putc(m, '\n'); + } + + mutex_unlock(&dev->struct_mutex); + + return 0; +} + static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; @@ -3974,6 +4054,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_opregion", i915_opregion, 0}, {"i915_gem_framebuffer", i915_gem_framebuffer_info, 0}, {"i915_context_status", i915_context_status, 0}, + {"i915_execlists", i915_execlists, 0}, {"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0}, {"i915_swizzle_info", i915_swizzle_info, 0}, {"i915_ppgtt_info", i915_ppgtt_info, 0}, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6f6c5a931faf..cc923a96fa4c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -46,12 +46,6 @@ #define GEN8_LR_CONTEXT_ALIGN 4096 -#define RING_ELSP(ring) ((ring)->mmio_base+0x230) -#define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) -#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) -#define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) -#define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) - #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) #define RING_EXECLIST0_VALID (1 << 0x4) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 331c6c2ba376..117d1a4eb3b9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -24,6 +24,13 @@ #ifndef _INTEL_LRC_H_ #define _INTEL_LRC_H_ +/* Execlists regs */ +#define RING_ELSP(ring) ((ring)->mmio_base+0x230) +#define RING_EXECLIST_STATUS(ring) ((ring)->mmio_base+0x234) +#define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) +#define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) +#define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) + /* Logical Rings */ void intel_logical_ring_stop(struct intel_engine_cs *ring); void intel_logical_ring_cleanup(struct intel_engine_cs *ring); From c9fe99bd4c4f8730207fed5e863d8f25224fd20b Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:46 +0100 Subject: [PATCH 177/640] drm/i915/bdw: Display context backing obj & ringbuffer info in debugfs Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau [danvet: Checkpatch.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 37 +++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 68335813ef4c..4f279cfe67b8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1677,6 +1677,14 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) return 0; } +static void describe_ctx_ringbuf(struct seq_file *m, + struct intel_ringbuffer *ringbuf) +{ + seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)", + ringbuf->space, ringbuf->head, ringbuf->tail, + ringbuf->last_retired_head); +} + static int i915_context_status(struct seq_file *m, void *unused) { struct drm_info_node *node = m->private; @@ -1703,16 +1711,37 @@ static int i915_context_status(struct seq_file *m, void *unused) } list_for_each_entry(ctx, &dev_priv->context_list, link) { - if (ctx->legacy_hw_ctx.rcs_state == NULL) + if (!i915.enable_execlists && + ctx->legacy_hw_ctx.rcs_state == NULL) continue; seq_puts(m, "HW context "); describe_ctx(m, ctx); - for_each_ring(ring, dev_priv, i) + for_each_ring(ring, dev_priv, i) { if (ring->default_context == ctx) - seq_printf(m, "(default context %s) ", ring->name); + seq_printf(m, "(default context %s) ", + ring->name); + } + + if (i915.enable_execlists) { + seq_putc(m, '\n'); + for_each_ring(ring, dev_priv, i) { + struct drm_i915_gem_object *ctx_obj = + ctx->engine[i].state; + struct intel_ringbuffer *ringbuf = + ctx->engine[i].ringbuf; + + seq_printf(m, "%s: ", ring->name); + if (ctx_obj) + describe_obj(m, ctx_obj); + if (ringbuf) + describe_ctx_ringbuf(m, ringbuf); + seq_putc(m, '\n'); + } + } else { + describe_obj(m, ctx->legacy_hw_ctx.rcs_state); + } - describe_obj(m, ctx->legacy_hw_ctx.rcs_state); seq_putc(m, '\n'); } From c0ab1ae9028f14bcb7bfb655bd2120c60681c479 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 7 Aug 2014 13:24:26 +0100 Subject: [PATCH 178/640] drm/i915/bdw: Print context state in debugfs This has turned out to be really handy in debug so far. Update: Since writing this patch, I've gotten similar code upstream for error state. I've used it quite a bit in debugfs however, and I'd like to keep it here at least until preemption is working. Signed-off-by: Ben Widawsky This patch was accidentally dropped in the first Execlists version, and it has been very useful indeed. Put it back again, but as a standalone debugfs file. Signed-off-by: Oscar Mateo v2: Take the device struct_mutex rather than mode_config mutex for atomic state capture. Signed-off-by: Thomas Daniel Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_debugfs.c | 52 +++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4f279cfe67b8..6c82bdaa0822 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1750,6 +1750,57 @@ static int i915_context_status(struct seq_file *m, void *unused) return 0; } +static int i915_dump_lrc(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_engine_cs *ring; + struct intel_context *ctx; + int ret, i; + + if (!i915.enable_execlists) { + seq_printf(m, "Logical Ring Contexts are disabled\n"); + return 0; + } + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + list_for_each_entry(ctx, &dev_priv->context_list, link) { + for_each_ring(ring, dev_priv, i) { + struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state; + + if (ring->default_context == ctx) + continue; + + if (ctx_obj) { + struct page *page = i915_gem_object_get_page(ctx_obj, 1); + uint32_t *reg_state = kmap_atomic(page); + int j; + + seq_printf(m, "CONTEXT: %s %u\n", ring->name, + intel_execlists_ctx_id(ctx_obj)); + + for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { + seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n", + i915_gem_obj_ggtt_offset(ctx_obj) + 4096 + (j * 4), + reg_state[j], reg_state[j + 1], + reg_state[j + 2], reg_state[j + 3]); + } + kunmap_atomic(reg_state); + + seq_putc(m, '\n'); + } + } + } + + mutex_unlock(&dev->struct_mutex); + + return 0; +} + static int i915_execlists(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *)m->private; @@ -4083,6 +4134,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_opregion", i915_opregion, 0}, {"i915_gem_framebuffer", i915_gem_framebuffer_info, 0}, {"i915_context_status", i915_context_status, 0}, + {"i915_dump_lrc", i915_dump_lrc, 0}, {"i915_execlists", i915_execlists, 0}, {"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0}, {"i915_swizzle_info", i915_swizzle_info, 0}, From 73e4d07f8ae9cff8c869d73df4e299a3a6f5ad98 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:48 +0100 Subject: [PATCH 179/640] drm/i915/bdw: Document Logical Rings, LR contexts and Execlists Add theory of operation notes to intel_lrc.c and comments to externally visible functions. v2: Add notes on logical ring context creation. v3: Use kerneldoc. v4: Integrate it in the DocBook template. Signed-off-by: Thomas Daniel (v1) Signed-off-by: Oscar Mateo (v2, v3) Reviewed-by: Damien Lespiau [danvet: Drop hunk about render ring init function since that's not yet merged.] Signed-off-by: Daniel Vetter --- Documentation/DocBook/drm.tmpl | 5 + drivers/gpu/drm/i915/intel_lrc.c | 203 ++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_lrc.h | 30 +++++ 3 files changed, 237 insertions(+), 1 deletion(-) diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 972759489376..689e3e38b9c3 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -3919,6 +3919,11 @@ int num_ioctls; !Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser !Idrivers/gpu/drm/i915/i915_cmd_parser.c + + Logical Rings, Logical Ring Contexts and Execlists +!Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and Execlists +!Idrivers/gpu/drm/i915/intel_lrc.c + diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index cc923a96fa4c..c096b9b7f22a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -28,13 +28,108 @@ * */ -/* +/** + * DOC: Logical Rings, Logical Ring Contexts and Execlists + * + * Motivation: * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". * These expanded contexts enable a number of new abilities, especially * "Execlists" (also implemented in this file). * + * One of the main differences with the legacy HW contexts is that logical + * ring contexts incorporate many more things to the context's state, like + * PDPs or ringbuffer control registers: + * + * The reason why PDPs are included in the context is straightforward: as + * PPGTTs (per-process GTTs) are actually per-context, having the PDPs + * contained there mean you don't need to do a ppgtt->switch_mm yourself, + * instead, the GPU will do it for you on the context switch. + * + * But, what about the ringbuffer control registers (head, tail, etc..)? + * shouldn't we just need a set of those per engine command streamer? This is + * where the name "Logical Rings" starts to make sense: by virtualizing the + * rings, the engine cs shifts to a new "ring buffer" with every context + * switch. When you want to submit a workload to the GPU you: A) choose your + * context, B) find its appropriate virtualized ring, C) write commands to it + * and then, finally, D) tell the GPU to switch to that context. + * + * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch + * to a contexts is via a context execution list, ergo "Execlists". + * + * LRC implementation: + * Regarding the creation of contexts, we have: + * + * - One global default context. + * - One local default context for each opened fd. + * - One local extra context for each context create ioctl call. + * + * Now that ringbuffers belong per-context (and not per-engine, like before) + * and that contexts are uniquely tied to a given engine (and not reusable, + * like before) we need: + * + * - One ringbuffer per-engine inside each context. + * - One backing object per-engine inside each context. + * + * The global default context starts its life with these new objects fully + * allocated and populated. The local default context for each opened fd is + * more complex, because we don't know at creation time which engine is going + * to use them. To handle this, we have implemented a deferred creation of LR + * contexts: + * + * The local context starts its life as a hollow or blank holder, that only + * gets populated for a given engine once we receive an execbuffer. If later + * on we receive another execbuffer ioctl for the same context but a different + * engine, we allocate/populate a new ringbuffer and context backing object and + * so on. + * + * Finally, regarding local contexts created using the ioctl call: as they are + * only allowed with the render ring, we can allocate & populate them right + * away (no need to defer anything, at least for now). + * + * Execlists implementation: * Execlists are the new method by which, on gen8+ hardware, workloads are * submitted for execution (as opposed to the legacy, ringbuffer-based, method). + * This method works as follows: + * + * When a request is committed, its commands (the BB start and any leading or + * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer + * for the appropriate context. The tail pointer in the hardware context is not + * updated at this time, but instead, kept by the driver in the ringbuffer + * structure. A structure representing this request is added to a request queue + * for the appropriate engine: this structure contains a copy of the context's + * tail after the request was written to the ring buffer and a pointer to the + * context itself. + * + * If the engine's request queue was empty before the request was added, the + * queue is processed immediately. Otherwise the queue will be processed during + * a context switch interrupt. In any case, elements on the queue will get sent + * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a + * globally unique 20-bits submission ID. + * + * When execution of a request completes, the GPU updates the context status + * buffer with a context complete event and generates a context switch interrupt. + * During the interrupt handling, the driver examines the events in the buffer: + * for each context complete event, if the announced ID matches that on the head + * of the request queue, then that request is retired and removed from the queue. + * + * After processing, if any requests were retired and the queue is not empty + * then a new execution list can be submitted. The two requests at the front of + * the queue are next to be submitted but since a context may not occur twice in + * an execution list, if subsequent requests have the same ID as the first then + * the two requests must be combined. This is done simply by discarding requests + * at the head of the queue until either only one requests is left (in which case + * we use a NULL second context) or the first two requests have unique IDs. + * + * By always executing the first two requests in the queue the driver ensures + * that the GPU is kept as busy as possible. In the case where a single context + * completes but a second context is still executing, the request for this second + * context will be at the head of the queue when we remove the first one. This + * request will then be resubmitted along with a new request for a different context, + * which will cause the hardware to continue executing the second request and queue + * the new request (the GPU detects the condition of a context getting preempted + * with the same context and optimizes the context switch flow by not doing + * preemption, but just sampling the new tail pointer). + * */ #include @@ -109,6 +204,17 @@ enum { }; #define GEN8_CTX_ID_SHIFT 32 +/** + * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists + * @dev: DRM device. + * @enable_execlists: value of i915.enable_execlists module parameter. + * + * Only certain platforms support Execlists (the prerequisites being + * support for Logical Ring Contexts and Aliasing PPGTT or better), + * and only when enabled via module parameter. + * + * Return: 1 if Execlists is supported and has to be enabled. + */ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists) { WARN_ON(i915.enable_ppgtt == -1); @@ -123,6 +229,18 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists return 0; } +/** + * intel_execlists_ctx_id() - get the Execlists Context ID + * @ctx_obj: Logical Ring Context backing object. + * + * Do not confuse with ctx->id! Unfortunately we have a name overload + * here: the old context ID we pass to userspace as a handler so that + * they can refer to a context, and the new context ID we pass to the + * ELSP so that the GPU can inform us of the context status via + * interrupts. + * + * Return: 20-bits globally unique context ID. + */ u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) { u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj); @@ -313,6 +431,13 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, return false; } +/** + * intel_execlists_handle_ctx_events() - handle Context Switch interrupts + * @ring: Engine Command Streamer to handle. + * + * Check the unread Context Status Buffers and manage the submission of new + * contexts to the ELSP accordingly. + */ void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; @@ -481,6 +606,23 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, return logical_ring_invalidate_all_caches(ringbuf); } +/** + * execlists_submission() - submit a batchbuffer for execution, Execlists style + * @dev: DRM device. + * @file: DRM file. + * @ring: Engine Command Streamer to submit to. + * @ctx: Context to employ for this submission. + * @args: execbuffer call arguments. + * @vmas: list of vmas. + * @batch_obj: the batchbuffer to submit. + * @exec_start: batchbuffer start virtual address pointer. + * @flags: translated execbuffer call flags. + * + * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts + * away the submission details of the execbuffer ioctl call. + * + * Return: non-zero if the submission fails. + */ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *ring, struct intel_context *ctx, @@ -608,6 +750,15 @@ int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf) return 0; } +/** + * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload + * @ringbuf: Logical Ringbuffer to advance. + * + * The tail is updated in our logical ringbuffer struct, not in the actual context. What + * really happens during submission is that the context and current tail will be placed + * on a queue waiting for the ELSP to be ready to accept a new context submission. At that + * point, the tail *inside* the context is updated and the ELSP written to. + */ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf) { struct intel_engine_cs *ring = ringbuf->ring; @@ -781,6 +932,19 @@ static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes) return 0; } +/** + * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands + * + * @ringbuf: Logical ringbuffer. + * @num_dwords: number of DWORDs that we plan to write to the ringbuffer. + * + * The ringbuffer might not be ready to accept the commands right away (maybe it needs to + * be wrapped, or wait a bit for the tail to be updated). This function takes care of that + * and also preallocates a request (every workload submission is still mediated through + * requests, same as it did with legacy ringbuffer submission). + * + * Return: non-zero if the ringbuffer is not ready to be written to. + */ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords) { struct intel_engine_cs *ring = ringbuf->ring; @@ -1021,6 +1185,12 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf) return 0; } +/** + * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer + * + * @ring: Engine Command Streamer. + * + */ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; @@ -1215,6 +1385,16 @@ static int logical_vebox_ring_init(struct drm_device *dev) return logical_ring_init(dev, ring); } +/** + * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers + * @dev: DRM device. + * + * This function inits the engines for an Execlists submission style (the equivalent in the + * legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for + * those engines that are present in the hardware. + * + * Return: non-zero if the initialization failed. + */ int intel_logical_rings_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -1377,6 +1557,14 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o return 0; } +/** + * intel_lr_context_free() - free the LRC specific bits of a context + * @ctx: the LR context to free. + * + * The real context freeing is done in i915_gem_context_free: this only + * takes care of the bits that are LRC related: the per-engine backing + * objects and the logical ringbuffer. + */ void intel_lr_context_free(struct intel_context *ctx) { int i; @@ -1415,6 +1603,19 @@ static uint32_t get_lr_context_size(struct intel_engine_cs *ring) return ret; } +/** + * intel_lr_context_deferred_create() - create the LRC specific bits of a context + * @ctx: LR context to create. + * @ring: engine to be used with the context. + * + * This function can be called more than once, with different engines, if we plan + * to use the context with them. The context backing objects and the ringbuffers + * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why + * the creation is a deferred call: it's better to make sure first that we need to use + * a given ring with the context. + * + * Return: non-zero on eror. + */ int intel_lr_context_deferred_create(struct intel_context *ctx, struct intel_engine_cs *ring) { diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 117d1a4eb3b9..991d4499fb03 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -38,10 +38,21 @@ int intel_logical_rings_init(struct drm_device *dev); int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf); void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf); +/** + * intel_logical_ring_advance() - advance the ringbuffer tail + * @ringbuf: Ringbuffer to advance. + * + * The tail is only updated in our logical ringbuffer struct. + */ static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf) { ringbuf->tail &= ringbuf->size - 1; } +/** + * intel_logical_ring_emit() - write a DWORD to the ringbuffer. + * @ringbuf: Ringbuffer to write to. + * @data: DWORD to write. + */ static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf, u32 data) { @@ -66,6 +77,25 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file, u64 exec_start, u32 flags); u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj); +/** + * struct intel_ctx_submit_request - queued context submission request + * @ctx: Context to submit to the ELSP. + * @ring: Engine to submit it to. + * @tail: how far in the context's ringbuffer this request goes to. + * @execlist_link: link in the submission queue. + * @work: workqueue for processing this request in a bottom half. + * @elsp_submitted: no. of times this request has been sent to the ELSP. + * + * The ELSP only accepts two elements at a time, so we queue context/tail + * pairs on a given queue (ring->execlist_queue) until the hardware is + * available. The queue serves a double purpose: we also use it to keep track + * of the up to 2 contexts currently in the hardware (usually one in execution + * and the other queued up by the GPU): We only remove elements from the head + * of the queue when the hardware informs us that an element has been + * completed. + * + * All accesses to the queue are mediated by a spinlock (ring->execlist_lock). + */ struct intel_ctx_submit_request { struct intel_context *ctx; struct intel_engine_cs *ring; From d7f621e50704306c348ccb192f17047f1499f9bc Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Thu, 24 Jul 2014 17:04:49 +0100 Subject: [PATCH 180/640] drm/i915/bdw: Enable Logical Ring Contexts (hence, Execlists) The time has come, the Walrus said, to talk of many things. Signed-off-by: Oscar Mateo Reviewed-by: Damien Lespiau Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ed52ac744105..d4d2abbb8e6c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2088,7 +2088,7 @@ struct drm_i915_cmd_table { #define I915_NEED_GFX_HWS(dev) (INTEL_INFO(dev)->need_gfx_hws) #define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 6) -#define HAS_LOGICAL_RING_CONTEXTS(dev) 0 +#define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->gen >= 8) #define HAS_ALIASING_PPGTT(dev) (INTEL_INFO(dev)->gen >= 6) #define HAS_PPGTT(dev) (INTEL_INFO(dev)->gen >= 7 && !IS_GEN8(dev)) #define USES_PPGTT(dev) (i915.enable_ppgtt) From fd639ac6dcbcbae4f2131bf1390a032df659ffb7 Mon Sep 17 00:00:00 2001 From: Damien Lespiau Date: Fri, 15 Aug 2014 16:48:36 +0100 Subject: [PATCH 181/640] drm/i915/bdw: Disable execlists by default We still have a few missing bits and pieces to have execlists enabled by default eg. the error capture or the render state initialization and so it wouldn't be wise to enable it by default on BDW just yet. Cc: Daniel Vetter Cc: Thomas Daniel Signed-off-by: Damien Lespiau Reviewed-by: Paulo Zanoni Tested-by: Paulo Zanoni Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82740 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_params.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index f7f8350c3793..1dcb1bed5ef8 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -35,7 +35,7 @@ struct i915_params i915 __read_mostly = { .vbt_sdvo_panel_type = -1, .enable_rc6 = -1, .enable_fbc = -1, - .enable_execlists = -1, + .enable_execlists = 0, .enable_hangcheck = true, .enable_ppgtt = -1, .enable_psr = 1, @@ -122,7 +122,7 @@ MODULE_PARM_DESC(enable_ppgtt, module_param_named(enable_execlists, i915.enable_execlists, int, 0400); MODULE_PARM_DESC(enable_execlists, "Override execlists usage. " - "(-1=auto [default], 0=disabled, 1=enabled)"); + "(-1=auto, 0=disabled [default], 1=enabled)"); module_param_named(enable_psr, i915.enable_psr, int, 0600); MODULE_PARM_DESC(enable_psr, "Enable PSR (default: true)"); From 3a5f87c286515c54ff5c52c3e64d0c522b7570c0 Mon Sep 17 00:00:00 2001 From: Thomas Wood Date: Wed, 20 Aug 2014 14:45:00 +0100 Subject: [PATCH 182/640] drm: fix plane rotation when restoring fbdev configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure plane rotation is reset correctly when restoring the fbdev configuration by using drm_mode_plane_set_obj_prop which calls the driver's set_property callback. The rotation reset feature was introduced in commit 9783de2 (drm: Resetting rotation property) and the callback issue was originally addressed in a previous version of the patch, but the fix was not present in the final version. v2: Fix documentation warning Add some more details to the commit message (Daniel Vetter) Testcase: igt/kms_rotation_crc Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82236 Cc: Sonika Jindal Cc: Ville Syrjälä Cc: Dave Airlie Cc: Daniel Vetter Signed-off-by: Thomas Wood Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_crtc.c | 25 ++++++++++++++++++++----- drivers/gpu/drm/drm_fb_helper.c | 6 +++--- include/drm/drm_crtc.h | 3 +++ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 3c4a62169f28..d7e4c0e2e796 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -4156,12 +4156,25 @@ static int drm_mode_crtc_set_obj_prop(struct drm_mode_object *obj, return ret; } -static int drm_mode_plane_set_obj_prop(struct drm_mode_object *obj, - struct drm_property *property, - uint64_t value) +/** + * drm_mode_plane_set_obj_prop - set the value of a property + * @plane: drm plane object to set property value for + * @property: property to set + * @value: value the property should be set to + * + * This functions sets a given property on a given plane object. This function + * calls the driver's ->set_property callback and changes the software state of + * the property if the callback succeeds. + * + * Returns: + * Zero on success, error code on failure. + */ +int drm_mode_plane_set_obj_prop(struct drm_plane *plane, + struct drm_property *property, + uint64_t value) { int ret = -EINVAL; - struct drm_plane *plane = obj_to_plane(obj); + struct drm_mode_object *obj = &plane->base; if (plane->funcs->set_property) ret = plane->funcs->set_property(plane, property, value); @@ -4170,6 +4183,7 @@ static int drm_mode_plane_set_obj_prop(struct drm_mode_object *obj, return ret; } +EXPORT_SYMBOL(drm_mode_plane_set_obj_prop); /** * drm_mode_getproperty_ioctl - get the current value of a object's property @@ -4308,7 +4322,8 @@ int drm_mode_obj_set_property_ioctl(struct drm_device *dev, void *data, ret = drm_mode_crtc_set_obj_prop(arg_obj, property, arg->value); break; case DRM_MODE_OBJECT_PLANE: - ret = drm_mode_plane_set_obj_prop(arg_obj, property, arg->value); + ret = drm_mode_plane_set_obj_prop(obj_to_plane(arg_obj), + property, arg->value); break; } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index d139eddb3d61..99569ee5adee 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -350,9 +350,9 @@ static bool restore_fbdev_mode(struct drm_fb_helper *fb_helper) drm_plane_force_disable(plane); if (dev->mode_config.rotation_property) { - drm_object_property_set_value(&plane->base, - dev->mode_config.rotation_property, - BIT(DRM_ROTATE_0)); + drm_mode_plane_set_obj_prop(plane, + dev->mode_config.rotation_property, + BIT(DRM_ROTATE_0)); } } diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 62f73bdbcc47..38fae5d9ad73 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -1121,6 +1121,9 @@ extern int drm_mode_obj_get_properties_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int drm_mode_obj_set_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +extern int drm_mode_plane_set_obj_prop(struct drm_plane *plane, + struct drm_property *property, + uint64_t value); extern void drm_fb_get_bpp_depth(uint32_t format, unsigned int *depth, int *bpp); From c28135481428d0674fcc1da0740ed3f4343df5b2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 22 Aug 2014 22:39:37 +0200 Subject: [PATCH 183/640] drm/i915: Update DRIVER_DATE to 20140822 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d4d2abbb8e6c..d309725a7d7b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -54,7 +54,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20140808" +#define DRIVER_DATE "20140822" enum pipe { INVALID_PIPE = -1, From 604effb782a8a4d9a20c8af16bcbf86d742db119 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 26 Aug 2014 13:26:56 +0300 Subject: [PATCH 184/640] drm/i915: fix suspend/resume for GENs w/o runtime PM support Before sharing common parts between the system and runtime s/r handlers we WARNed if the runtime s/r handlers were called on GENs that didn't support RPM. But this WARN is not correct if the same handler is called from the system s/r path, since that can happen on any platform. This also broke system s/r on old platforms. The issue was introduced in commit 016970beb05da6285c2f3ed2bee1c676cb75972e Author: Sagar Kamble Date: Wed Aug 13 23:07:06 2014 +0530 v2: - remove the WARN and depend on the HAS_RUNTIME_PM check in rutime_suspend/resume instead (Daniel) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82751 Signed-off-by: Imre Deak Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 117f5c16df74..0f7a522682a5 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1373,7 +1373,9 @@ static int intel_runtime_suspend(struct device *device) if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6(dev)))) return -ENODEV; - WARN_ON(!HAS_RUNTIME_PM(dev)); + if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev))) + return -ENODEV; + assert_force_wake_inactive(dev_priv); DRM_DEBUG_KMS("Suspending device\n"); @@ -1441,7 +1443,8 @@ static int intel_runtime_resume(struct device *device) struct drm_i915_private *dev_priv = dev->dev_private; int ret; - WARN_ON(!HAS_RUNTIME_PM(dev)); + if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev))) + return -ENODEV; DRM_DEBUG_KMS("Resuming device\n"); @@ -1476,16 +1479,12 @@ static int intel_suspend_complete(struct drm_i915_private *dev_priv) struct drm_device *dev = dev_priv->dev; int ret; - if (IS_GEN6(dev)) { - ret = 0; - } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) ret = hsw_suspend_complete(dev_priv); - } else if (IS_VALLEYVIEW(dev)) { + else if (IS_VALLEYVIEW(dev)) ret = vlv_suspend_complete(dev_priv); - } else { - ret = -ENODEV; - WARN_ON(1); - } + else + ret = 0; return ret; } @@ -1501,16 +1500,14 @@ static int intel_resume_prepare(struct drm_i915_private *dev_priv, struct drm_device *dev = dev_priv->dev; int ret; - if (IS_GEN6(dev)) { + if (IS_GEN6(dev)) ret = snb_resume_prepare(dev_priv, rpm_resume); - } else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { + else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) ret = hsw_resume_prepare(dev_priv, rpm_resume); - } else if (IS_VALLEYVIEW(dev)) { + else if (IS_VALLEYVIEW(dev)) ret = vlv_resume_prepare(dev_priv, rpm_resume); - } else { - WARN_ON(1); - ret = -ENODEV; - } + else + ret = 0; return ret; } From f1217ed09f827e42a49ffa6a5aab672aa6f57a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 13:16:04 +0200 Subject: [PATCH 185/640] drm/ttm: move fpfn and lpfn into each placement v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows us to more fine grained specify where to place the buffer object. v2: rebased on drm-next, add bochs changes as well Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/ast/ast_drv.h | 2 +- drivers/gpu/drm/ast/ast_ttm.c | 20 ++- drivers/gpu/drm/bochs/bochs.h | 2 +- drivers/gpu/drm/bochs/bochs_mm.c | 20 ++- drivers/gpu/drm/cirrus/cirrus_drv.h | 2 +- drivers/gpu/drm/cirrus/cirrus_ttm.c | 17 +- drivers/gpu/drm/mgag200/mgag200_drv.h | 2 +- drivers/gpu/drm/mgag200/mgag200_ttm.c | 20 ++- drivers/gpu/drm/nouveau/nouveau_bo.c | 52 ++++-- drivers/gpu/drm/nouveau/nouveau_bo.h | 4 +- drivers/gpu/drm/nouveau/nouveau_ttm.c | 9 +- drivers/gpu/drm/qxl/qxl_drv.h | 2 +- drivers/gpu/drm/qxl/qxl_object.c | 17 +- drivers/gpu/drm/qxl/qxl_ttm.c | 8 +- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 71 ++++++--- drivers/gpu/drm/radeon/radeon_ttm.c | 25 +-- drivers/gpu/drm/radeon/radeon_uvd.c | 8 +- drivers/gpu/drm/ttm/ttm_bo.c | 93 +++++------ drivers/gpu/drm/ttm/ttm_bo_manager.c | 9 +- drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 148 +++++++++++------- drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c | 22 ++- drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 10 +- drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 3 +- include/drm/ttm/ttm_bo_api.h | 40 +++-- include/drm/ttm/ttm_bo_driver.h | 3 +- 26 files changed, 352 insertions(+), 259 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 957d4fabf1e1..cb91c2acc3cb 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -316,7 +316,7 @@ struct ast_bo { struct ttm_placement placement; struct ttm_bo_kmap_obj kmap; struct drm_gem_object gem; - u32 placements[3]; + struct ttm_place placements[3]; int pin_count; }; #define gem_to_ast_bo(gobj) container_of((gobj), struct ast_bo, gem) diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c index b8246227bab0..8008ea0bc76c 100644 --- a/drivers/gpu/drm/ast/ast_ttm.c +++ b/drivers/gpu/drm/ast/ast_ttm.c @@ -293,18 +293,22 @@ void ast_mm_fini(struct ast_private *ast) void ast_ttm_placement(struct ast_bo *bo, int domain) { u32 c = 0; - bo->placement.fpfn = 0; - bo->placement.lpfn = 0; + unsigned i; + bo->placement.placement = bo->placements; bo->placement.busy_placement = bo->placements; if (domain & TTM_PL_FLAG_VRAM) - bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; + bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; if (domain & TTM_PL_FLAG_SYSTEM) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; if (!c) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; bo->placement.num_placement = c; bo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + bo->placements[i].fpfn = 0; + bo->placements[i].lpfn = 0; + } } int ast_bo_create(struct drm_device *dev, int size, int align, @@ -360,7 +364,7 @@ int ast_bo_pin(struct ast_bo *bo, u32 pl_flag, u64 *gpu_addr) ast_ttm_placement(bo, pl_flag); for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -383,7 +387,7 @@ int ast_bo_unpin(struct ast_bo *bo) return 0; for (i = 0; i < bo->placement.num_placement ; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -407,7 +411,7 @@ int ast_bo_push_sysram(struct ast_bo *bo) ast_ttm_placement(bo, TTM_PL_FLAG_SYSTEM); for (i = 0; i < bo->placement.num_placement ; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) { diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h index 7eb52dd44b01..4f6e7b3a3635 100644 --- a/drivers/gpu/drm/bochs/bochs.h +++ b/drivers/gpu/drm/bochs/bochs.h @@ -99,7 +99,7 @@ struct bochs_bo { struct ttm_placement placement; struct ttm_bo_kmap_obj kmap; struct drm_gem_object gem; - u32 placements[3]; + struct ttm_place placements[3]; int pin_count; }; diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c index 1728a1b0b813..2af30e7607d7 100644 --- a/drivers/gpu/drm/bochs/bochs_mm.c +++ b/drivers/gpu/drm/bochs/bochs_mm.c @@ -257,20 +257,26 @@ void bochs_mm_fini(struct bochs_device *bochs) static void bochs_ttm_placement(struct bochs_bo *bo, int domain) { + unsigned i; u32 c = 0; - bo->placement.fpfn = 0; - bo->placement.lpfn = 0; bo->placement.placement = bo->placements; bo->placement.busy_placement = bo->placements; if (domain & TTM_PL_FLAG_VRAM) { - bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED + bo->placements[c++].flags = TTM_PL_FLAG_WC + | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; } if (domain & TTM_PL_FLAG_SYSTEM) { - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING + | TTM_PL_FLAG_SYSTEM; } if (!c) { - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING + | TTM_PL_FLAG_SYSTEM; + } + for (i = 0; i < c; ++i) { + bo->placements[i].fpfn = 0; + bo->placements[i].lpfn = 0; } bo->placement.num_placement = c; bo->placement.num_busy_placement = c; @@ -294,7 +300,7 @@ int bochs_bo_pin(struct bochs_bo *bo, u32 pl_flag, u64 *gpu_addr) bochs_ttm_placement(bo, pl_flag); for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -319,7 +325,7 @@ int bochs_bo_unpin(struct bochs_bo *bo) return 0; for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.h b/drivers/gpu/drm/cirrus/cirrus_drv.h index 401c890b6c6a..dd2cfc9024aa 100644 --- a/drivers/gpu/drm/cirrus/cirrus_drv.h +++ b/drivers/gpu/drm/cirrus/cirrus_drv.h @@ -163,7 +163,7 @@ struct cirrus_bo { struct ttm_placement placement; struct ttm_bo_kmap_obj kmap; struct drm_gem_object gem; - u32 placements[3]; + struct ttm_place placements[3]; int pin_count; }; #define gem_to_cirrus_bo(gobj) container_of((gobj), struct cirrus_bo, gem) diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c index 92e6b7786097..3e7d758330a9 100644 --- a/drivers/gpu/drm/cirrus/cirrus_ttm.c +++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c @@ -298,18 +298,21 @@ void cirrus_mm_fini(struct cirrus_device *cirrus) void cirrus_ttm_placement(struct cirrus_bo *bo, int domain) { u32 c = 0; - bo->placement.fpfn = 0; - bo->placement.lpfn = 0; + unsigned i; bo->placement.placement = bo->placements; bo->placement.busy_placement = bo->placements; if (domain & TTM_PL_FLAG_VRAM) - bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; + bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; if (domain & TTM_PL_FLAG_SYSTEM) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; if (!c) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; bo->placement.num_placement = c; bo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + bo->placements[i].fpfn = 0; + bo->placements[i].lpfn = 0; + } } int cirrus_bo_create(struct drm_device *dev, int size, int align, @@ -365,7 +368,7 @@ int cirrus_bo_pin(struct cirrus_bo *bo, u32 pl_flag, u64 *gpu_addr) cirrus_ttm_placement(bo, pl_flag); for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -392,7 +395,7 @@ int cirrus_bo_push_sysram(struct cirrus_bo *bo) cirrus_ttm_placement(bo, TTM_PL_FLAG_SYSTEM); for (i = 0; i < bo->placement.num_placement ; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) { diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index 80de23d9b9c9..2e2b76aa4e17 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -224,7 +224,7 @@ struct mgag200_bo { struct ttm_placement placement; struct ttm_bo_kmap_obj kmap; struct drm_gem_object gem; - u32 placements[3]; + struct ttm_place placements[3]; int pin_count; }; #define gem_to_mga_bo(gobj) container_of((gobj), struct mgag200_bo, gem) diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c index 5a00e90696de..be883ef5a1d3 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ttm.c +++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c @@ -293,18 +293,22 @@ void mgag200_mm_fini(struct mga_device *mdev) void mgag200_ttm_placement(struct mgag200_bo *bo, int domain) { u32 c = 0; - bo->placement.fpfn = 0; - bo->placement.lpfn = 0; + unsigned i; + bo->placement.placement = bo->placements; bo->placement.busy_placement = bo->placements; if (domain & TTM_PL_FLAG_VRAM) - bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; + bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; if (domain & TTM_PL_FLAG_SYSTEM) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; if (!c) - bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; bo->placement.num_placement = c; bo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + bo->placements[i].fpfn = 0; + bo->placements[i].lpfn = 0; + } } int mgag200_bo_create(struct drm_device *dev, int size, int align, @@ -361,7 +365,7 @@ int mgag200_bo_pin(struct mgag200_bo *bo, u32 pl_flag, u64 *gpu_addr) mgag200_ttm_placement(bo, pl_flag); for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -384,7 +388,7 @@ int mgag200_bo_unpin(struct mgag200_bo *bo) return 0; for (i = 0; i < bo->placement.num_placement ; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) return ret; @@ -408,7 +412,7 @@ int mgag200_bo_push_sysram(struct mgag200_bo *bo) mgag200_ttm_placement(bo, TTM_PL_FLAG_SYSTEM); for (i = 0; i < bo->placement.num_placement ; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false); if (ret) { diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 01da508625f2..0591ca0734e3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -241,16 +241,16 @@ nouveau_bo_new(struct drm_device *dev, int size, int align, } static void -set_placement_list(uint32_t *pl, unsigned *n, uint32_t type, uint32_t flags) +set_placement_list(struct ttm_place *pl, unsigned *n, uint32_t type, uint32_t flags) { *n = 0; if (type & TTM_PL_FLAG_VRAM) - pl[(*n)++] = TTM_PL_FLAG_VRAM | flags; + pl[(*n)++].flags = TTM_PL_FLAG_VRAM | flags; if (type & TTM_PL_FLAG_TT) - pl[(*n)++] = TTM_PL_FLAG_TT | flags; + pl[(*n)++].flags = TTM_PL_FLAG_TT | flags; if (type & TTM_PL_FLAG_SYSTEM) - pl[(*n)++] = TTM_PL_FLAG_SYSTEM | flags; + pl[(*n)++].flags = TTM_PL_FLAG_SYSTEM | flags; } static void @@ -258,6 +258,7 @@ set_placement_range(struct nouveau_bo *nvbo, uint32_t type) { struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); u32 vram_pages = drm->device.info.ram_size >> PAGE_SHIFT; + unsigned i, fpfn, lpfn; if (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS && nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) && @@ -269,11 +270,19 @@ set_placement_range(struct nouveau_bo *nvbo, uint32_t type) * at the same time. */ if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) { - nvbo->placement.fpfn = vram_pages / 2; - nvbo->placement.lpfn = ~0; + fpfn = vram_pages / 2; + lpfn = ~0; } else { - nvbo->placement.fpfn = 0; - nvbo->placement.lpfn = vram_pages / 2; + fpfn = 0; + lpfn = vram_pages / 2; + } + for (i = 0; i < nvbo->placement.num_placement; ++i) { + nvbo->placements[i].fpfn = fpfn; + nvbo->placements[i].lpfn = lpfn; + } + for (i = 0; i < nvbo->placement.num_busy_placement; ++i) { + nvbo->busy_placements[i].fpfn = fpfn; + nvbo->busy_placements[i].lpfn = lpfn; } } } @@ -1041,12 +1050,15 @@ static int nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { - u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING; + struct ttm_place placement_memtype = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING + }; struct ttm_placement placement; struct ttm_mem_reg tmp_mem; int ret; - placement.fpfn = placement.lpfn = 0; placement.num_placement = placement.num_busy_placement = 1; placement.placement = placement.busy_placement = &placement_memtype; @@ -1074,12 +1086,15 @@ static int nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr, bool no_wait_gpu, struct ttm_mem_reg *new_mem) { - u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING; + struct ttm_place placement_memtype = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING + }; struct ttm_placement placement; struct ttm_mem_reg tmp_mem; int ret; - placement.fpfn = placement.lpfn = 0; placement.num_placement = placement.num_busy_placement = 1; placement.placement = placement.busy_placement = &placement_memtype; @@ -1294,7 +1309,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo) struct nouveau_bo *nvbo = nouveau_bo(bo); struct nvif_device *device = &drm->device; u32 mappable = nv_device_resource_len(nvkm_device(device), 1) >> PAGE_SHIFT; - int ret; + int i, ret; /* as long as the bo isn't in vram, and isn't tiled, we've got * nothing to do here. @@ -1319,9 +1334,16 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo) bo->mem.start + bo->mem.num_pages < mappable) return 0; + for (i = 0; i < nvbo->placement.num_placement; ++i) { + nvbo->placements[i].fpfn = 0; + nvbo->placements[i].lpfn = mappable; + } + + for (i = 0; i < nvbo->placement.num_busy_placement; ++i) { + nvbo->busy_placements[i].fpfn = 0; + nvbo->busy_placements[i].lpfn = mappable; + } - nvbo->placement.fpfn = 0; - nvbo->placement.lpfn = mappable; nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_VRAM, 0); return nouveau_bo_validate(nvbo, false, false); } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index ff17c1f432fc..4ef88e84a694 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -9,8 +9,8 @@ struct nouveau_bo { struct ttm_buffer_object bo; struct ttm_placement placement; u32 valid_domains; - u32 placements[3]; - u32 busy_placements[3]; + struct ttm_place placements[3]; + struct ttm_place busy_placements[3]; struct ttm_bo_kmap_obj kmap; struct list_head head; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 53874b76b031..e81d086577ce 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -71,8 +71,7 @@ nouveau_vram_manager_del(struct ttm_mem_type_manager *man, static int nouveau_vram_manager_new(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem) { struct nouveau_drm *drm = nouveau_bdev(man->bdev); @@ -158,8 +157,7 @@ nouveau_gart_manager_del(struct ttm_mem_type_manager *man, static int nouveau_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem) { struct nouveau_drm *drm = nouveau_bdev(bo->bdev); @@ -239,8 +237,7 @@ nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem) static int nv04_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem) { struct nouveau_mem *node; diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 36ed40ba773f..f6022b703645 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -106,7 +106,7 @@ struct qxl_bo { /* Protected by gem.mutex */ struct list_head list; /* Protected by tbo.reserved */ - u32 placements[3]; + struct ttm_place placements[3]; struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c index b95f144f0b49..adad12d30372 100644 --- a/drivers/gpu/drm/qxl/qxl_object.c +++ b/drivers/gpu/drm/qxl/qxl_object.c @@ -55,21 +55,24 @@ void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned) { u32 c = 0; u32 pflag = pinned ? TTM_PL_FLAG_NO_EVICT : 0; + unsigned i; - qbo->placement.fpfn = 0; - qbo->placement.lpfn = 0; qbo->placement.placement = qbo->placements; qbo->placement.busy_placement = qbo->placements; if (domain == QXL_GEM_DOMAIN_VRAM) - qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM | pflag; + qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM | pflag; if (domain == QXL_GEM_DOMAIN_SURFACE) - qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0 | pflag; + qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0 | pflag; if (domain == QXL_GEM_DOMAIN_CPU) - qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | pflag; + qbo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | pflag; if (!c) - qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + qbo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; qbo->placement.num_placement = c; qbo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + qbo->placements[i].fpfn = 0; + qbo->placements[i].lpfn = 0; + } } @@ -259,7 +262,7 @@ int qxl_bo_unpin(struct qxl_bo *bo) if (bo->pin_count) return 0; for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (unlikely(r != 0)) dev_err(qdev->dev, "%p validate failed for unpin\n", bo); diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 71a1baeac14e..f66c59b222f1 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -188,11 +188,13 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *placement) { struct qxl_bo *qbo; - static u32 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + static struct ttm_place placements = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM + }; if (!qxl_ttm_bo_is_qxl_bo(bo)) { - placement->fpfn = 0; - placement->lpfn = 0; placement->placement = &placements; placement->busy_placement = &placements; placement->num_placement = 1; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b321ad4dcafd..bb01dab513dd 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -469,7 +469,7 @@ struct radeon_bo { struct list_head list; /* Protected by tbo.reserved */ u32 initial_domain; - u32 placements[3]; + struct ttm_place placements[3]; struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 287523807989..0129c7efae3b 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -97,40 +97,56 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) { u32 c = 0, i; - rbo->placement.fpfn = 0; - rbo->placement.lpfn = 0; rbo->placement.placement = rbo->placements; rbo->placement.busy_placement = rbo->placements; if (domain & RADEON_GEM_DOMAIN_VRAM) - rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | - TTM_PL_FLAG_VRAM; + rbo->placements[c++].flags = TTM_PL_FLAG_WC | + TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_VRAM; + if (domain & RADEON_GEM_DOMAIN_GTT) { if (rbo->flags & RADEON_GEM_GTT_UC) { - rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_TT; + rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_TT; + } else if ((rbo->flags & RADEON_GEM_GTT_WC) || (rbo->rdev->flags & RADEON_IS_AGP)) { - rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | + rbo->placements[c++].flags = TTM_PL_FLAG_WC | + TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_TT; } else { - rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; + rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_TT; } } + if (domain & RADEON_GEM_DOMAIN_CPU) { if (rbo->flags & RADEON_GEM_GTT_UC) { - rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_SYSTEM; + rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_SYSTEM; + } else if ((rbo->flags & RADEON_GEM_GTT_WC) || rbo->rdev->flags & RADEON_IS_AGP) { - rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | + rbo->placements[c++].flags = TTM_PL_FLAG_WC | + TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_SYSTEM; } else { - rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM; + rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_SYSTEM; } } if (!c) - rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; + rbo->placements[c++].flags = TTM_PL_MASK_CACHING | + TTM_PL_FLAG_SYSTEM; + rbo->placement.num_placement = c; rbo->placement.num_busy_placement = c; + for (i = 0; i < c; ++i) { + rbo->placements[i].fpfn = 0; + rbo->placements[i].lpfn = 0; + } + /* * Use two-ended allocation depending on the buffer size to * improve fragmentation quality. @@ -138,7 +154,7 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) */ if (rbo->tbo.mem.size > 512 * 1024) { for (i = 0; i < c; i++) { - rbo->placements[i] |= TTM_PL_FLAG_TOPDOWN; + rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN; } } } @@ -287,21 +303,22 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset, return 0; } radeon_ttm_placement_from_domain(bo, domain); - if (domain == RADEON_GEM_DOMAIN_VRAM) { + for (i = 0; i < bo->placement.num_placement; i++) { + unsigned lpfn = 0; + /* force to pin into visible video ram */ - bo->placement.lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT; - } - if (max_offset) { - u64 lpfn = max_offset >> PAGE_SHIFT; + if (bo->placements[i].flags & TTM_PL_FLAG_VRAM) + lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT; + else + lpfn = bo->rdev->mc.gtt_size >> PAGE_SHIFT; /* ??? */ - if (!bo->placement.lpfn) - bo->placement.lpfn = bo->rdev->mc.gtt_size >> PAGE_SHIFT; + if (max_offset) + lpfn = min (lpfn, (unsigned)(max_offset >> PAGE_SHIFT)); - if (lpfn < bo->placement.lpfn) - bo->placement.lpfn = lpfn; + bo->placements[i].lpfn = lpfn; + bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; } - for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; + r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (likely(r == 0)) { bo->pin_count = 1; @@ -333,8 +350,10 @@ int radeon_bo_unpin(struct radeon_bo *bo) bo->pin_count--; if (bo->pin_count) return 0; - for (i = 0; i < bo->placement.num_placement; i++) - bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; + for (i = 0; i < bo->placement.num_placement; i++) { + bo->placements[i].lpfn = 0; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; + } r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); if (likely(r == 0)) { if (bo->tbo.mem.mem_type == TTM_PL_VRAM) @@ -735,7 +754,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* hurrah the memory is not visible ! */ radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); - rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; + rbo->placements[0].lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; r = ttm_bo_validate(bo, &rbo->placement, false, false); if (unlikely(r == -ENOMEM)) { radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 12e37b1ddc40..822eb3630045 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -178,12 +178,15 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, static void radeon_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *placement) { + static struct ttm_place placements = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM + }; + struct radeon_bo *rbo; - static u32 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; if (!radeon_ttm_bo_is_radeon_bo(bo)) { - placement->fpfn = 0; - placement->lpfn = 0; placement->placement = &placements; placement->busy_placement = &placements; placement->num_placement = 1; @@ -286,20 +289,20 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, struct radeon_device *rdev; struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; - u32 placements; + struct ttm_place placements; struct ttm_placement placement; int r; rdev = radeon_get_rdev(bo->bdev); tmp_mem = *new_mem; tmp_mem.mm_node = NULL; - placement.fpfn = 0; - placement.lpfn = 0; placement.num_placement = 1; placement.placement = &placements; placement.num_busy_placement = 1; placement.busy_placement = &placements; - placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; + placements.fpfn = 0; + placements.lpfn = 0; + placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_gpu); if (unlikely(r)) { @@ -334,19 +337,19 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem = &bo->mem; struct ttm_mem_reg tmp_mem; struct ttm_placement placement; - u32 placements; + struct ttm_place placements; int r; rdev = radeon_get_rdev(bo->bdev); tmp_mem = *new_mem; tmp_mem.mm_node = NULL; - placement.fpfn = 0; - placement.lpfn = 0; placement.num_placement = 1; placement.placement = &placements; placement.num_busy_placement = 1; placement.busy_placement = &placements; - placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; + placements.fpfn = 0; + placements.lpfn = 0; + placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_gpu); if (unlikely(r)) { diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 341848a14376..25c8a1fd152c 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -233,8 +233,12 @@ int radeon_uvd_resume(struct radeon_device *rdev) void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) { - rbo->placement.fpfn = 0 >> PAGE_SHIFT; - rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; + int i; + + for (i = 0; i < rbo->placement.num_placement; ++i) { + rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; + rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; + } } void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 3da89d5dab60..b992ec3c318a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -53,12 +53,13 @@ static struct attribute ttm_bo_count = { .mode = S_IRUGO }; -static inline int ttm_mem_type_from_flags(uint32_t flags, uint32_t *mem_type) +static inline int ttm_mem_type_from_place(const struct ttm_place *place, + uint32_t *mem_type) { int i; for (i = 0; i <= TTM_PL_PRIV5; i++) - if (flags & (1 << i)) { + if (place->flags & (1 << i)) { *mem_type = i; return 0; } @@ -89,12 +90,12 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, bo, bo->mem.num_pages, bo->mem.size >> 10, bo->mem.size >> 20); for (i = 0; i < placement->num_placement; i++) { - ret = ttm_mem_type_from_flags(placement->placement[i], + ret = ttm_mem_type_from_place(&placement->placement[i], &mem_type); if (ret) return; pr_err(" placement[%d]=0x%08X (%d)\n", - i, placement->placement[i], mem_type); + i, placement->placement[i].flags, mem_type); ttm_mem_type_debug(bo->bdev, mem_type); } } @@ -685,8 +686,6 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, evict_mem.bus.io_reserved_vm = false; evict_mem.bus.io_reserved_count = 0; - placement.fpfn = 0; - placement.lpfn = 0; placement.num_placement = 0; placement.num_busy_placement = 0; bdev->driver->evict_flags(bo, &placement); @@ -774,7 +773,7 @@ EXPORT_SYMBOL(ttm_bo_mem_put); */ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo, uint32_t mem_type, - struct ttm_placement *placement, + const struct ttm_place *place, struct ttm_mem_reg *mem, bool interruptible, bool no_wait_gpu) @@ -784,7 +783,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo, int ret; do { - ret = (*man->func->get_node)(man, bo, placement, 0, mem); + ret = (*man->func->get_node)(man, bo, place, mem); if (unlikely(ret != 0)) return ret; if (mem->mm_node) @@ -827,18 +826,18 @@ static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man, static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man, uint32_t mem_type, - uint32_t proposed_placement, + const struct ttm_place *place, uint32_t *masked_placement) { uint32_t cur_flags = ttm_bo_type_flags(mem_type); - if ((cur_flags & proposed_placement & TTM_PL_MASK_MEM) == 0) + if ((cur_flags & place->flags & TTM_PL_MASK_MEM) == 0) return false; - if ((proposed_placement & man->available_caching) == 0) + if ((place->flags & man->available_caching) == 0) return false; - cur_flags |= (proposed_placement & man->available_caching); + cur_flags |= (place->flags & man->available_caching); *masked_placement = cur_flags; return true; @@ -869,15 +868,14 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, mem->mm_node = NULL; for (i = 0; i < placement->num_placement; ++i) { - ret = ttm_mem_type_from_flags(placement->placement[i], - &mem_type); + const struct ttm_place *place = &placement->placement[i]; + + ret = ttm_mem_type_from_place(place, &mem_type); if (ret) return ret; man = &bdev->man[mem_type]; - type_ok = ttm_bo_mt_compatible(man, - mem_type, - placement->placement[i], + type_ok = ttm_bo_mt_compatible(man, mem_type, place, &cur_flags); if (!type_ok) @@ -889,7 +887,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, * Use the access and other non-mapping-related flag bits from * the memory placement flags to the current flags */ - ttm_flag_masked(&cur_flags, placement->placement[i], + ttm_flag_masked(&cur_flags, place->flags, ~TTM_PL_MASK_MEMTYPE); if (mem_type == TTM_PL_SYSTEM) @@ -897,8 +895,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (man->has_type && man->use_type) { type_found = true; - ret = (*man->func->get_node)(man, bo, placement, - cur_flags, mem); + ret = (*man->func->get_node)(man, bo, place, mem); if (unlikely(ret)) return ret; } @@ -916,17 +913,15 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, return -EINVAL; for (i = 0; i < placement->num_busy_placement; ++i) { - ret = ttm_mem_type_from_flags(placement->busy_placement[i], - &mem_type); + const struct ttm_place *place = &placement->busy_placement[i]; + + ret = ttm_mem_type_from_place(place, &mem_type); if (ret) return ret; man = &bdev->man[mem_type]; if (!man->has_type) continue; - if (!ttm_bo_mt_compatible(man, - mem_type, - placement->busy_placement[i], - &cur_flags)) + if (!ttm_bo_mt_compatible(man, mem_type, place, &cur_flags)) continue; cur_flags = ttm_bo_select_caching(man, bo->mem.placement, @@ -935,7 +930,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, * Use the access and other non-mapping-related flag bits from * the memory placement flags to the current flags */ - ttm_flag_masked(&cur_flags, placement->busy_placement[i], + ttm_flag_masked(&cur_flags, place->flags, ~TTM_PL_MASK_MEMTYPE); if (mem_type == TTM_PL_SYSTEM) { @@ -945,7 +940,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, return 0; } - ret = ttm_bo_mem_force_space(bo, mem_type, placement, mem, + ret = ttm_bo_mem_force_space(bo, mem_type, place, mem, interruptible, no_wait_gpu); if (ret == 0 && mem->mm_node) { mem->placement = cur_flags; @@ -1006,20 +1001,27 @@ static bool ttm_bo_mem_compat(struct ttm_placement *placement, { int i; - if (mem->mm_node && placement->lpfn != 0 && - (mem->start < placement->fpfn || - mem->start + mem->num_pages > placement->lpfn)) - return false; - for (i = 0; i < placement->num_placement; i++) { - *new_flags = placement->placement[i]; + const struct ttm_place *heap = &placement->placement[i]; + if (mem->mm_node && heap->lpfn != 0 && + (mem->start < heap->fpfn || + mem->start + mem->num_pages > heap->lpfn)) + continue; + + *new_flags = heap->flags; if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) && (*new_flags & mem->placement & TTM_PL_MASK_MEM)) return true; } for (i = 0; i < placement->num_busy_placement; i++) { - *new_flags = placement->busy_placement[i]; + const struct ttm_place *heap = &placement->busy_placement[i]; + if (mem->mm_node && heap->lpfn != 0 && + (mem->start < heap->fpfn || + mem->start + mem->num_pages > heap->lpfn)) + continue; + + *new_flags = heap->flags; if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) && (*new_flags & mem->placement & TTM_PL_MASK_MEM)) return true; @@ -1037,11 +1039,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, uint32_t new_flags; lockdep_assert_held(&bo->resv->lock.base); - /* Check that range is valid */ - if (placement->lpfn || placement->fpfn) - if (placement->fpfn > placement->lpfn || - (placement->lpfn - placement->fpfn) < bo->num_pages) - return -EINVAL; /* * Check whether we need to move buffer. */ @@ -1070,15 +1067,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_validate); -int ttm_bo_check_placement(struct ttm_buffer_object *bo, - struct ttm_placement *placement) -{ - BUG_ON((placement->fpfn || placement->lpfn) && - (bo->mem.num_pages > (placement->lpfn - placement->fpfn))); - - return 0; -} - int ttm_bo_init(struct ttm_bo_device *bdev, struct ttm_buffer_object *bo, unsigned long size, @@ -1147,15 +1135,12 @@ int ttm_bo_init(struct ttm_bo_device *bdev, atomic_inc(&bo->glob->bo_count); drm_vma_node_reset(&bo->vma_node); - ret = ttm_bo_check_placement(bo, placement); - /* * For ttm_bo_type_device buffers, allocate * address space from the device. */ - if (likely(!ret) && - (bo->type == ttm_bo_type_device || - bo->type == ttm_bo_type_sg)) + if (bo->type == ttm_bo_type_device || + bo->type == ttm_bo_type_sg) ret = drm_vma_offset_add(&bdev->vma_manager, &bo->vma_node, bo->mem.num_pages); diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c index 9e103a4875c8..964387fc5c8f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_manager.c +++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c @@ -49,8 +49,7 @@ struct ttm_range_manager { static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem) { struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv; @@ -60,7 +59,7 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man, unsigned long lpfn; int ret; - lpfn = placement->lpfn; + lpfn = place->lpfn; if (!lpfn) lpfn = man->size; @@ -68,13 +67,13 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man, if (!node) return -ENOMEM; - if (flags & TTM_PL_FLAG_TOPDOWN) + if (place->flags & TTM_PL_FLAG_TOPDOWN) aflags = DRM_MM_CREATE_TOP; spin_lock(&rman->lock); ret = drm_mm_insert_node_in_range_generic(mm, node, mem->num_pages, mem->page_alignment, 0, - placement->fpfn, lpfn, + place->fpfn, lpfn, DRM_MM_SEARCH_BEST, aflags); spin_unlock(&rman->lock); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c index 6327cfc36805..37c093c0c7b8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c @@ -30,66 +30,101 @@ #include #include -static uint32_t vram_placement_flags = TTM_PL_FLAG_VRAM | - TTM_PL_FLAG_CACHED; - -static uint32_t vram_ne_placement_flags = TTM_PL_FLAG_VRAM | - TTM_PL_FLAG_CACHED | - TTM_PL_FLAG_NO_EVICT; - -static uint32_t sys_placement_flags = TTM_PL_FLAG_SYSTEM | - TTM_PL_FLAG_CACHED; - -static uint32_t sys_ne_placement_flags = TTM_PL_FLAG_SYSTEM | - TTM_PL_FLAG_CACHED | - TTM_PL_FLAG_NO_EVICT; - -static uint32_t gmr_placement_flags = VMW_PL_FLAG_GMR | - TTM_PL_FLAG_CACHED; - -static uint32_t gmr_ne_placement_flags = VMW_PL_FLAG_GMR | - TTM_PL_FLAG_CACHED | - TTM_PL_FLAG_NO_EVICT; - -static uint32_t mob_placement_flags = VMW_PL_FLAG_MOB | - TTM_PL_FLAG_CACHED; - -struct ttm_placement vmw_vram_placement = { +static struct ttm_place vram_placement_flags = { .fpfn = 0, .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED +}; + +static struct ttm_place vram_ne_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT +}; + +static struct ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED +}; + +static struct ttm_place sys_ne_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT +}; + +static struct ttm_place gmr_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED +}; + +static struct ttm_place gmr_ne_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT +}; + +static struct ttm_place mob_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED +}; + +struct ttm_placement vmw_vram_placement = { .num_placement = 1, .placement = &vram_placement_flags, .num_busy_placement = 1, .busy_placement = &vram_placement_flags }; -static uint32_t vram_gmr_placement_flags[] = { - TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED, - VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED +static struct ttm_place vram_gmr_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED + } }; -static uint32_t gmr_vram_placement_flags[] = { - VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED, - TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED +static struct ttm_place gmr_vram_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED + } }; struct ttm_placement vmw_vram_gmr_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 2, .placement = vram_gmr_placement_flags, .num_busy_placement = 1, .busy_placement = &gmr_placement_flags }; -static uint32_t vram_gmr_ne_placement_flags[] = { - TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT, - VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT +static struct ttm_place vram_gmr_ne_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_NO_EVICT + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | + TTM_PL_FLAG_NO_EVICT + } }; struct ttm_placement vmw_vram_gmr_ne_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 2, .placement = vram_gmr_ne_placement_flags, .num_busy_placement = 1, @@ -97,8 +132,6 @@ struct ttm_placement vmw_vram_gmr_ne_placement = { }; struct ttm_placement vmw_vram_sys_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .placement = &vram_placement_flags, .num_busy_placement = 1, @@ -106,8 +139,6 @@ struct ttm_placement vmw_vram_sys_placement = { }; struct ttm_placement vmw_vram_ne_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .placement = &vram_ne_placement_flags, .num_busy_placement = 1, @@ -115,8 +146,6 @@ struct ttm_placement vmw_vram_ne_placement = { }; struct ttm_placement vmw_sys_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .placement = &sys_placement_flags, .num_busy_placement = 1, @@ -124,24 +153,33 @@ struct ttm_placement vmw_sys_placement = { }; struct ttm_placement vmw_sys_ne_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .placement = &sys_ne_placement_flags, .num_busy_placement = 1, .busy_placement = &sys_ne_placement_flags }; -static uint32_t evictable_placement_flags[] = { - TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED, - TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED, - VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED, - VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED +static struct ttm_place evictable_placement_flags[] = { + { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED + }, { + .fpfn = 0, + .lpfn = 0, + .flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED + } }; struct ttm_placement vmw_evictable_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 4, .placement = evictable_placement_flags, .num_busy_placement = 1, @@ -149,8 +187,6 @@ struct ttm_placement vmw_evictable_placement = { }; struct ttm_placement vmw_srf_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .num_busy_placement = 2, .placement = &gmr_placement_flags, @@ -158,8 +194,6 @@ struct ttm_placement vmw_srf_placement = { }; struct ttm_placement vmw_mob_placement = { - .fpfn = 0, - .lpfn = 0, .num_placement = 1, .num_busy_placement = 1, .placement = &mob_placement_flags, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c index ed1d51006ab1..914b375763dc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c @@ -198,13 +198,19 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private *dev_priv, { struct ttm_buffer_object *bo = &buf->base; struct ttm_placement placement; + struct ttm_place place; int ret = 0; if (pin) - placement = vmw_vram_ne_placement; + place = vmw_vram_ne_placement.placement[0]; else - placement = vmw_vram_placement; - placement.lpfn = bo->num_pages; + place = vmw_vram_placement.placement[0]; + place.lpfn = bo->num_pages; + + placement.num_placement = 1; + placement.placement = &place; + placement.num_busy_placement = 1; + placement.busy_placement = &place; ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible); if (unlikely(ret != 0)) @@ -293,21 +299,23 @@ void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *bo, */ void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin) { - uint32_t pl_flags; + struct ttm_place pl; struct ttm_placement placement; uint32_t old_mem_type = bo->mem.mem_type; int ret; lockdep_assert_held(&bo->resv->lock.base); - pl_flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | VMW_PL_FLAG_MOB + pl.fpfn = 0; + pl.lpfn = 0; + pl.flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | VMW_PL_FLAG_MOB | TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED; if (pin) - pl_flags |= TTM_PL_FLAG_NO_EVICT; + pl.flags |= TTM_PL_FLAG_NO_EVICT; memset(&placement, 0, sizeof(placement)); placement.num_placement = 1; - placement.placement = &pl_flags; + placement.placement = &pl; ret = ttm_bo_validate(bo, &placement, false, true); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index b031b48dbb3c..0a474f391fad 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -374,10 +374,16 @@ static int vmw_fb_create_bo(struct vmw_private *vmw_priv, size_t size, struct vmw_dma_buffer **out) { struct vmw_dma_buffer *vmw_bo; - struct ttm_placement ne_placement = vmw_vram_ne_placement; + struct ttm_place ne_place = vmw_vram_ne_placement.placement[0]; + struct ttm_placement ne_placement; int ret; - ne_placement.lpfn = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + ne_placement.num_placement = 1; + ne_placement.placement = &ne_place; + ne_placement.num_busy_placement = 1; + ne_placement.busy_placement = &ne_place; + + ne_place.lpfn = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; (void) ttm_write_lock(&vmw_priv->reservation_sem, false); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index 26f8bdde3529..170b61be1e4e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -46,8 +46,7 @@ struct vmwgfx_gmrid_man { static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem) { struct vmwgfx_gmrid_man *gman = diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 7526c5bf5610..e3d39c80a091 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -45,12 +45,24 @@ struct ttm_bo_device; struct drm_mm_node; +/** + * struct ttm_place + * + * @fpfn: first valid page frame number to put the object + * @lpfn: last valid page frame number to put the object + * @flags: memory domain and caching flags for the object + * + * Structure indicating a possible place to put an object. + */ +struct ttm_place { + unsigned fpfn; + unsigned lpfn; + uint32_t flags; +}; /** * struct ttm_placement * - * @fpfn: first valid page frame number to put the object - * @lpfn: last valid page frame number to put the object * @num_placement: number of preferred placements * @placement: preferred placements * @num_busy_placement: number of preferred placements when need to evict buffer @@ -59,12 +71,10 @@ struct drm_mm_node; * Structure indicating the placement you request for an object. */ struct ttm_placement { - unsigned fpfn; - unsigned lpfn; - unsigned num_placement; - const uint32_t *placement; - unsigned num_busy_placement; - const uint32_t *busy_placement; + unsigned num_placement; + const struct ttm_place *placement; + unsigned num_busy_placement; + const struct ttm_place *busy_placement; }; /** @@ -518,20 +528,6 @@ extern int ttm_bo_create(struct ttm_bo_device *bdev, struct file *persistent_swap_storage, struct ttm_buffer_object **p_bo); -/** - * ttm_bo_check_placement - * - * @bo: the buffer object. - * @placement: placements - * - * Performs minimal validity checking on an intended change of - * placement flags. - * Returns - * -EINVAL: Intended change is invalid or not allowed. - */ -extern int ttm_bo_check_placement(struct ttm_buffer_object *bo, - struct ttm_placement *placement); - /** * ttm_bo_init_mm * diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 1d9f0f1ff52d..5c8bb5699a6f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -208,8 +208,7 @@ struct ttm_mem_type_manager_func { */ int (*get_node)(struct ttm_mem_type_manager *man, struct ttm_buffer_object *bo, - struct ttm_placement *placement, - uint32_t flags, + const struct ttm_place *place, struct ttm_mem_reg *mem); /** From 13a7d299dbbcd4c76ff088ec240d7cd896174c2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Sun, 24 Aug 2014 14:52:46 +0200 Subject: [PATCH 186/640] drm/radeon: move the IB test after the AGP fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we won't test if the fallback to PCIe GART really worked. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index a5d202a7c0a4..b6aee40e6ef3 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1397,10 +1397,6 @@ int radeon_device_init(struct radeon_device *rdev, if (r) return r; - r = radeon_ib_ring_tests(rdev); - if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); - r = radeon_gem_debugfs_init(rdev); if (r) { DRM_ERROR("registering gem debugfs failed (%d).\n", r); @@ -1418,6 +1414,10 @@ int radeon_device_init(struct radeon_device *rdev, return r; } + r = radeon_ib_ring_tests(rdev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); + if ((radeon_testing & 1)) { if (rdev->accel_working) radeon_test_moves(rdev); From b6a7eeeaa1cdf76f2522b75a2fd46280e8c3b3d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Apr 2013 15:41:25 +0200 Subject: [PATCH 187/640] drm/radeon: force UVD buffers into VRAM on RS[78]80 v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: only necessary on RS[78]80 Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 0669399efcea..db739bd64f16 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -137,10 +137,13 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) + !!r->write_domain; /* the first reloc of an UVD job is the msg and that must be in - VRAM, also but everything into VRAM on AGP cards to avoid - image corruptions */ + VRAM, also but everything into VRAM on AGP cards and older + IGP chips to avoid image corruptions */ if (p->ring == R600_RING_TYPE_UVD_INDEX && - (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) { + (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) || + p->rdev->family == CHIP_RS780 || + p->rdev->family == CHIP_RS880)) { + /* TODO: is this still needed for NI+ ? */ p->relocs[i].prefered_domains = RADEON_GEM_DOMAIN_VRAM; From a8fba64ab08cf4a5baf211f1126b475e03f90fc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 25 Apr 2013 18:54:07 +0200 Subject: [PATCH 188/640] drm/radeon: properly init UVD MC bits on R600 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600.c | 4 ++++ drivers/gpu/drm/radeon/r600d.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e8bf0ea2dade..e7dca47b7196 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -992,6 +992,8 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev) WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE); WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12); @@ -1042,6 +1044,8 @@ static void r600_pcie_gart_disable(struct radeon_device *rdev) WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp); WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp); + WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp); radeon_gart_table_vram_unpin(rdev); } diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 0c4a7d8d93e0..3df030dc2352 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -334,7 +334,7 @@ #define MC_VM_AGP_BOT 0x2188 #define MC_VM_AGP_BASE 0x218C #define MC_VM_FB_LOCATION 0x2180 -#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C +#define MC_VM_L1_TLB_MCB_RD_UVD_CNTL 0x2124 #define ENABLE_L1_TLB (1 << 0) #define ENABLE_L1_FRAGMENT_PROCESSING (1 << 1) #define ENABLE_L1_STRICT_ORDERING (1 << 2) @@ -354,12 +354,14 @@ #define EFFECTIVE_L1_QUEUE_SIZE(x) (((x) & 7) << 15) #define EFFECTIVE_L1_QUEUE_SIZE_MASK 0x00038000 #define EFFECTIVE_L1_QUEUE_SIZE_SHIFT 15 +#define MC_VM_L1_TLB_MCD_RD_A_CNTL 0x219C #define MC_VM_L1_TLB_MCD_RD_B_CNTL 0x21A0 #define MC_VM_L1_TLB_MCB_RD_GFX_CNTL 0x21FC #define MC_VM_L1_TLB_MCB_RD_HDP_CNTL 0x2204 #define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL 0x2208 #define MC_VM_L1_TLB_MCB_RD_SEM_CNTL 0x220C #define MC_VM_L1_TLB_MCB_RD_SYS_CNTL 0x2200 +#define MC_VM_L1_TLB_MCB_WR_UVD_CNTL 0x212c #define MC_VM_L1_TLB_MCD_WR_A_CNTL 0x21A4 #define MC_VM_L1_TLB_MCD_WR_B_CNTL 0x21A8 #define MC_VM_L1_TLB_MCB_WR_GFX_CNTL 0x2210 From 4a956a70a8d4cc5268a60f6718de58892fa1275e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 28 Nov 2012 16:55:21 -0500 Subject: [PATCH 189/640] drm/radeon: add set_uvd_clocks callback for r6xx v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: wake up PLL, set [VD]CLK_SRC, cleanup code v3: handle RV670,RV635,RV620 as well v4: merge rv6xx and rs780/rs880 code, fix ref divider mask Signed-off-by: Alex Deucher Signed-off-by: Christian König --- drivers/gpu/drm/radeon/r600.c | 88 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/r600d.h | 26 ++++++++++ 2 files changed, 114 insertions(+) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e7dca47b7196..011d97f6fc7f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -122,6 +122,94 @@ u32 r600_get_xclk(struct radeon_device *rdev) int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) { + unsigned fb_div = 0, ref_div, vclk_div = 0, dclk_div = 0; + int r; + + /* bypass vclk and dclk with bclk */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + /* assert BYPASS_EN, deassert UPLL_RESET, UPLL_SLEEP and UPLL_CTLREQ */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~( + UPLL_RESET_MASK | UPLL_SLEEP_MASK | UPLL_CTLREQ_MASK)); + + if (rdev->family >= CHIP_RS780) + WREG32_P(GFX_MACRO_BYPASS_CNTL, UPLL_BYPASS_CNTL, + ~UPLL_BYPASS_CNTL); + + if (!vclk || !dclk) { + /* keep the Bypass mode, put PLL to sleep */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK); + return 0; + } + + if (rdev->clock.spll.reference_freq == 10000) + ref_div = 34; + else + ref_div = 4; + + r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 50000, 160000, + ref_div + 1, 0xFFF, 2, 30, ~0, + &fb_div, &vclk_div, &dclk_div); + if (r) + return r; + + if (rdev->family >= CHIP_RV670 && rdev->family < CHIP_RS780) + fb_div >>= 1; + else + fb_div |= 1; + + r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* assert PLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK); + + /* For RS780 we have to choose ref clk */ + if (rdev->family >= CHIP_RS780) + WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REFCLK_SRC_SEL_MASK, + ~UPLL_REFCLK_SRC_SEL_MASK); + + /* set the required fb, ref and post divder values */ + WREG32_P(CG_UPLL_FUNC_CNTL, + UPLL_FB_DIV(fb_div) | + UPLL_REF_DIV(ref_div), + ~(UPLL_FB_DIV_MASK | UPLL_REF_DIV_MASK)); + WREG32_P(CG_UPLL_FUNC_CNTL_2, + UPLL_SW_HILEN(vclk_div >> 1) | + UPLL_SW_LOLEN((vclk_div >> 1) + (vclk_div & 1)) | + UPLL_SW_HILEN2(dclk_div >> 1) | + UPLL_SW_LOLEN2((dclk_div >> 1) + (dclk_div & 1)) | + UPLL_DIVEN_MASK | UPLL_DIVEN2_MASK, + ~UPLL_SW_MASK); + + /* give the PLL some time to settle */ + mdelay(15); + + /* deassert PLL_RESET */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK); + + mdelay(15); + + /* deassert BYPASS EN */ + WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK); + + if (rdev->family >= CHIP_RS780) + WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~UPLL_BYPASS_CNTL); + + r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL); + if (r) + return r; + + /* switch VCLK and DCLK selection */ + WREG32_P(CG_UPLL_FUNC_CNTL_2, + VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2), + ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK)); + + mdelay(100); + return 0; } diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 3df030dc2352..8c3fdd581a72 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1526,9 +1526,35 @@ #define UVD_CONTEXT_ID 0xf6f4 +/* rs780 only */ +#define GFX_MACRO_BYPASS_CNTL 0x30c0 +#define SPLL_BYPASS_CNTL (1 << 0) +#define UPLL_BYPASS_CNTL (1 << 1) + +#define CG_UPLL_FUNC_CNTL 0x7e0 +# define UPLL_RESET_MASK 0x00000001 +# define UPLL_SLEEP_MASK 0x00000002 +# define UPLL_BYPASS_EN_MASK 0x00000004 # define UPLL_CTLREQ_MASK 0x00000008 +# define UPLL_FB_DIV(x) ((x) << 4) +# define UPLL_FB_DIV_MASK 0x0000FFF0 +# define UPLL_REF_DIV(x) ((x) << 16) +# define UPLL_REF_DIV_MASK 0x003F0000 +# define UPLL_REFCLK_SRC_SEL_MASK 0x20000000 # define UPLL_CTLACK_MASK 0x40000000 # define UPLL_CTLACK2_MASK 0x80000000 +#define CG_UPLL_FUNC_CNTL_2 0x7e4 +# define UPLL_SW_HILEN(x) ((x) << 0) +# define UPLL_SW_LOLEN(x) ((x) << 4) +# define UPLL_SW_HILEN2(x) ((x) << 8) +# define UPLL_SW_LOLEN2(x) ((x) << 12) +# define UPLL_DIVEN_MASK 0x00010000 +# define UPLL_DIVEN2_MASK 0x00020000 +# define UPLL_SW_MASK 0x0003FFFF +# define VCLK_SRC_SEL(x) ((x) << 20) +# define VCLK_SRC_SEL_MASK 0x01F00000 +# define DCLK_SRC_SEL(x) ((x) << 25) +# define DCLK_SRC_SEL_MASK 0x3E000000 /* * PM4 From 856754c3a23a622d26a82b29fab6429481705511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Apr 2013 22:11:22 +0200 Subject: [PATCH 190/640] drm/radeon: add UVD support for older asics v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: cleanup R600 support v3: rebased on current drm-fixes-3.12 v4: rebased on drm-next-3.14 Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600.c | 31 +++++++++++ drivers/gpu/drm/radeon/r600d.h | 8 +++ drivers/gpu/drm/radeon/radeon_asic.c | 15 ++++++ drivers/gpu/drm/radeon/radeon_asic.h | 3 ++ drivers/gpu/drm/radeon/uvd_v1_0.c | 77 ++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/uvd_v2_2.c | 4 ++ 6 files changed, 138 insertions(+) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 011d97f6fc7f..14cb31e25c2f 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3009,6 +3009,17 @@ static int r600_startup(struct radeon_device *rdev) return r; } + r = uvd_v1_0_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r); + } + } + if (r) { + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + } + /* Enable IRQ */ if (!rdev->irq.installed) { r = radeon_irq_kms_init(rdev); @@ -3037,6 +3048,16 @@ static int r600_startup(struct radeon_device *rdev) if (r) return r; + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + RADEON_CP_PACKET2); + if (!r) + r = uvd_v1_0_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + } + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -3096,6 +3117,8 @@ int r600_suspend(struct radeon_device *rdev) radeon_pm_suspend(rdev); r600_audio_fini(rdev); r600_cp_stop(rdev); + uvd_v1_0_fini(rdev); + radeon_uvd_suspend(rdev); r600_irq_suspend(rdev); radeon_wb_disable(rdev); r600_pcie_gart_disable(rdev); @@ -3175,6 +3198,12 @@ int r600_init(struct radeon_device *rdev) rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); + r = radeon_uvd_init(rdev); + if (!r) { + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -3204,6 +3233,8 @@ void r600_fini(struct radeon_device *rdev) r600_audio_fini(rdev); r600_cp_fini(rdev); r600_irq_fini(rdev); + uvd_v1_0_fini(rdev); + radeon_uvd_fini(rdev); radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 8c3fdd581a72..420bed19e139 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1485,6 +1485,7 @@ #define UVD_CGC_GATE 0xf4a8 #define UVD_LMI_CTRL2 0xf4f4 #define UVD_MASTINT_EN 0xf500 +#define UVD_FW_START 0xf51C #define UVD_LMI_ADDR_EXT 0xf594 #define UVD_LMI_CTRL 0xf598 #define UVD_LMI_SWAP_CNTL 0xf5b4 @@ -1497,6 +1498,13 @@ #define UVD_MPC_SET_MUX 0xf5f4 #define UVD_MPC_SET_ALU 0xf5f8 +#define UVD_VCPU_CACHE_OFFSET0 0xf608 +#define UVD_VCPU_CACHE_SIZE0 0xf60c +#define UVD_VCPU_CACHE_OFFSET1 0xf610 +#define UVD_VCPU_CACHE_SIZE1 0xf614 +#define UVD_VCPU_CACHE_OFFSET2 0xf618 +#define UVD_VCPU_CACHE_SIZE2 0xf61c + #define UVD_VCPU_CNTL 0xf660 #define UVD_SOFT_RESET 0xf680 #define RBC_SOFT_RESET (1<<0) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index eeeeabe09758..9e6699a9a0b4 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -965,6 +965,19 @@ static struct radeon_asic r600_asic = { }, }; +static struct radeon_asic_ring rv6xx_uvd_ring = { + .ib_execute = &uvd_v1_0_ib_execute, + .emit_fence = &uvd_v1_0_fence_emit, + .emit_semaphore = &uvd_v1_0_semaphore_emit, + .cs_parse = &radeon_uvd_cs_parse, + .ring_test = &uvd_v1_0_ring_test, + .ib_test = &uvd_v1_0_ib_test, + .is_lockup = &radeon_ring_test_lockup, + .get_rptr = &uvd_v1_0_get_rptr, + .get_wptr = &uvd_v1_0_get_wptr, + .set_wptr = &uvd_v1_0_set_wptr, +}; + static struct radeon_asic rv6xx_asic = { .init = &r600_init, .fini = &r600_fini, @@ -984,6 +997,7 @@ static struct radeon_asic rv6xx_asic = { .ring = { [RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring, [R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring, }, .irq = { .set = &r600_irq_set, @@ -1074,6 +1088,7 @@ static struct radeon_asic rs780_asic = { .ring = { [RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring, [R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring, + [R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring, }, .irq = { .set = &r600_irq_set, diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 275a5dc01780..987a3b713e06 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -883,6 +883,7 @@ uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void uvd_v1_0_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); +int uvd_v1_0_resume(struct radeon_device *rdev); int uvd_v1_0_init(struct radeon_device *rdev); void uvd_v1_0_fini(struct radeon_device *rdev); @@ -890,6 +891,8 @@ int uvd_v1_0_start(struct radeon_device *rdev); void uvd_v1_0_stop(struct radeon_device *rdev); int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); +void uvd_v1_0_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence); int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev, struct radeon_ring *ring, diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index cda391347286..62d7086f0e08 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -22,6 +22,7 @@ * Authors: Christian König */ +#include #include #include "radeon.h" #include "radeon_asic.h" @@ -69,6 +70,82 @@ void uvd_v1_0_set_wptr(struct radeon_device *rdev, WREG32(UVD_RBC_RB_WPTR, ring->wptr); } +/** + * uvd_v1_0_fence_emit - emit an fence & trap command + * + * @rdev: radeon_device pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. + */ +void uvd_v1_0_fence_emit(struct radeon_device *rdev, + struct radeon_fence *fence) +{ + struct radeon_ring *ring = &rdev->ring[fence->ring]; + uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr; + + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, addr & 0xffffffff); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, fence->seq); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 0); + + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0)); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0)); + radeon_ring_write(ring, 2); + return; +} + +/** + * uvd_v1_0_resume - memory controller programming + * + * @rdev: radeon_device pointer + * + * Let the UVD memory controller know it's offsets + */ +int uvd_v1_0_resume(struct radeon_device *rdev) +{ + uint64_t addr; + uint32_t size; + int r; + + r = radeon_uvd_resume(rdev); + if (r) + return r; + + /* programm the VCPU memory controller bits 0-27 */ + addr = (rdev->uvd.gpu_addr >> 3) + 16; + size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size) >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET0, addr); + WREG32(UVD_VCPU_CACHE_SIZE0, size); + + addr += size; + size = RADEON_UVD_STACK_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET1, addr); + WREG32(UVD_VCPU_CACHE_SIZE1, size); + + addr += size; + size = RADEON_UVD_HEAP_SIZE >> 3; + WREG32(UVD_VCPU_CACHE_OFFSET2, addr); + WREG32(UVD_VCPU_CACHE_SIZE2, size); + + /* bits 28-31 */ + addr = (rdev->uvd.gpu_addr >> 28) & 0xF; + WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0)); + + /* bits 32-39 */ + addr = (rdev->uvd.gpu_addr >> 32) & 0xFF; + WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + + WREG32(UVD_FW_START, *((uint32_t*)rdev->uvd.cpu_addr)); + + return 0; +} + /** * uvd_v1_0_init - start and test UVD block * diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c index 8bfdadd56598..89193519f8a1 100644 --- a/drivers/gpu/drm/radeon/uvd_v2_2.c +++ b/drivers/gpu/drm/radeon/uvd_v2_2.c @@ -72,6 +72,10 @@ int uvd_v2_2_resume(struct radeon_device *rdev) uint32_t chip_id, size; int r; + /* RV770 uses V1.0 MC */ + if (rdev->family == CHIP_RV770) + return uvd_v1_0_resume(rdev); + r = radeon_uvd_resume(rdev); if (r) return r; From 115365e8a33e4ce5e12bee7999568a26b4c33d3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 25 Apr 2013 09:02:14 +0200 Subject: [PATCH 191/640] drm/radeon: implement UVD hw workarounds for R6xx v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only the essentials, cause this hw generation is really buggy. v2: start supporting RV670,RV620 and RV635 as well v3: activate more workarounds Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600d.h | 3 +++ drivers/gpu/drm/radeon/uvd_v1_0.c | 26 +++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 420bed19e139..671b48032a3d 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -330,6 +330,7 @@ #define HDP_TILING_CONFIG 0x2F3C #define HDP_DEBUG1 0x2F34 +#define MC_CONFIG 0x2000 #define MC_VM_AGP_TOP 0x2184 #define MC_VM_AGP_BOT 0x2188 #define MC_VM_AGP_BASE 0x218C @@ -375,6 +376,8 @@ #define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2194 #define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x2198 +#define RS_DQ_RD_RET_CONF 0x2348 + #define PA_CL_ENHANCE 0x8A14 #define CLIP_VTX_REORDER_ENA (1 << 0) #define NUM_CLIP_SEQ(x) ((x) << 1) diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index 62d7086f0e08..c3e182bc6c59 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -207,8 +207,32 @@ done: /* lower clocks again */ radeon_set_uvd_clocks(rdev, 0, 0); - if (!r) + if (!r) { + switch (rdev->family) { + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV620: + /* 64byte granularity workaround */ + WREG32(MC_CONFIG, 0); + WREG32(MC_CONFIG, 1 << 4); + WREG32(RS_DQ_RD_RET_CONF, 0x3f); + WREG32(MC_CONFIG, 0x1f); + + /* fall through */ + case CHIP_RV670: + case CHIP_RV635: + + /* write clean workaround */ + WREG32_P(UVD_VCPU_CNTL, 0x10, ~0x10); + break; + + default: + /* TODO: Do we need more? */ + break; + } + DRM_INFO("UVD initialized successfully.\n"); + } return r; } From bdc99722d007ed1db7188b09404bda080d1d737a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 26 Aug 2014 13:11:36 -0400 Subject: [PATCH 192/640] drm/radeon: 760G/780V/880V don't have UVD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't enable UVD on these asics as they don't have UVD hardware. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600.c | 57 ++++++++++++++++------------ drivers/gpu/drm/radeon/radeon_asic.c | 10 ++++- 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 14cb31e25c2f..a95ced569d84 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3009,15 +3009,16 @@ static int r600_startup(struct radeon_device *rdev) return r; } - r = uvd_v1_0_resume(rdev); - if (!r) { - r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); - if (r) { - dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r); + if (rdev->has_uvd) { + r = uvd_v1_0_resume(rdev); + if (!r) { + r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX); + if (r) { + dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r); + } } - } - if (r) { - rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + if (r) + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; } /* Enable IRQ */ @@ -3048,14 +3049,16 @@ static int r600_startup(struct radeon_device *rdev) if (r) return r; - ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; - if (ring->ring_size) { - r = radeon_ring_init(rdev, ring, ring->ring_size, 0, - RADEON_CP_PACKET2); - if (!r) - r = uvd_v1_0_init(rdev); - if (r) - DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + if (rdev->has_uvd) { + ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX]; + if (ring->ring_size) { + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + RADEON_CP_PACKET2); + if (!r) + r = uvd_v1_0_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); + } } r = radeon_ib_pool_init(rdev); @@ -3117,8 +3120,10 @@ int r600_suspend(struct radeon_device *rdev) radeon_pm_suspend(rdev); r600_audio_fini(rdev); r600_cp_stop(rdev); - uvd_v1_0_fini(rdev); - radeon_uvd_suspend(rdev); + if (rdev->has_uvd) { + uvd_v1_0_fini(rdev); + radeon_uvd_suspend(rdev); + } r600_irq_suspend(rdev); radeon_wb_disable(rdev); r600_pcie_gart_disable(rdev); @@ -3198,10 +3203,12 @@ int r600_init(struct radeon_device *rdev) rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); - r = radeon_uvd_init(rdev); - if (!r) { - rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; - r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096); + if (rdev->has_uvd) { + r = radeon_uvd_init(rdev); + if (!r) { + rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL; + r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096); + } } rdev->ih.ring_obj = NULL; @@ -3233,8 +3240,10 @@ void r600_fini(struct radeon_device *rdev) r600_audio_fini(rdev); r600_cp_fini(rdev); r600_irq_fini(rdev); - uvd_v1_0_fini(rdev); - radeon_uvd_fini(rdev); + if (rdev->has_uvd) { + uvd_v1_0_fini(rdev); + radeon_uvd_fini(rdev); + } radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 9e6699a9a0b4..d91f965e8219 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -2313,7 +2313,15 @@ int radeon_asic_init(struct radeon_device *rdev) case CHIP_RS780: case CHIP_RS880: rdev->asic = &rs780_asic; - rdev->has_uvd = true; + /* 760G/780V/880V don't have UVD */ + if ((rdev->pdev->device == 0x9616)|| + (rdev->pdev->device == 0x9611)|| + (rdev->pdev->device == 0x9613)|| + (rdev->pdev->device == 0x9711)|| + (rdev->pdev->device == 0x9713)) + rdev->has_uvd = false; + else + rdev->has_uvd = true; break; case CHIP_RV770: case CHIP_RV730: From 32517d59ebb3b6eb7a5a5736020072ce7e609e76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 09:59:45 +0200 Subject: [PATCH 193/640] drm/radeon: enable RB_ARB before resetting the VCPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes "UVD not responding, trying to reset the VCPU" messages on earlier ASICs. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/uvd_v1_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index c3e182bc6c59..e72b3cb59358 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -319,12 +319,12 @@ int uvd_v1_0_start(struct radeon_device *rdev) /* enable UMC */ WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8)); + WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); + /* boot up the VCPU */ WREG32(UVD_SOFT_RESET, 0); mdelay(10); - WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3)); - for (i = 0; i < 10; ++i) { uint32_t status; for (j = 0; j < 100; ++j) { From 14e935aeb0213e2ef522d0d8a14d2d29fb194956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 16 Apr 2013 22:11:37 +0200 Subject: [PATCH 194/640] drm/radeon: add UVD fw names for older asic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Activating the UVD support. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_uvd.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 341848a14376..a6ebaf0bda15 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -40,12 +40,18 @@ #define UVD_IDLE_TIMEOUT_MS 1000 /* Firmware Names */ +#define FIRMWARE_R600 "radeon/R600_uvd.bin" +#define FIRMWARE_RS780 "radeon/RS780_uvd.bin" +#define FIRMWARE_RV770 "radeon/RV770_uvd.bin" #define FIRMWARE_RV710 "radeon/RV710_uvd.bin" #define FIRMWARE_CYPRESS "radeon/CYPRESS_uvd.bin" #define FIRMWARE_SUMO "radeon/SUMO_uvd.bin" #define FIRMWARE_TAHITI "radeon/TAHITI_uvd.bin" #define FIRMWARE_BONAIRE "radeon/BONAIRE_uvd.bin" +MODULE_FIRMWARE(FIRMWARE_R600); +MODULE_FIRMWARE(FIRMWARE_RS780); +MODULE_FIRMWARE(FIRMWARE_RV770); MODULE_FIRMWARE(FIRMWARE_RV710); MODULE_FIRMWARE(FIRMWARE_CYPRESS); MODULE_FIRMWARE(FIRMWARE_SUMO); @@ -63,6 +69,23 @@ int radeon_uvd_init(struct radeon_device *rdev) INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler); switch (rdev->family) { + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + fw_name = FIRMWARE_R600; + break; + + case CHIP_RS780: + case CHIP_RS880: + fw_name = FIRMWARE_RS780; + break; + + case CHIP_RV770: + fw_name = FIRMWARE_RV770; + break; + case CHIP_RV710: case CHIP_RV730: case CHIP_RV740: From f0d970b4fd05cb7af89307bb17689c18c835d739 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 15:21:53 +0200 Subject: [PATCH 195/640] drm/radeon: wake up all fences on manual reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wake up all fences when we manually trigger a reset. Signed-off-by: Christian König Reviewed-by: Maarten Lankhorst Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_fence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 913787085dfa..5bd837afb939 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -833,6 +833,7 @@ static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data) down_read(&rdev->exclusive_lock); seq_printf(m, "%d\n", rdev->needs_reset); rdev->needs_reset = true; + wake_up_all(&rdev->fence_queue); up_read(&rdev->exclusive_lock); return 0; From eb98c709907c7a78b9cd0d18642477d47d348f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 15:21:56 +0200 Subject: [PATCH 196/640] drm/radeon: force fence completion only on problematic rings (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of resetting all fence numbers, only reset the number of the problematic ring. Split out from a patch from Maarten Lankhorst v2 (agd5f): rebase build fix Signed-off-by: Christian König Reviewed-by: Maarten Lankhorst Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_device.c | 8 ++------ drivers/gpu/drm/radeon/radeon_fence.c | 12 ++++-------- drivers/gpu/drm/radeon/radeon_ib.c | 1 + 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b321ad4dcafd..5f967c0b5aa3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -371,7 +371,7 @@ struct radeon_fence { int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); -void radeon_fence_driver_force_completion(struct radeon_device *rdev); +void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring); int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b6aee40e6ef3..8b442e4ab1da 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1488,7 +1488,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) struct drm_crtc *crtc; struct drm_connector *connector; int i, r; - bool force_completion = false; if (dev == NULL || dev->dev_private == NULL) { return -ENODEV; @@ -1532,12 +1531,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) r = radeon_fence_wait_empty(rdev, i); if (r) { /* delay GPU reset to resume */ - force_completion = true; + radeon_fence_driver_force_completion(rdev, i); } } - if (force_completion) { - radeon_fence_driver_force_completion(rdev); - } radeon_save_bios_scratch_regs(rdev); @@ -1722,8 +1718,8 @@ retry: } } } else { - radeon_fence_driver_force_completion(rdev); for (i = 0; i < RADEON_NUM_RINGS; ++i) { + radeon_fence_driver_force_completion(rdev, i); kfree(ring_data[i]); } } diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 5bd837afb939..e8a28e7b39c7 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -758,7 +758,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) r = radeon_fence_wait_empty(rdev, ring); if (r) { /* no need to trigger GPU reset as we are unloading */ - radeon_fence_driver_force_completion(rdev); + radeon_fence_driver_force_completion(rdev, ring); } wake_up_all(&rdev->fence_queue); radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); @@ -771,19 +771,15 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) * radeon_fence_driver_force_completion - force all fence waiter to complete * * @rdev: radeon device pointer + * @ring: the ring to complete * * In case of GPU reset failure make sure no process keep waiting on fence * that will never complete. */ -void radeon_fence_driver_force_completion(struct radeon_device *rdev) +void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring) { - int ring; - - for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { - if (!rdev->fence_drv[ring].initialized) - continue; + if (rdev->fence_drv[ring].initialized) radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); - } } diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 5bf2c0a05827..6fc7461d70c4 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -269,6 +269,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev) r = radeon_ib_test(rdev, i, ring); if (r) { + radeon_fence_driver_force_completion(rdev, i); ring->ready = false; rdev->needs_reset = false; From 9bb39ff43e15e85bc1bd9bbbdc5b9cef7a670fd5 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 27 Aug 2014 16:45:18 -0400 Subject: [PATCH 197/640] drm/radeon: take exclusive_lock in read mode during ring tests, v5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is needed for the next commit, because the lockup detection will need the read lock to run. v4 (chk): split out forced fence completion, remove unrelated changes, add and handle in_reset flag v5 (agd5f): rebase fix Signed-off-by: Maarten Lankhorst Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_cs.c | 7 +++++ drivers/gpu/drm/radeon/radeon_device.c | 37 +++++++++++-------------- drivers/gpu/drm/radeon/radeon_display.c | 4 ++- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5f967c0b5aa3..8cd1b3f60d4a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2326,7 +2326,7 @@ struct radeon_device { bool need_dma32; bool accel_working; bool fastfb_working; /* IGP feature*/ - bool needs_reset; + bool needs_reset, in_reset; struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES]; const struct firmware *me_fw; /* all family ME firmware */ const struct firmware *pfp_fw; /* r6/700 PFP firmware */ diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index db739bd64f16..bd328cb6fa61 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -653,6 +653,13 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) up_read(&rdev->exclusive_lock); return -EBUSY; } + if (rdev->in_reset) { + up_read(&rdev->exclusive_lock); + r = radeon_gpu_reset(rdev); + if (!r) + r = -EAGAIN; + return r; + } /* initialize parser */ memset(&parser, 0, sizeof(struct radeon_cs_parser)); parser.filp = filp; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 8b442e4ab1da..9f666370b5ac 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1673,6 +1673,7 @@ int radeon_gpu_reset(struct radeon_device *rdev) return 0; } + rdev->in_reset = true; rdev->needs_reset = false; radeon_save_bios_scratch_regs(rdev); @@ -1691,7 +1692,6 @@ int radeon_gpu_reset(struct radeon_device *rdev) } } -retry: r = radeon_asic_reset(rdev); if (!r) { dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n"); @@ -1700,25 +1700,11 @@ retry: radeon_restore_bios_scratch_regs(rdev); - if (!r) { - for (i = 0; i < RADEON_NUM_RINGS; ++i) { + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!r && ring_data[i]) { radeon_ring_restore(rdev, &rdev->ring[i], ring_sizes[i], ring_data[i]); - ring_sizes[i] = 0; - ring_data[i] = NULL; - } - - r = radeon_ib_ring_tests(rdev); - if (r) { - dev_err(rdev->dev, "ib ring test failed (%d).\n", r); - if (saved) { - saved = false; - radeon_suspend(rdev); - goto retry; - } - } - } else { - for (i = 0; i < RADEON_NUM_RINGS; ++i) { + } else { radeon_fence_driver_force_completion(rdev, i); kfree(ring_data[i]); } @@ -1751,19 +1737,28 @@ retry: /* reset hpd state */ radeon_hpd_init(rdev); + ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); + downgrade_write(&rdev->exclusive_lock); + drm_helper_resume_force_mode(rdev->ddev); /* set the power state here in case we are a PX system or headless */ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) radeon_pm_compute_clocks(rdev); - ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); - if (r) { + if (!r) { + r = radeon_ib_ring_tests(rdev); + if (r && saved) + r = -EAGAIN; + } else { /* bad news, how to tell it to userspace ? */ dev_info(rdev->dev, "GPU reset failed\n"); } - up_write(&rdev->exclusive_lock); + rdev->needs_reset = r == -EAGAIN; + rdev->in_reset = false; + + up_read(&rdev->exclusive_lock); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 3fdf87318069..bd0d687379ee 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work) r = radeon_fence_wait(work->fence, false); if (r == -EDEADLK) { up_read(&rdev->exclusive_lock); - r = radeon_gpu_reset(rdev); + do { + r = radeon_gpu_reset(rdev); + } while (r == -EAGAIN); down_read(&rdev->exclusive_lock); } if (r) From 0bfa4b41268ad5fd741f16f484e4fee190822ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 15:21:58 +0200 Subject: [PATCH 198/640] drm/radeon: handle lockup in delayed work, v5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v5 (chk): complete rework, start when the first fence is emitted, stop when the last fence is signalled, make it work correctly with GPU resets, cleanup radeon_fence_wait_seq Signed-off-by: Maarten Lankhorst Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 2 + drivers/gpu/drm/radeon/radeon_fence.c | 208 ++++++++++++++++---------- 2 files changed, 128 insertions(+), 82 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 8cd1b3f60d4a..74919ef57ac3 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -350,6 +350,7 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw, * Fences. */ struct radeon_fence_driver { + struct radeon_device *rdev; uint32_t scratch_reg; uint64_t gpu_addr; volatile uint32_t *cpu_addr; @@ -357,6 +358,7 @@ struct radeon_fence_driver { uint64_t sync_seq[RADEON_NUM_RINGS]; atomic64_t last_seq; bool initialized; + struct delayed_work lockup_work; }; struct radeon_fence { diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index e8a28e7b39c7..ac15f3418478 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -97,6 +97,25 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring) return seq; } +/** + * radeon_fence_schedule_check - schedule lockup check + * + * @rdev: radeon_device pointer + * @ring: ring index we should work with + * + * Queues a delayed work item to check for lockups. + */ +static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring) +{ + /* + * Do not reset the timer here with mod_delayed_work, + * this can livelock in an interaction with TTM delayed destroy. + */ + queue_delayed_work(system_power_efficient_wq, + &rdev->fence_drv[ring].lockup_work, + RADEON_FENCE_JIFFIES_TIMEOUT); +} + /** * radeon_fence_emit - emit a fence on the requested ring * @@ -122,19 +141,21 @@ int radeon_fence_emit(struct radeon_device *rdev, (*fence)->ring = ring; radeon_fence_ring_emit(rdev, ring, *fence); trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); + radeon_fence_schedule_check(rdev, ring); return 0; } /** - * radeon_fence_process - process a fence + * radeon_fence_activity - check for fence activity * * @rdev: radeon_device pointer * @ring: ring index the fence is associated with * - * Checks the current fence value and wakes the fence queue - * if the sequence number has increased (all asics). + * Checks the current fence value and calculates the last + * signalled fence value. Returns true if activity occured + * on the ring, and the fence_queue should be waken up. */ -void radeon_fence_process(struct radeon_device *rdev, int ring) +static bool radeon_fence_activity(struct radeon_device *rdev, int ring) { uint64_t seq, last_seq, last_emitted; unsigned count_loop = 0; @@ -190,7 +211,67 @@ void radeon_fence_process(struct radeon_device *rdev, int ring) } } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq); - if (wake) + if (seq < last_emitted) + radeon_fence_schedule_check(rdev, ring); + + return wake; +} + +/** + * radeon_fence_check_lockup - check for hardware lockup + * + * @work: delayed work item + * + * Checks for fence activity and if there is none probe + * the hardware if a lockup occured. + */ +static void radeon_fence_check_lockup(struct work_struct *work) +{ + struct radeon_fence_driver *fence_drv; + struct radeon_device *rdev; + int ring; + + fence_drv = container_of(work, struct radeon_fence_driver, + lockup_work.work); + rdev = fence_drv->rdev; + ring = fence_drv - &rdev->fence_drv[0]; + + if (!down_read_trylock(&rdev->exclusive_lock)) { + /* just reschedule the check if a reset is going on */ + radeon_fence_schedule_check(rdev, ring); + return; + } + + if (radeon_fence_activity(rdev, ring)) + wake_up_all(&rdev->fence_queue); + + else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) { + + /* good news we believe it's a lockup */ + dev_warn(rdev->dev, "GPU lockup (current fence id " + "0x%016llx last fence id 0x%016llx on ring %d)\n", + (uint64_t)atomic64_read(&fence_drv->last_seq), + fence_drv->sync_seq[ring], ring); + + /* remember that we need an reset */ + rdev->needs_reset = true; + wake_up_all(&rdev->fence_queue); + } + up_read(&rdev->exclusive_lock); +} + +/** + * radeon_fence_process - process a fence + * + * @rdev: radeon_device pointer + * @ring: ring index the fence is associated with + * + * Checks the current fence value and wakes the fence queue + * if the sequence number has increased (all asics). + */ +void radeon_fence_process(struct radeon_device *rdev, int ring) +{ + if (radeon_fence_activity(rdev, ring)) wake_up_all(&rdev->fence_queue); } @@ -300,86 +381,43 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, bool intr) { - uint64_t last_seq[RADEON_NUM_RINGS]; - bool signaled; - int i, r; + long r; + int i; - while (!radeon_fence_any_seq_signaled(rdev, target_seq)) { + if (radeon_fence_any_seq_signaled(rdev, target_seq)) + return 0; - /* Save current sequence values, used to check for GPU lockups */ - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; + /* enable IRQs and tracing */ + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!target_seq[i]) + continue; - last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq); - trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); - radeon_irq_kms_sw_irq_get(rdev, i); - } - - if (intr) { - r = wait_event_interruptible_timeout(rdev->fence_queue, ( - (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)) - || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT); - } else { - r = wait_event_timeout(rdev->fence_queue, ( - (signaled = radeon_fence_any_seq_signaled(rdev, target_seq)) - || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT); - } - - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; - - radeon_irq_kms_sw_irq_put(rdev, i); - trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); - } - - if (unlikely(r < 0)) - return r; - - if (unlikely(!signaled)) { - if (rdev->needs_reset) - return -EDEADLK; - - /* we were interrupted for some reason and fence - * isn't signaled yet, resume waiting */ - if (r) - continue; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; - - if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq)) - break; - } - - if (i != RADEON_NUM_RINGS) - continue; - - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (!target_seq[i]) - continue; - - if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i])) - break; - } - - if (i < RADEON_NUM_RINGS) { - /* good news we believe it's a lockup */ - dev_warn(rdev->dev, "GPU lockup (waiting for " - "0x%016llx last fence id 0x%016llx on" - " ring %d)\n", - target_seq[i], last_seq[i], i); - - /* remember that we need an reset */ - rdev->needs_reset = true; - wake_up_all(&rdev->fence_queue); - return -EDEADLK; - } - } + trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]); + radeon_irq_kms_sw_irq_get(rdev, i); } - return 0; + + if (intr) { + r = wait_event_interruptible_timeout(rdev->fence_queue, ( + radeon_fence_any_seq_signaled(rdev, target_seq) + || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT); + } else { + r = wait_event_timeout(rdev->fence_queue, ( + radeon_fence_any_seq_signaled(rdev, target_seq) + || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT); + } + + if (rdev->needs_reset) + r = -EDEADLK; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!target_seq[i]) + continue; + + radeon_irq_kms_sw_irq_put(rdev, i); + trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); + } + + return r < 0 ? r : 0; } /** @@ -711,6 +749,9 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) rdev->fence_drv[ring].sync_seq[i] = 0; atomic64_set(&rdev->fence_drv[ring].last_seq, 0); rdev->fence_drv[ring].initialized = false; + INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work, + radeon_fence_check_lockup); + rdev->fence_drv[ring].rdev = rdev; } /** @@ -760,6 +801,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) /* no need to trigger GPU reset as we are unloading */ radeon_fence_driver_force_completion(rdev, ring); } + cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work); wake_up_all(&rdev->fence_queue); radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); rdev->fence_drv[ring].initialized = false; @@ -778,8 +820,10 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) */ void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring) { - if (rdev->fence_drv[ring].initialized) + if (rdev->fence_drv[ring].initialized) { radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); + cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work); + } } From 9867d00dbaef42e346e5d12eaa9591b057fea6d8 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 27 Aug 2014 15:21:59 +0200 Subject: [PATCH 199/640] drm/radeon: add timeout argument to radeon_fence_wait_seq v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes it possible to wait for a specific amount of time, rather than wait until infinity. v2 (chk): rebased on other changes Signed-off-by: Maarten Lankhorst Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_fence.c | 48 ++++++++++++++++----------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index ac15f3418478..a54bfd60510b 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -364,28 +364,31 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) } /** - * radeon_fence_wait_seq - wait for a specific sequence numbers + * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers * * @rdev: radeon device pointer * @target_seq: sequence number(s) we want to wait for * @intr: use interruptable sleep + * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait * * Wait for the requested sequence number(s) to be written by any ring * (all asics). Sequnce number array is indexed by ring id. * @intr selects whether to use interruptable (true) or non-interruptable * (false) sleep when waiting for the sequence number. Helper function * for radeon_fence_wait_*(). - * Returns 0 if the sequence number has passed, error for all other cases. + * Returns remaining time if the sequence number has passed, 0 when + * the wait timeout, or an error for all other cases. * -EDEADLK is returned when a GPU lockup has been detected. */ -static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, - bool intr) +static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, + u64 *target_seq, bool intr, + long timeout) { long r; int i; if (radeon_fence_any_seq_signaled(rdev, target_seq)) - return 0; + return timeout; /* enable IRQs and tracing */ for (i = 0; i < RADEON_NUM_RINGS; ++i) { @@ -399,11 +402,11 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, if (intr) { r = wait_event_interruptible_timeout(rdev->fence_queue, ( radeon_fence_any_seq_signaled(rdev, target_seq) - || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT); + || rdev->needs_reset), timeout); } else { r = wait_event_timeout(rdev->fence_queue, ( radeon_fence_any_seq_signaled(rdev, target_seq) - || rdev->needs_reset), MAX_SCHEDULE_TIMEOUT); + || rdev->needs_reset), timeout); } if (rdev->needs_reset) @@ -417,14 +420,14 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]); } - return r < 0 ? r : 0; + return r; } /** * radeon_fence_wait - wait for a fence to signal * * @fence: radeon fence object - * @intr: use interruptable sleep + * @intr: use interruptible sleep * * Wait for the requested fence to signal (all asics). * @intr selects whether to use interruptable (true) or non-interruptable @@ -434,7 +437,7 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq, int radeon_fence_wait(struct radeon_fence *fence, bool intr) { uint64_t seq[RADEON_NUM_RINGS] = {}; - int r; + long r; if (fence == NULL) { WARN(1, "Querying an invalid fence : %p !\n", fence); @@ -445,9 +448,10 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) return 0; - r = radeon_fence_wait_seq(fence->rdev, seq, intr); - if (r) + r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { return r; + } fence->seq = RADEON_FENCE_SIGNALED_SEQ; return 0; @@ -472,7 +476,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, { uint64_t seq[RADEON_NUM_RINGS]; unsigned i, num_rings = 0; - int r; + long r; for (i = 0; i < RADEON_NUM_RINGS; ++i) { seq[i] = 0; @@ -493,8 +497,8 @@ int radeon_fence_wait_any(struct radeon_device *rdev, if (num_rings == 0) return -ENOENT; - r = radeon_fence_wait_seq(rdev, seq, intr); - if (r) { + r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { return r; } return 0; @@ -513,6 +517,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, int radeon_fence_wait_next(struct radeon_device *rdev, int ring) { uint64_t seq[RADEON_NUM_RINGS] = {}; + long r; seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) { @@ -520,7 +525,10 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring) already the last emited fence */ return -ENOENT; } - return radeon_fence_wait_seq(rdev, seq, false); + r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; + return 0; } /** @@ -536,18 +544,18 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring) int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) { uint64_t seq[RADEON_NUM_RINGS] = {}; - int r; + long r; seq[ring] = rdev->fence_drv[ring].sync_seq[ring]; if (!seq[ring]) return 0; - r = radeon_fence_wait_seq(rdev, seq, false); - if (r) { + r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { if (r == -EDEADLK) return -EDEADLK; - dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n", + dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n", ring, r); } return 0; From d6d5c5b8364bcc4d52cddc68bcb0a330d2af20f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 15:22:00 +0200 Subject: [PATCH 200/640] drm/radeon: drop RADEON_FENCE_SIGNALED_SEQ v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's causing issues with VMID handling and comparing the fence value two times actually doesn't make handling faster. v2: rebased on reset changes Signed-off-by: Christian König Reviewed-by: Maarten Lankhorst Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 3 --- drivers/gpu/drm/radeon/radeon_fence.c | 18 ++---------------- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 74919ef57ac3..896a84779650 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -120,9 +120,6 @@ extern int radeon_bapm; #define RADEONFB_CONN_LIMIT 4 #define RADEON_BIOS_NUM_SCRATCH 8 -/* fence seq are set to this number when signaled */ -#define RADEON_FENCE_SIGNALED_SEQ 0LL - /* internal ring indices */ /* r1xx+ has gfx CP ring */ #define RADEON_RING_TYPE_GFX_INDEX 0 diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index a54bfd60510b..ecdba3afa2c3 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -328,16 +328,10 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev, */ bool radeon_fence_signaled(struct radeon_fence *fence) { - if (!fence) { + if (!fence) return true; - } - if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) { + if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) return true; - } - if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) { - fence->seq = RADEON_FENCE_SIGNALED_SEQ; - return true; - } return false; } @@ -445,15 +439,11 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) } seq[fence->ring] = fence->seq; - if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ) - return 0; - r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); if (r < 0) { return r; } - fence->seq = RADEON_FENCE_SIGNALED_SEQ; return 0; } @@ -487,10 +477,6 @@ int radeon_fence_wait_any(struct radeon_device *rdev, seq[i] = fences[i]->seq; ++num_rings; - - /* test if something was allready signaled */ - if (seq[i] == RADEON_FENCE_SIGNALED_SEQ) - return 0; } /* nothing to wait for ? */ From 3c0363891c0fa5d17b683b758bff0d81fa6a9775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 27 Aug 2014 15:22:01 +0200 Subject: [PATCH 201/640] drm/radeon: drop doing resets in a work item MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Blocking completely innocent processes with a GPU reset is a pretty bad idea. Just set needs_reset and let the next command submission or fence wait do the job. Signed-off-by: Christian König Reviewed-by: Maarten Lankhorst Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 6 ++++-- drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_device.c | 7 ++++--- drivers/gpu/drm/radeon/radeon_irq_kms.c | 18 ------------------ 4 files changed, 8 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 79a5a5519bd6..1f598ab3b9a7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -8246,8 +8246,10 @@ restart_ih: } if (queue_hotplug) schedule_work(&rdev->hotplug_work); - if (queue_reset) - schedule_work(&rdev->reset_work); + if (queue_reset) { + rdev->needs_reset = true; + wake_up_all(&rdev->fence_queue); + } if (queue_thermal) schedule_work(&rdev->pm.dpm.thermal.work); rdev->ih.rptr = rptr; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 896a84779650..c163b4f89149 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2346,7 +2346,6 @@ struct radeon_device { struct radeon_mec mec; struct work_struct hotplug_work; struct work_struct audio_work; - struct work_struct reset_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */ bool has_uvd; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 9f666370b5ac..d30f1cc1aa12 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1673,9 +1673,6 @@ int radeon_gpu_reset(struct radeon_device *rdev) return 0; } - rdev->in_reset = true; - rdev->needs_reset = false; - radeon_save_bios_scratch_regs(rdev); /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); @@ -1738,6 +1735,10 @@ int radeon_gpu_reset(struct radeon_device *rdev) radeon_hpd_init(rdev); ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); + + rdev->in_reset = true; + rdev->needs_reset = false; + downgrade_write(&rdev->exclusive_lock); drm_helper_resume_force_mode(rdev->ddev); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 16807afab362..f0bff4be67f1 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -87,23 +87,6 @@ static void radeon_hotplug_work_func(struct work_struct *work) drm_helper_hpd_irq_event(dev); } -/** - * radeon_irq_reset_work_func - execute gpu reset - * - * @work: work struct - * - * Execute scheduled gpu reset (cayman+). - * This function is called when the irq handler - * thinks we need a gpu reset. - */ -static void radeon_irq_reset_work_func(struct work_struct *work) -{ - struct radeon_device *rdev = container_of(work, struct radeon_device, - reset_work); - - radeon_gpu_reset(rdev); -} - /** * radeon_driver_irq_preinstall_kms - drm irq preinstall callback * @@ -284,7 +267,6 @@ int radeon_irq_kms_init(struct radeon_device *rdev) INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi); - INIT_WORK(&rdev->reset_work, radeon_irq_reset_work_func); rdev->irq.installed = true; r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq); From 3852752ca89ca00aa13f12a9b9450fd97ff437d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 21 Aug 2014 12:18:12 +0200 Subject: [PATCH 202/640] drm/radeon: allow UVD to use a second 256MB segment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This improves concurrent stream decoding. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon.h | 3 ++- drivers/gpu/drm/radeon/radeon_object.c | 5 +++-- drivers/gpu/drm/radeon/radeon_uvd.c | 20 ++++++++++++++++++-- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a5ac95bf05ea..83a24614138a 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1642,7 +1642,8 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence); -void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo); +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, + uint32_t allowed_domains); void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp); int radeon_uvd_cs_parse(struct radeon_cs_parser *parser); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 0129c7efae3b..c97a42432e2b 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -491,6 +491,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, bo = lobj->robj; if (!bo->pin_count) { u32 domain = lobj->prefered_domains; + u32 allowed = lobj->allowed_domains; u32 current_domain = radeon_mem_type_to_domain(bo->tbo.mem.mem_type); @@ -502,7 +503,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, * into account. We don't want to disallow buffer moves * completely. */ - if ((lobj->allowed_domains & current_domain) != 0 && + if ((allowed & current_domain) != 0 && (domain & current_domain) == 0 && /* will be moved */ bytes_moved > bytes_moved_threshold) { /* don't move it */ @@ -512,7 +513,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, retry: radeon_ttm_placement_from_domain(bo, domain); if (ring == R600_RING_TYPE_UVD_INDEX) - radeon_uvd_force_into_uvd_segment(bo); + radeon_uvd_force_into_uvd_segment(bo, allowed); initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 464d80145dfe..1dedadd8f5df 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -254,7 +254,8 @@ int radeon_uvd_resume(struct radeon_device *rdev) return 0; } -void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) +void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo, + uint32_t allowed_domains) { int i; @@ -262,6 +263,21 @@ void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo) rbo->placements[i].fpfn = 0 >> PAGE_SHIFT; rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; } + + /* If it must be in VRAM it must be in the first segment as well */ + if (allowed_domains == RADEON_GEM_DOMAIN_VRAM) + return; + + /* abort if we already have more than one placement */ + if (rbo->placement.num_placement > 1) + return; + + /* add another 256MB segment */ + rbo->placements[1] = rbo->placements[0]; + rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; + rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT; + rbo->placement.num_placement++; + rbo->placement.num_busy_placement++; } void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp) @@ -652,7 +668,7 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, return r; radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); - radeon_uvd_force_into_uvd_segment(bo); + radeon_uvd_force_into_uvd_segment(bo, RADEON_GEM_DOMAIN_VRAM); r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); if (r) From feba9b0bcf492ba991d7fbfc211dd49ebbc95a4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 22 Aug 2014 14:25:55 +0200 Subject: [PATCH 203/640] drm/radeon: preallocate mem for UVD create/destroy msg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit llocating memory for UVD create and destroy messages can fail, which is rather annoying when this happens in the middle of a GPU reset. Try to avoid this condition by preallocating a page for those dummy messages. Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_uvd.c | 101 +++++++--------------------- 1 file changed, 26 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 1dedadd8f5df..5729e9bebd9d 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -138,7 +138,8 @@ int radeon_uvd_init(struct radeon_device *rdev) } bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) + - RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE; + RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + + RADEON_GPU_PAGE_SIZE; r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, &rdev->uvd.vcpu_bo); if (r) { @@ -647,38 +648,16 @@ int radeon_uvd_cs_parse(struct radeon_cs_parser *p) } static int radeon_uvd_send_msg(struct radeon_device *rdev, - int ring, struct radeon_bo *bo, + int ring, uint64_t addr, struct radeon_fence **fence) { - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head head; struct radeon_ib ib; - uint64_t addr; int i, r; - memset(&tv, 0, sizeof(tv)); - tv.bo = &bo->tbo; - - INIT_LIST_HEAD(&head); - list_add(&tv.head, &head); - - r = ttm_eu_reserve_buffers(&ticket, &head); - if (r) - return r; - - radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM); - radeon_uvd_force_into_uvd_segment(bo, RADEON_GEM_DOMAIN_VRAM); - - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - if (r) - goto err; - r = radeon_ib_get(rdev, ring, &ib, NULL, 64); if (r) - goto err; + return r; - addr = radeon_bo_gpu_offset(bo); ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0); ib.ptr[1] = addr; ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0); @@ -690,19 +669,11 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, ib.length_dw = 16; r = radeon_ib_schedule(rdev, &ib, NULL, false); - if (r) - goto err; - ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); if (fence) *fence = radeon_fence_ref(ib.fence); radeon_ib_free(rdev, &ib); - radeon_bo_unref(&bo); - return 0; - -err: - ttm_eu_backoff_reservation(&ticket, &head); return r; } @@ -712,28 +683,19 @@ err: int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence) { - struct radeon_bo *bo; - uint32_t *msg; + /* we use the last page of the vcpu bo for the UVD message */ + uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - + RADEON_GPU_PAGE_SIZE; + + uint32_t *msg = rdev->uvd.cpu_addr + offs; + uint64_t addr = rdev->uvd.gpu_addr + offs; + int r, i; - r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, &bo); + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); if (r) return r; - r = radeon_bo_reserve(bo, false); - if (r) { - radeon_bo_unref(&bo); - return r; - } - - r = radeon_bo_kmap(bo, (void **)&msg); - if (r) { - radeon_bo_unreserve(bo); - radeon_bo_unref(&bo); - return r; - } - /* stitch together an UVD create msg */ msg[0] = cpu_to_le32(0x00000de4); msg[1] = cpu_to_le32(0x00000000); @@ -749,37 +711,27 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, for (i = 11; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - radeon_bo_kunmap(bo); - radeon_bo_unreserve(bo); - - return radeon_uvd_send_msg(rdev, ring, bo, fence); + r = radeon_uvd_send_msg(rdev, ring, addr, fence); + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + return r; } int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, uint32_t handle, struct radeon_fence **fence) { - struct radeon_bo *bo; - uint32_t *msg; + /* we use the last page of the vcpu bo for the UVD message */ + uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - + RADEON_GPU_PAGE_SIZE; + + uint32_t *msg = rdev->uvd.cpu_addr + offs; + uint64_t addr = rdev->uvd.gpu_addr + offs; + int r, i; - r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, &bo); + r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true); if (r) return r; - r = radeon_bo_reserve(bo, false); - if (r) { - radeon_bo_unref(&bo); - return r; - } - - r = radeon_bo_kmap(bo, (void **)&msg); - if (r) { - radeon_bo_unreserve(bo); - radeon_bo_unref(&bo); - return r; - } - /* stitch together an UVD destroy msg */ msg[0] = cpu_to_le32(0x00000de4); msg[1] = cpu_to_le32(0x00000002); @@ -788,10 +740,9 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, for (i = 4; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - radeon_bo_kunmap(bo); - radeon_bo_unreserve(bo); - - return radeon_uvd_send_msg(rdev, ring, bo, fence); + r = radeon_uvd_send_msg(rdev, ring, addr, fence); + radeon_bo_unreserve(rdev->uvd.vcpu_bo); + return r; } /** From d0b3c3b6c26c6eed1ba3fa37242dfc8942b5e997 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 21 Jan 2014 13:00:24 +0100 Subject: [PATCH 204/640] drm/nouveau: add reservation to nouveau_gem_ioctl_cpu_prep Apart from some code inside ttm itself and nouveau_bo_vma_del, this is the only place where ttm_bo_wait is used without a reservation. Fix this so we can remove the fence_lock later on. After the switch to rcu the reservation lock will be removed again. Signed-off-by: Maarten Lankhorst Acked-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_gem.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 292a677bfed4..0054315eb879 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -884,17 +884,31 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, struct drm_gem_object *gem; struct nouveau_bo *nvbo; bool no_wait = !!(req->flags & NOUVEAU_GEM_CPU_PREP_NOWAIT); - int ret = -EINVAL; + int ret; + struct nouveau_fence *fence = NULL; gem = drm_gem_object_lookup(dev, file_priv, req->handle); if (!gem) return -ENOENT; nvbo = nouveau_gem_object(gem); - spin_lock(&nvbo->bo.bdev->fence_lock); - ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait); - spin_unlock(&nvbo->bo.bdev->fence_lock); + ret = ttm_bo_reserve(&nvbo->bo, true, false, false, NULL); + if (!ret) { + spin_lock(&nvbo->bo.bdev->fence_lock); + ret = ttm_bo_wait(&nvbo->bo, true, true, true); + if (!no_wait && ret) + fence = nouveau_fence_ref(nvbo->bo.sync_obj); + spin_unlock(&nvbo->bo.bdev->fence_lock); + + ttm_bo_unreserve(&nvbo->bo); + } drm_gem_object_unreference_unlocked(gem); + + if (fence) { + ret = nouveau_fence_wait(fence, true, no_wait); + nouveau_fence_unref(&fence); + } + return ret; } From bdaf7ddf652ef51fd363b052e320711c06f6f553 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 21 Jan 2014 13:02:14 +0100 Subject: [PATCH 205/640] drm/nouveau: require reservations for nouveau_fence_sync and nouveau_bo_fence This will ensure we always hold the required lock when calling those functions. Signed-off-by: Maarten Lankhorst Acked-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_bo.c | 2 ++ drivers/gpu/drm/nouveau/nouveau_display.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0591ca0734e3..ed966f51e29b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1463,6 +1463,8 @@ nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence) struct nouveau_fence *new_fence = nouveau_fence_ref(fence); struct nouveau_fence *old_fence = NULL; + lockdep_assert_held(&nvbo->bo.resv->lock.base); + spin_lock(&nvbo->bo.bdev->fence_lock); old_fence = nvbo->bo.sync_obj; nvbo->bo.sync_obj = new_fence; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 65b4fd53dd4e..54b1f3d8fc7f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -717,6 +717,9 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, } mutex_lock(&cli->mutex); + ret = ttm_bo_reserve(&new_bo->bo, true, false, false, NULL); + if (ret) + goto fail_unpin; /* synchronise rendering channel with the kernel's channel */ spin_lock(&new_bo->bo.bdev->fence_lock); @@ -724,12 +727,18 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, spin_unlock(&new_bo->bo.bdev->fence_lock); ret = nouveau_fence_sync(fence, chan); nouveau_fence_unref(&fence); - if (ret) + if (ret) { + ttm_bo_unreserve(&new_bo->bo); goto fail_unpin; + } - ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL); - if (ret) - goto fail_unpin; + if (new_bo != old_bo) { + ttm_bo_unreserve(&new_bo->bo); + + ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL); + if (ret) + goto fail_unpin; + } /* Initialize a page flip struct */ *s = (struct nouveau_page_flip_state) From 7040138ff85501931138970663a988f48c0666f0 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 21 Jan 2014 13:07:01 +0100 Subject: [PATCH 206/640] drm/ttm: call ttm_bo_wait while inside a reservation This is the last remaining function that doesn't use the reservation lock completely to fence off access to a buffer. Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/ttm/ttm_bo.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index b992ec3c318a..4f1bc948bda0 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -503,17 +503,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, if (ret) return ret; - /* - * remove sync_obj with ttm_bo_wait, the wait should be - * finished, and no new wait object should have been added. - */ - spin_lock(&bdev->fence_lock); - ret = ttm_bo_wait(bo, false, false, true); - WARN_ON(ret); - spin_unlock(&bdev->fence_lock); - if (ret) - return ret; - spin_lock(&glob->lru_lock); ret = __ttm_bo_reserve(bo, false, true, false, NULL); @@ -529,8 +518,16 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, spin_unlock(&glob->lru_lock); return 0; } - } else - spin_unlock(&bdev->fence_lock); + + /* + * remove sync_obj with ttm_bo_wait, the wait should be + * finished, and no new wait object should have been added. + */ + spin_lock(&bdev->fence_lock); + ret = ttm_bo_wait(bo, false, false, true); + WARN_ON(ret); + } + spin_unlock(&bdev->fence_lock); if (ret || unlikely(list_empty(&bo->ddestroy))) { __ttm_bo_unreserve(bo); @@ -1524,6 +1521,8 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, void *sync_obj; int ret = 0; + lockdep_assert_held(&bo->resv->lock.base); + if (likely(bo->sync_obj == NULL)) return 0; From dd7cfd641228abb2669d8d047d5ec377b1835900 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 21 Jan 2014 13:07:31 +0100 Subject: [PATCH 207/640] drm/ttm: kill fence_lock No users are left, kill it off! :D Conversion to the reservation api is next on the list, after that the functionality can be restored with rcu. Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/nouveau/nouveau_bo.c | 27 +++----- drivers/gpu/drm/nouveau/nouveau_display.c | 6 +- drivers/gpu/drm/nouveau/nouveau_gem.c | 16 +---- drivers/gpu/drm/qxl/qxl_cmd.c | 2 - drivers/gpu/drm/qxl/qxl_fence.c | 4 -- drivers/gpu/drm/qxl/qxl_object.h | 2 - drivers/gpu/drm/qxl/qxl_release.c | 2 - drivers/gpu/drm/radeon/radeon_display.c | 7 +- drivers/gpu/drm/radeon/radeon_object.c | 2 - drivers/gpu/drm/ttm/ttm_bo.c | 81 ++++++----------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 5 -- drivers/gpu/drm/ttm/ttm_bo_vm.c | 3 - drivers/gpu/drm/ttm/ttm_execbuf_util.c | 2 - drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c | 4 -- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 16 ++--- include/drm/ttm/ttm_bo_api.h | 5 +- include/drm/ttm/ttm_bo_driver.h | 3 - 17 files changed, 41 insertions(+), 146 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index ed966f51e29b..8d8e5f6340d0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -1212,9 +1212,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr, } /* Fallback to software copy. */ - spin_lock(&bo->bdev->fence_lock); ret = ttm_bo_wait(bo, true, intr, no_wait_gpu); - spin_unlock(&bo->bdev->fence_lock); if (ret == 0) ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem); @@ -1457,28 +1455,21 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm) ttm_pool_unpopulate(ttm); } -void -nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence) -{ - struct nouveau_fence *new_fence = nouveau_fence_ref(fence); - struct nouveau_fence *old_fence = NULL; - - lockdep_assert_held(&nvbo->bo.resv->lock.base); - - spin_lock(&nvbo->bo.bdev->fence_lock); - old_fence = nvbo->bo.sync_obj; - nvbo->bo.sync_obj = new_fence; - spin_unlock(&nvbo->bo.bdev->fence_lock); - - nouveau_fence_unref(&old_fence); -} - static void nouveau_bo_fence_unref(void **sync_obj) { nouveau_fence_unref((struct nouveau_fence **)sync_obj); } +void +nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence) +{ + lockdep_assert_held(&nvbo->bo.resv->lock.base); + + nouveau_bo_fence_unref(&nvbo->bo.sync_obj); + nvbo->bo.sync_obj = nouveau_fence_ref(fence); +} + static void * nouveau_bo_fence_ref(void *sync_obj) { diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 54b1f3d8fc7f..e6867b9ebb46 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -722,11 +722,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, goto fail_unpin; /* synchronise rendering channel with the kernel's channel */ - spin_lock(&new_bo->bo.bdev->fence_lock); - fence = nouveau_fence_ref(new_bo->bo.sync_obj); - spin_unlock(&new_bo->bo.bdev->fence_lock); - ret = nouveau_fence_sync(fence, chan); - nouveau_fence_unref(&fence); + ret = nouveau_fence_sync(new_bo->bo.sync_obj, chan); if (ret) { ttm_bo_unreserve(&new_bo->bo); goto fail_unpin; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 0054315eb879..1650c0bdb0fc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -103,9 +103,7 @@ nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma) list_del(&vma->head); if (mapped) { - spin_lock(&nvbo->bo.bdev->fence_lock); fence = nouveau_fence_ref(nvbo->bo.sync_obj); - spin_unlock(&nvbo->bo.bdev->fence_lock); } if (fence) { @@ -430,17 +428,11 @@ retry: static int validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo) { - struct nouveau_fence *fence = NULL; + struct nouveau_fence *fence = nvbo->bo.sync_obj; int ret = 0; - spin_lock(&nvbo->bo.bdev->fence_lock); - fence = nouveau_fence_ref(nvbo->bo.sync_obj); - spin_unlock(&nvbo->bo.bdev->fence_lock); - - if (fence) { + if (fence) ret = nouveau_fence_sync(fence, chan); - nouveau_fence_unref(&fence); - } return ret; } @@ -659,9 +651,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli, data |= r->vor; } - spin_lock(&nvbo->bo.bdev->fence_lock); ret = ttm_bo_wait(&nvbo->bo, false, false, false); - spin_unlock(&nvbo->bo.bdev->fence_lock); if (ret) { NV_PRINTK(error, cli, "reloc wait_idle failed: %d\n", ret); break; @@ -894,11 +884,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, ret = ttm_bo_reserve(&nvbo->bo, true, false, false, NULL); if (!ret) { - spin_lock(&nvbo->bo.bdev->fence_lock); ret = ttm_bo_wait(&nvbo->bo, true, true, true); if (!no_wait && ret) fence = nouveau_fence_ref(nvbo->bo.sync_obj); - spin_unlock(&nvbo->bo.bdev->fence_lock); ttm_bo_unreserve(&nvbo->bo); } diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index eb89653a7a17..45fad7b45486 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -628,9 +628,7 @@ static int qxl_reap_surf(struct qxl_device *qdev, struct qxl_bo *surf, bool stal if (stall) mutex_unlock(&qdev->surf_evict_mutex); - spin_lock(&surf->tbo.bdev->fence_lock); ret = ttm_bo_wait(&surf->tbo, true, true, !stall); - spin_unlock(&surf->tbo.bdev->fence_lock); if (stall) mutex_lock(&qdev->surf_evict_mutex); diff --git a/drivers/gpu/drm/qxl/qxl_fence.c b/drivers/gpu/drm/qxl/qxl_fence.c index ae59e91cfb9a..c7248418117d 100644 --- a/drivers/gpu/drm/qxl/qxl_fence.c +++ b/drivers/gpu/drm/qxl/qxl_fence.c @@ -60,9 +60,6 @@ int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id) { void *ret; int retval = 0; - struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence); - - spin_lock(&bo->tbo.bdev->fence_lock); ret = radix_tree_delete(&qfence->tree, rel_id); if (ret == qfence) @@ -71,7 +68,6 @@ int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id) DRM_DEBUG("didn't find fence in radix tree for %d\n", rel_id); retval = -ENOENT; } - spin_unlock(&bo->tbo.bdev->fence_lock); return retval; } diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h index 83a423293afd..1edaf5768086 100644 --- a/drivers/gpu/drm/qxl/qxl_object.h +++ b/drivers/gpu/drm/qxl/qxl_object.h @@ -76,12 +76,10 @@ static inline int qxl_bo_wait(struct qxl_bo *bo, u32 *mem_type, } return r; } - spin_lock(&bo->tbo.bdev->fence_lock); if (mem_type) *mem_type = bo->tbo.mem.mem_type; if (bo->tbo.sync_obj) r = ttm_bo_wait(&bo->tbo, true, true, no_wait); - spin_unlock(&bo->tbo.bdev->fence_lock); ttm_bo_unreserve(&bo->tbo); return r; } diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 14e776f1d14e..2e5e38fee9b2 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -337,7 +337,6 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) glob = bo->glob; spin_lock(&glob->lru_lock); - spin_lock(&bdev->fence_lock); list_for_each_entry(entry, &release->bos, head) { bo = entry->bo; @@ -352,7 +351,6 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) __ttm_bo_unreserve(bo); entry->reserved = false; } - spin_unlock(&bdev->fence_lock); spin_unlock(&glob->lru_lock); ww_acquire_fini(&release->ticket); } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index bd0d687379ee..7d0a7abdab2a 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -476,11 +476,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, obj = new_radeon_fb->obj; new_rbo = gem_to_radeon_bo(obj); - spin_lock(&new_rbo->tbo.bdev->fence_lock); - if (new_rbo->tbo.sync_obj) - work->fence = radeon_fence_ref(new_rbo->tbo.sync_obj); - spin_unlock(&new_rbo->tbo.bdev->fence_lock); - /* pin the new buffer */ DRM_DEBUG_DRIVER("flip-ioctl() cur_rbo = %p, new_rbo = %p\n", work->old_rbo, new_rbo); @@ -499,6 +494,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc, DRM_ERROR("failed to pin new rbo buffer before flip\n"); goto cleanup; } + work->fence = radeon_fence_ref(new_rbo->tbo.sync_obj); radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL); radeon_bo_unreserve(new_rbo); @@ -582,7 +578,6 @@ cleanup: drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base); radeon_fence_unref(&work->fence); kfree(work); - return r; } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index c97a42432e2b..cbac963571c0 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -779,12 +779,10 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait) r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL); if (unlikely(r != 0)) return r; - spin_lock(&bo->tbo.bdev->fence_lock); if (mem_type) *mem_type = bo->tbo.mem.mem_type; if (bo->tbo.sync_obj) r = ttm_bo_wait(&bo->tbo, true, true, no_wait); - spin_unlock(&bo->tbo.bdev->fence_lock); ttm_bo_unreserve(&bo->tbo); return r; } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 4f1bc948bda0..195386f16ca4 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -415,24 +415,20 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) spin_lock(&glob->lru_lock); ret = __ttm_bo_reserve(bo, false, true, false, NULL); - spin_lock(&bdev->fence_lock); - (void) ttm_bo_wait(bo, false, false, true); - if (!ret && !bo->sync_obj) { - spin_unlock(&bdev->fence_lock); - put_count = ttm_bo_del_from_lru(bo); - - spin_unlock(&glob->lru_lock); - ttm_bo_cleanup_memtype_use(bo); - - ttm_bo_list_ref_sub(bo, put_count, true); - - return; - } - if (bo->sync_obj) - sync_obj = driver->sync_obj_ref(bo->sync_obj); - spin_unlock(&bdev->fence_lock); - if (!ret) { + (void) ttm_bo_wait(bo, false, false, true); + + if (!bo->sync_obj) { + put_count = ttm_bo_del_from_lru(bo); + + spin_unlock(&glob->lru_lock); + ttm_bo_cleanup_memtype_use(bo); + + ttm_bo_list_ref_sub(bo, put_count, true); + + return; + } + sync_obj = driver->sync_obj_ref(bo->sync_obj); /* * Make NO_EVICT bos immediately available to @@ -481,7 +477,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, int put_count; int ret; - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, false, true); if (ret && !no_wait_gpu) { @@ -493,7 +488,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, * no new sync objects can be attached. */ sync_obj = driver->sync_obj_ref(bo->sync_obj); - spin_unlock(&bdev->fence_lock); __ttm_bo_unreserve(bo); spin_unlock(&glob->lru_lock); @@ -523,11 +517,9 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo, * remove sync_obj with ttm_bo_wait, the wait should be * finished, and no new wait object should have been added. */ - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, false, true); WARN_ON(ret); } - spin_unlock(&bdev->fence_lock); if (ret || unlikely(list_empty(&bo->ddestroy))) { __ttm_bo_unreserve(bo); @@ -665,9 +657,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible, struct ttm_placement placement; int ret = 0; - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bdev->fence_lock); if (unlikely(ret != 0)) { if (ret != -ERESTARTSYS) { @@ -958,7 +948,6 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, { int ret = 0; struct ttm_mem_reg mem; - struct ttm_bo_device *bdev = bo->bdev; lockdep_assert_held(&bo->resv->lock.base); @@ -967,9 +956,7 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, * Have the driver move function wait for idle when necessary, * instead of doing it here. */ - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); - spin_unlock(&bdev->fence_lock); if (ret) return ret; mem.num_pages = bo->num_pages; @@ -1459,7 +1446,6 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, bdev->glob = glob; bdev->need_dma32 = need_dma32; bdev->val_seq = 0; - spin_lock_init(&bdev->fence_lock); mutex_lock(&glob->device_list_mutex); list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&glob->device_list_mutex); @@ -1517,7 +1503,6 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, bool lazy, bool interruptible, bool no_wait) { struct ttm_bo_driver *driver = bo->bdev->driver; - struct ttm_bo_device *bdev = bo->bdev; void *sync_obj; int ret = 0; @@ -1526,53 +1511,33 @@ int ttm_bo_wait(struct ttm_buffer_object *bo, if (likely(bo->sync_obj == NULL)) return 0; - while (bo->sync_obj) { - + if (bo->sync_obj) { if (driver->sync_obj_signaled(bo->sync_obj)) { - void *tmp_obj = bo->sync_obj; - bo->sync_obj = NULL; + driver->sync_obj_unref(&bo->sync_obj); clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bdev->fence_lock); - driver->sync_obj_unref(&tmp_obj); - spin_lock(&bdev->fence_lock); - continue; + return 0; } if (no_wait) return -EBUSY; sync_obj = driver->sync_obj_ref(bo->sync_obj); - spin_unlock(&bdev->fence_lock); ret = driver->sync_obj_wait(sync_obj, lazy, interruptible); - if (unlikely(ret != 0)) { - driver->sync_obj_unref(&sync_obj); - spin_lock(&bdev->fence_lock); - return ret; - } - spin_lock(&bdev->fence_lock); - if (likely(bo->sync_obj == sync_obj)) { - void *tmp_obj = bo->sync_obj; - bo->sync_obj = NULL; + + if (likely(ret == 0)) { clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bdev->fence_lock); - driver->sync_obj_unref(&sync_obj); - driver->sync_obj_unref(&tmp_obj); - spin_lock(&bdev->fence_lock); - } else { - spin_unlock(&bdev->fence_lock); - driver->sync_obj_unref(&sync_obj); - spin_lock(&bdev->fence_lock); + driver->sync_obj_unref(&bo->sync_obj); } + driver->sync_obj_unref(&sync_obj); } - return 0; + return ret; } EXPORT_SYMBOL(ttm_bo_wait); int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait) { - struct ttm_bo_device *bdev = bo->bdev; int ret = 0; /* @@ -1582,9 +1547,7 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait) ret = ttm_bo_reserve(bo, true, no_wait, false, NULL); if (unlikely(ret != 0)) return ret; - spin_lock(&bdev->fence_lock); ret = ttm_bo_wait(bo, false, true, no_wait); - spin_unlock(&bdev->fence_lock); if (likely(ret == 0)) atomic_inc(&bo->cpu_writers); ttm_bo_unreserve(bo); @@ -1641,9 +1604,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) * Wait for GPU, then move to system cached. */ - spin_lock(&bo->bdev->fence_lock); ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bo->bdev->fence_lock); if (unlikely(ret != 0)) goto out; diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 30e5d90cb7bc..495aebf0f9c3 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -466,12 +466,10 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, drm_vma_node_reset(&fbo->vma_node); atomic_set(&fbo->cpu_writers, 0); - spin_lock(&bdev->fence_lock); if (bo->sync_obj) fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj); else fbo->sync_obj = NULL; - spin_unlock(&bdev->fence_lock); kref_init(&fbo->list_kref); kref_init(&fbo->kref); fbo->destroy = &ttm_transfered_destroy; @@ -657,7 +655,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, struct ttm_buffer_object *ghost_obj; void *tmp_obj = NULL; - spin_lock(&bdev->fence_lock); if (bo->sync_obj) { tmp_obj = bo->sync_obj; bo->sync_obj = NULL; @@ -665,7 +662,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, bo->sync_obj = driver->sync_obj_ref(sync_obj); if (evict) { ret = ttm_bo_wait(bo, false, false, false); - spin_unlock(&bdev->fence_lock); if (tmp_obj) driver->sync_obj_unref(&tmp_obj); if (ret) @@ -688,7 +684,6 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, */ set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); - spin_unlock(&bdev->fence_lock); if (tmp_obj) driver->sync_obj_unref(&tmp_obj); diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 0ce48e5a9cb4..d05437f219e9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -45,10 +45,8 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_area_struct *vma, struct vm_fault *vmf) { - struct ttm_bo_device *bdev = bo->bdev; int ret = 0; - spin_lock(&bdev->fence_lock); if (likely(!test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags))) goto out_unlock; @@ -82,7 +80,6 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, VM_FAULT_NOPAGE; out_unlock: - spin_unlock(&bdev->fence_lock); return ret; } diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index e8dac8758528..0fbbbbd67afc 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -217,7 +217,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, glob = bo->glob; spin_lock(&glob->lru_lock); - spin_lock(&bdev->fence_lock); list_for_each_entry(entry, list, head) { bo = entry->bo; @@ -227,7 +226,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, __ttm_bo_unreserve(bo); entry->reserved = false; } - spin_unlock(&bdev->fence_lock); spin_unlock(&glob->lru_lock); if (ticket) ww_acquire_fini(ticket); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c index 37c093c0c7b8..c133b3d10de8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c @@ -863,11 +863,7 @@ static void vmw_move_notify(struct ttm_buffer_object *bo, */ static void vmw_swap_notify(struct ttm_buffer_object *bo) { - struct ttm_bo_device *bdev = bo->bdev; - - spin_lock(&bdev->fence_lock); ttm_bo_wait(bo, false, false, false); - spin_unlock(&bdev->fence_lock); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index a432c0db257c..1ee86bf82750 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -567,12 +567,13 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo, int ret; if (flags & drm_vmw_synccpu_allow_cs) { - struct ttm_bo_device *bdev = bo->bdev; + bool nonblock = !!(flags & drm_vmw_synccpu_dontblock); - spin_lock(&bdev->fence_lock); - ret = ttm_bo_wait(bo, false, true, - !!(flags & drm_vmw_synccpu_dontblock)); - spin_unlock(&bdev->fence_lock); + ret = ttm_bo_reserve(bo, true, nonblock, false, NULL); + if (!ret) { + ret = ttm_bo_wait(bo, false, true, nonblock); + ttm_bo_unreserve(bo); + } return ret; } @@ -1429,12 +1430,10 @@ void vmw_fence_single_bo(struct ttm_buffer_object *bo, else driver->sync_obj_ref(fence); - spin_lock(&bdev->fence_lock); old_fence_obj = bo->sync_obj; bo->sync_obj = fence; - spin_unlock(&bdev->fence_lock); if (old_fence_obj) vmw_fence_obj_unreference(&old_fence_obj); @@ -1475,7 +1474,6 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo, if (mem->mem_type != VMW_PL_MOB) { struct vmw_resource *res, *n; - struct ttm_bo_device *bdev = bo->bdev; struct ttm_validate_buffer val_buf; val_buf.bo = bo; @@ -1491,9 +1489,7 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo, list_del_init(&res->mob_head); } - spin_lock(&bdev->fence_lock); (void) ttm_bo_wait(bo, false, false, false); - spin_unlock(&bdev->fence_lock); } } diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index e3d39c80a091..5805f4a49478 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -237,10 +237,7 @@ struct ttm_buffer_object { struct list_head io_reserve_lru; /** - * Members protected by struct buffer_object_device::fence_lock - * In addition, setting sync_obj to anything else - * than NULL requires bo::reserved to be held. This allows for - * checking NULL while reserved but not holding the mentioned lock. + * Members protected by a bo reservation. */ void *sync_obj; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 5c8bb5699a6f..e1ee141e26cc 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -521,8 +521,6 @@ struct ttm_bo_global { * * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. - * @fence_lock: Protects the synchronizing members on *all* bos belonging - * to this device. * @vma_manager: Address space manager * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. @@ -542,7 +540,6 @@ struct ttm_bo_device { struct ttm_bo_global *glob; struct ttm_bo_driver *driver; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; - spinlock_t fence_lock; /* * Protected by internal locks. From 58b4d720c1620bbf09e42b4f218dcb2d0d8cdf3e Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 9 Jan 2014 11:03:08 +0100 Subject: [PATCH 208/640] drm/ttm: add interruptible parameter to ttm_eu_reserve_buffers It seems some drivers really want this as a parameter, like vmwgfx. Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/qxl/qxl_release.c | 2 +- drivers/gpu/drm/radeon/radeon_object.c | 2 +- drivers/gpu/drm/radeon/radeon_vm.c | 2 +- drivers/gpu/drm/ttm/ttm_execbuf_util.c | 22 +++++++++++++--------- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 7 ++----- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 2 +- include/drm/ttm/ttm_execbuf_util.h | 9 +++++---- 7 files changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 2e5e38fee9b2..656f9d3a946d 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -159,7 +159,7 @@ int qxl_release_reserve_list(struct qxl_release *release, bool no_intr) if (list_is_singular(&release->bos)) return 0; - ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos); + ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos, !no_intr); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index cbac963571c0..378fe9ea4d44 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -482,7 +482,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev, u64 bytes_moved = 0, initial_bytes_moved; u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev); - r = ttm_eu_reserve_buffers(ticket, head); + r = ttm_eu_reserve_buffers(ticket, head, true); if (unlikely(r != 0)) { return r; } diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 4751c6728fe9..3d9a6a036f8a 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -399,7 +399,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev, INIT_LIST_HEAD(&head); list_add(&tv.head, &head); - r = ttm_eu_reserve_buffers(&ticket, &head); + r = ttm_eu_reserve_buffers(&ticket, &head, true); if (r) return r; diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 0fbbbbd67afc..87d7deefc806 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -112,7 +112,7 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation); */ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, - struct list_head *list) + struct list_head *list, bool intr) { struct ttm_bo_global *glob; struct ttm_validate_buffer *entry; @@ -140,7 +140,7 @@ retry: if (entry->reserved) continue; - ret = __ttm_bo_reserve(bo, true, (ticket == NULL), true, + ret = __ttm_bo_reserve(bo, intr, (ticket == NULL), true, ticket); if (ret == -EDEADLK) { @@ -153,13 +153,17 @@ retry: ttm_eu_backoff_reservation_locked(list); spin_unlock(&glob->lru_lock); ttm_eu_list_ref_sub(list); - ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, - ticket); - if (unlikely(ret != 0)) { - if (ret == -EINTR) - ret = -ERESTARTSYS; - goto err_fini; - } + + if (intr) { + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + ticket); + if (unlikely(ret != 0)) { + if (ret == -EINTR) + ret = -ERESTARTSYS; + goto err_fini; + } + } else + ww_mutex_lock_slow(&bo->resv->lock, ticket); entry->reserved = true; if (unlikely(atomic_read(&bo->cpu_writers) > 0)) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 7bfdaa163a33..24f067bf438d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2496,7 +2496,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, if (unlikely(ret != 0)) goto out_err_nores; - ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes); + ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes, true); if (unlikely(ret != 0)) goto out_err; @@ -2684,10 +2684,7 @@ void __vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv, query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo); list_add_tail(&query_val.head, &validate_list); - do { - ret = ttm_eu_reserve_buffers(&ticket, &validate_list); - } while (ret == -ERESTARTSYS); - + ret = ttm_eu_reserve_buffers(&ticket, &validate_list, false); if (unlikely(ret != 0)) { vmw_execbuf_unpin_panic(dev_priv); goto out_no_reserve; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 1ee86bf82750..23169362bca8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1216,7 +1216,7 @@ vmw_resource_check_buffer(struct vmw_resource *res, INIT_LIST_HEAD(&val_list); val_buf->bo = ttm_bo_reference(&res->backup->base); list_add_tail(&val_buf->head, &val_list); - ret = ttm_eu_reserve_buffers(NULL, &val_list); + ret = ttm_eu_reserve_buffers(NULL, &val_list, interruptible); if (unlikely(ret != 0)) goto out_no_reserve; diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index 16db7d01a336..fd95fd569ca3 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -73,6 +73,7 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, * @ticket: [out] ww_acquire_ctx filled in by call, or NULL if only * non-blocking reserves should be tried. * @list: thread private list of ttm_validate_buffer structs. + * @intr: should the wait be interruptible * * Tries to reserve bos pointed to by the list entries for validation. * If the function returns 0, all buffers are marked as "unfenced", @@ -84,9 +85,9 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, * CPU write reservations to be cleared, and for other threads to * unreserve their buffers. * - * This function may return -ERESTART or -EAGAIN if the calling process - * receives a signal while waiting. In that case, no buffers on the list - * will be reserved upon return. + * If intr is set to true, this function may return -ERESTARTSYS if the + * calling process receives a signal while waiting. In that case, no + * buffers on the list will be reserved upon return. * * Buffers reserved by this function should be unreserved by * a call to either ttm_eu_backoff_reservation() or @@ -95,7 +96,7 @@ extern void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, */ extern int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, - struct list_head *list); + struct list_head *list, bool intr); /** * function ttm_eu_fence_buffer_objects. From 1f0dc9a59afeccb96a35ebec36661266260f5eee Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 9 Jan 2014 11:03:08 +0100 Subject: [PATCH 209/640] drm/ttm: kill off some members to ttm_validate_buffer This reorders the list to keep track of what buffers are reserved, so previous members are always unreserved. This gets rid of some bookkeeping that's no longer needed, while simplifying the code some. Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/qxl/qxl_release.c | 1 - drivers/gpu/drm/ttm/ttm_execbuf_util.c | 142 +++++++++--------------- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 1 - include/drm/ttm/ttm_execbuf_util.h | 3 - 4 files changed, 50 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 656f9d3a946d..4045ba873ab8 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -349,7 +349,6 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) ttm_bo_add_to_lru(bo); __ttm_bo_unreserve(bo); - entry->reserved = false; } spin_unlock(&glob->lru_lock); ww_acquire_fini(&release->ticket); diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c index 87d7deefc806..108730e9147b 100644 --- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -32,20 +32,12 @@ #include #include -static void ttm_eu_backoff_reservation_locked(struct list_head *list) +static void ttm_eu_backoff_reservation_reverse(struct list_head *list, + struct ttm_validate_buffer *entry) { - struct ttm_validate_buffer *entry; - - list_for_each_entry(entry, list, head) { + list_for_each_entry_continue_reverse(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - if (!entry->reserved) - continue; - entry->reserved = false; - if (entry->removed) { - ttm_bo_add_to_lru(bo); - entry->removed = false; - } __ttm_bo_unreserve(bo); } } @@ -56,27 +48,9 @@ static void ttm_eu_del_from_lru_locked(struct list_head *list) list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - if (!entry->reserved) - continue; + unsigned put_count = ttm_bo_del_from_lru(bo); - if (!entry->removed) { - entry->put_count = ttm_bo_del_from_lru(bo); - entry->removed = true; - } - } -} - -static void ttm_eu_list_ref_sub(struct list_head *list) -{ - struct ttm_validate_buffer *entry; - - list_for_each_entry(entry, list, head) { - struct ttm_buffer_object *bo = entry->bo; - - if (entry->put_count) { - ttm_bo_list_ref_sub(bo, entry->put_count, true); - entry->put_count = 0; - } + ttm_bo_list_ref_sub(bo, put_count, true); } } @@ -91,11 +65,18 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, entry = list_first_entry(list, struct ttm_validate_buffer, head); glob = entry->bo->glob; + spin_lock(&glob->lru_lock); - ttm_eu_backoff_reservation_locked(list); + list_for_each_entry(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + + ttm_bo_add_to_lru(bo); + __ttm_bo_unreserve(bo); + } + spin_unlock(&glob->lru_lock); + if (ticket) ww_acquire_fini(ticket); - spin_unlock(&glob->lru_lock); } EXPORT_SYMBOL(ttm_eu_backoff_reservation); @@ -121,64 +102,55 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, if (list_empty(list)) return 0; - list_for_each_entry(entry, list, head) { - entry->reserved = false; - entry->put_count = 0; - entry->removed = false; - } - entry = list_first_entry(list, struct ttm_validate_buffer, head); glob = entry->bo->glob; if (ticket) ww_acquire_init(ticket, &reservation_ww_class); -retry: + list_for_each_entry(entry, list, head) { struct ttm_buffer_object *bo = entry->bo; - /* already slowpath reserved? */ - if (entry->reserved) - continue; - ret = __ttm_bo_reserve(bo, intr, (ticket == NULL), true, ticket); + if (!ret && unlikely(atomic_read(&bo->cpu_writers) > 0)) { + __ttm_bo_unreserve(bo); - if (ret == -EDEADLK) { - /* uh oh, we lost out, drop every reservation and try - * to only reserve this buffer, then start over if - * this succeeds. - */ - BUG_ON(ticket == NULL); - spin_lock(&glob->lru_lock); - ttm_eu_backoff_reservation_locked(list); - spin_unlock(&glob->lru_lock); - ttm_eu_list_ref_sub(list); - - if (intr) { - ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, - ticket); - if (unlikely(ret != 0)) { - if (ret == -EINTR) - ret = -ERESTARTSYS; - goto err_fini; - } - } else - ww_mutex_lock_slow(&bo->resv->lock, ticket); - - entry->reserved = true; - if (unlikely(atomic_read(&bo->cpu_writers) > 0)) { - ret = -EBUSY; - goto err; - } - goto retry; - } else if (ret) - goto err; - - entry->reserved = true; - if (unlikely(atomic_read(&bo->cpu_writers) > 0)) { ret = -EBUSY; - goto err; } + + if (!ret) + continue; + + /* uh oh, we lost out, drop every reservation and try + * to only reserve this buffer, then start over if + * this succeeds. + */ + ttm_eu_backoff_reservation_reverse(list, entry); + + if (ret == -EDEADLK && intr) { + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + ticket); + } else if (ret == -EDEADLK) { + ww_mutex_lock_slow(&bo->resv->lock, ticket); + ret = 0; + } + + if (unlikely(ret != 0)) { + if (ret == -EINTR) + ret = -ERESTARTSYS; + if (ticket) { + ww_acquire_done(ticket); + ww_acquire_fini(ticket); + } + return ret; + } + + /* move this item to the front of the list, + * forces correct iteration of the loop without keeping track + */ + list_del(&entry->head); + list_add(&entry->head, list); } if (ticket) @@ -186,20 +158,7 @@ retry: spin_lock(&glob->lru_lock); ttm_eu_del_from_lru_locked(list); spin_unlock(&glob->lru_lock); - ttm_eu_list_ref_sub(list); return 0; - -err: - spin_lock(&glob->lru_lock); - ttm_eu_backoff_reservation_locked(list); - spin_unlock(&glob->lru_lock); - ttm_eu_list_ref_sub(list); -err_fini: - if (ticket) { - ww_acquire_done(ticket); - ww_acquire_fini(ticket); - } - return ret; } EXPORT_SYMBOL(ttm_eu_reserve_buffers); @@ -228,7 +187,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, bo->sync_obj = driver->sync_obj_ref(sync_obj); ttm_bo_add_to_lru(bo); __ttm_bo_unreserve(bo); - entry->reserved = false; } spin_unlock(&glob->lru_lock); if (ticket) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 24f067bf438d..b19b2b980cb4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -346,7 +346,6 @@ static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context, ++sw_context->cur_val_buf; val_buf = &vval_buf->base; val_buf->bo = ttm_bo_reference(bo); - val_buf->reserved = false; list_add_tail(&val_buf->head, &sw_context->validate_nodes); vval_buf->validate_as_mob = validate_as_mob; } diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h index fd95fd569ca3..8490cb8ee0d8 100644 --- a/include/drm/ttm/ttm_execbuf_util.h +++ b/include/drm/ttm/ttm_execbuf_util.h @@ -48,9 +48,6 @@ struct ttm_validate_buffer { struct list_head head; struct ttm_buffer_object *bo; - bool reserved; - bool removed; - int put_count; void *old_sync_obj; }; From 954605ca3f897ad617123279eb3404a404cce5ab Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 9 Jan 2014 11:03:12 +0100 Subject: [PATCH 210/640] drm/radeon: use common fence implementation for fences, v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes since v1: - Kill the sw interrupt dance, add and use radeon_irq_kms_sw_irq_get_delayed instead. - Change custom wait function, lockdep complained about it. Holding exclusive_lock in the wait function might cause deadlocks. Instead do all the processing in .enable_signaling, and wait on the global fence_queue to pick up gpu resets. - Process all fences in radeon_gpu_reset after reset to close a race with the trylock in enable_signaling. Changes since v2: - Small changes to work with the rewritten lockup recovery patches. Changes since v3: - Call radeon_fence_schedule_check when exclusive_lock cannot be acquired to always cause a wake up. - Reset irqs from hangup check. - Drop reading seqno in the callback, use cached value. - Fix indentation in radeon_fence_default_wait - Add a radeon_test_signaled function, drop a few test_bit calls. - Make to_radeon_fence global. Signed-off-by: Maarten Lankhorst Reviewed-by: Christian König --- drivers/gpu/drm/radeon/radeon.h | 23 ++- drivers/gpu/drm/radeon/radeon_device.c | 1 + drivers/gpu/drm/radeon/radeon_fence.c | 225 +++++++++++++++++++++--- drivers/gpu/drm/radeon/radeon_irq_kms.c | 15 ++ 4 files changed, 235 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 83a24614138a..d80dc547a105 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -66,6 +66,7 @@ #include #include #include +#include #include #include @@ -354,17 +355,19 @@ struct radeon_fence_driver { /* sync_seq is protected by ring emission lock */ uint64_t sync_seq[RADEON_NUM_RINGS]; atomic64_t last_seq; - bool initialized; + bool initialized, delayed_irq; struct delayed_work lockup_work; }; struct radeon_fence { + struct fence base; + struct radeon_device *rdev; - struct kref kref; - /* protected by radeon_fence.lock */ uint64_t seq; /* RB, DMA, etc. */ unsigned ring; + + wait_queue_t fence_wake; }; int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); @@ -782,6 +785,7 @@ struct radeon_irq { int radeon_irq_kms_init(struct radeon_device *rdev); void radeon_irq_kms_fini(struct radeon_device *rdev); void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); +bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); @@ -2308,6 +2312,7 @@ struct radeon_device { struct radeon_mman mman; struct radeon_fence_driver fence_drv[RADEON_NUM_RINGS]; wait_queue_head_t fence_queue; + unsigned fence_context; struct mutex ring_lock; struct radeon_ring ring[RADEON_NUM_RINGS]; bool ib_pool_ready; @@ -2441,7 +2446,17 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v); /* * Cast helper */ -#define to_radeon_fence(p) ((struct radeon_fence *)(p)) +extern const struct fence_ops radeon_fence_ops; + +static inline struct radeon_fence *to_radeon_fence(struct fence *f) +{ + struct radeon_fence *__f = container_of(f, struct radeon_fence, base); + + if (__f->base.ops == &radeon_fence_ops) + return __f; + + return NULL; +} /* * Registers read & write functions. diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index d30f1cc1aa12..e84a76e6656a 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1253,6 +1253,7 @@ int radeon_device_init(struct radeon_device *rdev, for (i = 0; i < RADEON_NUM_RINGS; i++) { rdev->ring[i].idx = i; } + rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS); DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n", radeon_family_name[rdev->family], pdev->vendor, pdev->device, diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index ecdba3afa2c3..af9f2d6bd7d0 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -130,21 +130,59 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring) { + u64 seq = ++rdev->fence_drv[ring].sync_seq[ring]; + /* we are protected by the ring emission mutex */ *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); if ((*fence) == NULL) { return -ENOMEM; } - kref_init(&((*fence)->kref)); (*fence)->rdev = rdev; - (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; + (*fence)->seq = seq; (*fence)->ring = ring; + fence_init(&(*fence)->base, &radeon_fence_ops, + &rdev->fence_queue.lock, rdev->fence_context + ring, seq); radeon_fence_ring_emit(rdev, ring, *fence); trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); radeon_fence_schedule_check(rdev, ring); return 0; } +/** + * radeon_fence_check_signaled - callback from fence_queue + * + * this function is called with fence_queue lock held, which is also used + * for the fence locking itself, so unlocked variants are used for + * fence_signal, and remove_wait_queue. + */ +static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key) +{ + struct radeon_fence *fence; + u64 seq; + + fence = container_of(wait, struct radeon_fence, fence_wake); + + /* + * We cannot use radeon_fence_process here because we're already + * in the waitqueue, in a call from wake_up_all. + */ + seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq); + if (seq >= fence->seq) { + int ret = fence_signal_locked(&fence->base); + + if (!ret) + FENCE_TRACE(&fence->base, "signaled from irq context\n"); + else + FENCE_TRACE(&fence->base, "was already signaled\n"); + + radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring); + __remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake); + fence_put(&fence->base); + } else + FENCE_TRACE(&fence->base, "pending\n"); + return 0; +} + /** * radeon_fence_activity - check for fence activity * @@ -242,6 +280,15 @@ static void radeon_fence_check_lockup(struct work_struct *work) return; } + if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) { + unsigned long irqflags; + + fence_drv->delayed_irq = false; + spin_lock_irqsave(&rdev->irq.lock, irqflags); + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + } + if (radeon_fence_activity(rdev, ring)) wake_up_all(&rdev->fence_queue); @@ -275,21 +322,6 @@ void radeon_fence_process(struct radeon_device *rdev, int ring) wake_up_all(&rdev->fence_queue); } -/** - * radeon_fence_destroy - destroy a fence - * - * @kref: fence kref - * - * Frees the fence object (all asics). - */ -static void radeon_fence_destroy(struct kref *kref) -{ - struct radeon_fence *fence; - - fence = container_of(kref, struct radeon_fence, kref); - kfree(fence); -} - /** * radeon_fence_seq_signaled - check if a fence sequence number has signaled * @@ -318,6 +350,75 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev, return false; } +static bool radeon_fence_is_signaled(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + struct radeon_device *rdev = fence->rdev; + unsigned ring = fence->ring; + u64 seq = fence->seq; + + if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) { + return true; + } + + if (down_read_trylock(&rdev->exclusive_lock)) { + radeon_fence_process(rdev, ring); + up_read(&rdev->exclusive_lock); + + if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) { + return true; + } + } + return false; +} + +/** + * radeon_fence_enable_signaling - enable signalling on fence + * @fence: fence + * + * This function is called with fence_queue lock held, and adds a callback + * to fence_queue that checks if this fence is signaled, and if so it + * signals the fence and removes itself. + */ +static bool radeon_fence_enable_signaling(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + struct radeon_device *rdev = fence->rdev; + + if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) + return false; + + if (down_read_trylock(&rdev->exclusive_lock)) { + radeon_irq_kms_sw_irq_get(rdev, fence->ring); + + if (radeon_fence_activity(rdev, fence->ring)) + wake_up_all_locked(&rdev->fence_queue); + + /* did fence get signaled after we enabled the sw irq? */ + if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) { + radeon_irq_kms_sw_irq_put(rdev, fence->ring); + up_read(&rdev->exclusive_lock); + return false; + } + + up_read(&rdev->exclusive_lock); + } else { + /* we're probably in a lockup, lets not fiddle too much */ + if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring)) + rdev->fence_drv[fence->ring].delayed_irq = true; + radeon_fence_schedule_check(rdev, fence->ring); + } + + fence->fence_wake.flags = 0; + fence->fence_wake.private = NULL; + fence->fence_wake.func = radeon_fence_check_signaled; + __add_wait_queue(&rdev->fence_queue, &fence->fence_wake); + fence_get(f); + + FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring); + return true; +} + /** * radeon_fence_signaled - check if a fence has signaled * @@ -330,8 +431,15 @@ bool radeon_fence_signaled(struct radeon_fence *fence) { if (!fence) return true; - if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) + + if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) { + int ret; + + ret = fence_signal(&fence->base); + if (!ret) + FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n"); return true; + } return false; } @@ -433,17 +541,15 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) uint64_t seq[RADEON_NUM_RINGS] = {}; long r; - if (fence == NULL) { - WARN(1, "Querying an invalid fence : %p !\n", fence); - return -EINVAL; - } - seq[fence->ring] = fence->seq; r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); if (r < 0) { return r; } + r = fence_signal(&fence->base); + if (!r) + FENCE_TRACE(&fence->base, "signaled from fence_wait\n"); return 0; } @@ -557,7 +663,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring) */ struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence) { - kref_get(&fence->kref); + fence_get(&fence->base); return fence; } @@ -574,7 +680,7 @@ void radeon_fence_unref(struct radeon_fence **fence) *fence = NULL; if (tmp) { - kref_put(&tmp->kref, radeon_fence_destroy); + fence_put(&tmp->base); } } @@ -887,3 +993,72 @@ int radeon_debugfs_fence_init(struct radeon_device *rdev) return 0; #endif } + +static const char *radeon_fence_get_driver_name(struct fence *fence) +{ + return "radeon"; +} + +static const char *radeon_fence_get_timeline_name(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + switch (fence->ring) { + case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx"; + case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1"; + case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2"; + case R600_RING_TYPE_DMA_INDEX: return "radeon.dma"; + case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1"; + case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd"; + case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1"; + case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2"; + default: WARN_ON_ONCE(1); return "radeon.unk"; + } +} + +static inline bool radeon_test_signaled(struct radeon_fence *fence) +{ + return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags); +} + +static signed long radeon_fence_default_wait(struct fence *f, bool intr, + signed long t) +{ + struct radeon_fence *fence = to_radeon_fence(f); + struct radeon_device *rdev = fence->rdev; + bool signaled; + + fence_enable_sw_signaling(&fence->base); + + /* + * This function has to return -EDEADLK, but cannot hold + * exclusive_lock during the wait because some callers + * may already hold it. This means checking needs_reset without + * lock, and not fiddling with any gpu internals. + * + * The callback installed with fence_enable_sw_signaling will + * run before our wait_event_*timeout call, so we will see + * both the signaled fence and the changes to needs_reset. + */ + + if (intr) + t = wait_event_interruptible_timeout(rdev->fence_queue, + ((signaled = radeon_test_signaled(fence)) || + rdev->needs_reset), t); + else + t = wait_event_timeout(rdev->fence_queue, + ((signaled = radeon_test_signaled(fence)) || + rdev->needs_reset), t); + + if (t > 0 && !signaled) + return -EDEADLK; + return t; +} + +const struct fence_ops radeon_fence_ops = { + .get_driver_name = radeon_fence_get_driver_name, + .get_timeline_name = radeon_fence_get_timeline_name, + .enable_signaling = radeon_fence_enable_signaling, + .signaled = radeon_fence_is_signaled, + .wait = radeon_fence_default_wait, + .release = NULL, +}; diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index f0bff4be67f1..7784911d78ef 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -323,6 +323,21 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) } } +/** + * radeon_irq_kms_sw_irq_get_delayed - enable software interrupt + * + * @rdev: radeon device pointer + * @ring: ring whose interrupt you want to enable + * + * Enables the software interrupt for a specific ring (all asics). + * The software interrupt is generally used to signal a fence on + * a particular ring. + */ +bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring) +{ + return atomic_inc_return(&rdev->irq.ring_int[ring]) == 1; +} + /** * radeon_irq_kms_sw_irq_put - disable software interrupt * From b6c044a3d881c1b01fe0220ad76e548ea5d79b1c Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 3 Aug 2014 10:36:59 +0800 Subject: [PATCH 211/640] gpu: ipu-v3: Select GENERIC_IRQ_CHIP to fix build error This driver uses GENERIC_IRQ_CHIP, so it needs to select GENERIC_IRQ_CHIP to avoid build error. Fixes below build errors: ERROR: "irq_alloc_domain_generic_chips" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! ERROR: "irq_gc_mask_clr_bit" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! ERROR: "irq_gc_mask_set_bit" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! ERROR: "irq_generic_chip_ops" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! ERROR: "irq_gc_ack_set_bit" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! ERROR: "irq_get_domain_generic_chip" [drivers/gpu/ipu-v3/imx-ipu-v3.ko] undefined! make[1]: *** [__modpost] Error 1 make: *** [modules] Error 2 Signed-off-by: Axel Lin Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig index 2f228a2f2a48..01864a55264a 100644 --- a/drivers/gpu/ipu-v3/Kconfig +++ b/drivers/gpu/ipu-v3/Kconfig @@ -2,6 +2,7 @@ config IMX_IPUV3_CORE tristate "IPUv3 core support" depends on SOC_IMX5 || SOC_IMX6Q || SOC_IMX6SL || ARCH_MULTIPLATFORM depends on RESET_CONTROLLER + select GENERIC_IRQ_CHIP help Choose this if you have a i.MX5/6 system and want to use the Image Processing Unit. This option only enables IPU base support. From e4946cdcabcffd4814e153e71d28884b94c65e9e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 3 Aug 2014 10:38:18 +0800 Subject: [PATCH 212/640] gpu: ipu-v3: Return proper error on ipu_add_client_devices error path Avoid returning an uninitialized variable in the error path. Signed-off-by: Axel Lin Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 04e7b2eafbdd..e340bebd3419 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -1116,8 +1116,10 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base) id++, ®->pdata, sizeof(reg->pdata)); } - if (IS_ERR(pdev)) + if (IS_ERR(pdev)) { + ret = PTR_ERR(pdev); goto err_register; + } } return 0; From e68885e24ad1a2d7d4ad6df04cbc9b623bd1d0b9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 2 Sep 2014 00:37:13 -0300 Subject: [PATCH 213/640] gpu: ipu-v3: ipu-smfc: Do not leave DEBUG defined Let's only define DEBUG for debugging purpose and not by default to avoid printing debugging message unnecessarily. Signed-off-by: Fabio Estevam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-smfc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/ipu-v3/ipu-smfc.c b/drivers/gpu/ipu-v3/ipu-smfc.c index e4f85ad286fc..4939c5011d4d 100644 --- a/drivers/gpu/ipu-v3/ipu-smfc.c +++ b/drivers/gpu/ipu-v3/ipu-smfc.c @@ -8,7 +8,6 @@ * http://www.opensource.org/licenses/gpl-license.html * http://www.gnu.org/copyleft/gpl.html */ -#define DEBUG #include #include #include From 2ffd48f2e7ae06c3d7b2bcde9a0cb211d1a32468 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 19 Aug 2014 10:52:40 -0700 Subject: [PATCH 214/640] gpu: ipu-v3: Add Camera Sensor Interface unit Adds the Camera Sensor Interface (CSI) unit required for video capture. Signed-off-by: Steve Longerbeam Removed the unused clk_get_rate in ipu_csi_init_interface and the ipu_csi_ccir_err_detection_enable/disable functions. Checkpatch cleanup. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/Makefile | 2 +- drivers/gpu/ipu-v3/ipu-common.c | 44 +- drivers/gpu/ipu-v3/ipu-csi.c | 741 ++++++++++++++++++++++++++++++++ drivers/gpu/ipu-v3/ipu-prv.h | 6 + include/video/imx-ipu-v3.h | 32 +- 5 files changed, 810 insertions(+), 15 deletions(-) create mode 100644 drivers/gpu/ipu-v3/ipu-csi.c diff --git a/drivers/gpu/ipu-v3/Makefile b/drivers/gpu/ipu-v3/Makefile index 0b42836caae1..d22bd06caa6d 100644 --- a/drivers/gpu/ipu-v3/Makefile +++ b/drivers/gpu/ipu-v3/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_IMX_IPUV3_CORE) += imx-ipu-v3.o -imx-ipu-v3-objs := ipu-common.o ipu-cpmem.o ipu-dc.o ipu-di.o \ +imx-ipu-v3-objs := ipu-common.o ipu-cpmem.o ipu-csi.o ipu-dc.o ipu-di.o \ ipu-dp.o ipu-dmfc.o ipu-smfc.o diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index cae543115856..511c364231a2 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -217,18 +217,6 @@ int ipu_module_disable(struct ipu_soc *ipu, u32 mask) } EXPORT_SYMBOL_GPL(ipu_module_disable); -int ipu_csi_enable(struct ipu_soc *ipu, int csi) -{ - return ipu_module_enable(ipu, csi ? IPU_CONF_CSI1_EN : IPU_CONF_CSI0_EN); -} -EXPORT_SYMBOL_GPL(ipu_csi_enable); - -int ipu_csi_disable(struct ipu_soc *ipu, int csi) -{ - return ipu_module_disable(ipu, csi ? IPU_CONF_CSI1_EN : IPU_CONF_CSI0_EN); -} -EXPORT_SYMBOL_GPL(ipu_csi_disable); - int ipu_smfc_enable(struct ipu_soc *ipu) { return ipu_module_enable(ipu, IPU_CONF_SMFC_EN); @@ -439,6 +427,8 @@ struct ipu_devtype { unsigned long cpmem_ofs; unsigned long srm_ofs; unsigned long tpm_ofs; + unsigned long csi0_ofs; + unsigned long csi1_ofs; unsigned long disp0_ofs; unsigned long disp1_ofs; unsigned long dc_tmpl_ofs; @@ -452,6 +442,8 @@ static struct ipu_devtype ipu_type_imx51 = { .cpmem_ofs = 0x1f000000, .srm_ofs = 0x1f040000, .tpm_ofs = 0x1f060000, + .csi0_ofs = 0x1f030000, + .csi1_ofs = 0x1f038000, .disp0_ofs = 0x1e040000, .disp1_ofs = 0x1e048000, .dc_tmpl_ofs = 0x1f080000, @@ -465,6 +457,8 @@ static struct ipu_devtype ipu_type_imx53 = { .cpmem_ofs = 0x07000000, .srm_ofs = 0x07040000, .tpm_ofs = 0x07060000, + .csi0_ofs = 0x07030000, + .csi1_ofs = 0x07038000, .disp0_ofs = 0x06040000, .disp1_ofs = 0x06048000, .dc_tmpl_ofs = 0x07080000, @@ -478,6 +472,8 @@ static struct ipu_devtype ipu_type_imx6q = { .cpmem_ofs = 0x00300000, .srm_ofs = 0x00340000, .tpm_ofs = 0x00360000, + .csi0_ofs = 0x00230000, + .csi1_ofs = 0x00238000, .disp0_ofs = 0x00240000, .disp1_ofs = 0x00248000, .dc_tmpl_ofs = 0x00380000, @@ -508,6 +504,20 @@ static int ipu_submodules_init(struct ipu_soc *ipu, goto err_cpmem; } + ret = ipu_csi_init(ipu, dev, 0, ipu_base + devtype->csi0_ofs, + IPU_CONF_CSI0_EN, ipu_clk); + if (ret) { + unit = "csi0"; + goto err_csi_0; + } + + ret = ipu_csi_init(ipu, dev, 1, ipu_base + devtype->csi1_ofs, + IPU_CONF_CSI1_EN, ipu_clk); + if (ret) { + unit = "csi1"; + goto err_csi_1; + } + ret = ipu_di_init(ipu, dev, 0, ipu_base + devtype->disp0_ofs, IPU_CONF_DI0_EN, ipu_clk); if (ret) { @@ -562,6 +572,10 @@ err_dc: err_di_1: ipu_di_exit(ipu, 0); err_di_0: + ipu_csi_exit(ipu, 1); +err_csi_1: + ipu_csi_exit(ipu, 0); +err_csi_0: ipu_cpmem_exit(ipu); err_cpmem: dev_err(&pdev->dev, "init %s failed with %d\n", unit, ret); @@ -640,6 +654,8 @@ static void ipu_submodules_exit(struct ipu_soc *ipu) ipu_dc_exit(ipu); ipu_di_exit(ipu, 1); ipu_di_exit(ipu, 0); + ipu_csi_exit(ipu, 1); + ipu_csi_exit(ipu, 0); ipu_cpmem_exit(ipu); } @@ -859,6 +875,10 @@ static int ipu_probe(struct platform_device *pdev) ipu_base + devtype->cm_ofs + IPU_CM_IDMAC_REG_OFS); dev_dbg(&pdev->dev, "cpmem: 0x%08lx\n", ipu_base + devtype->cpmem_ofs); + dev_dbg(&pdev->dev, "csi0: 0x%08lx\n", + ipu_base + devtype->csi0_ofs); + dev_dbg(&pdev->dev, "csi1: 0x%08lx\n", + ipu_base + devtype->csi1_ofs); dev_dbg(&pdev->dev, "disp0: 0x%08lx\n", ipu_base + devtype->disp0_ofs); dev_dbg(&pdev->dev, "disp1: 0x%08lx\n", diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c new file mode 100644 index 000000000000..d6f56471bd2a --- /dev/null +++ b/drivers/gpu/ipu-v3/ipu-csi.c @@ -0,0 +1,741 @@ +/* + * Copyright (C) 2012-2014 Mentor Graphics Inc. + * Copyright (C) 2005-2009 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ipu-prv.h" + +struct ipu_csi { + void __iomem *base; + int id; + u32 module; + struct clk *clk_ipu; /* IPU bus clock */ + spinlock_t lock; + bool inuse; + struct ipu_soc *ipu; +}; + +/* CSI Register Offsets */ +#define CSI_SENS_CONF 0x0000 +#define CSI_SENS_FRM_SIZE 0x0004 +#define CSI_ACT_FRM_SIZE 0x0008 +#define CSI_OUT_FRM_CTRL 0x000c +#define CSI_TST_CTRL 0x0010 +#define CSI_CCIR_CODE_1 0x0014 +#define CSI_CCIR_CODE_2 0x0018 +#define CSI_CCIR_CODE_3 0x001c +#define CSI_MIPI_DI 0x0020 +#define CSI_SKIP 0x0024 +#define CSI_CPD_CTRL 0x0028 +#define CSI_CPD_RC(n) (0x002c + ((n)*4)) +#define CSI_CPD_RS(n) (0x004c + ((n)*4)) +#define CSI_CPD_GRC(n) (0x005c + ((n)*4)) +#define CSI_CPD_GRS(n) (0x007c + ((n)*4)) +#define CSI_CPD_GBC(n) (0x008c + ((n)*4)) +#define CSI_CPD_GBS(n) (0x00Ac + ((n)*4)) +#define CSI_CPD_BC(n) (0x00Bc + ((n)*4)) +#define CSI_CPD_BS(n) (0x00Dc + ((n)*4)) +#define CSI_CPD_OFFSET1 0x00ec +#define CSI_CPD_OFFSET2 0x00f0 + +/* CSI Register Fields */ +#define CSI_SENS_CONF_DATA_FMT_SHIFT 8 +#define CSI_SENS_CONF_DATA_FMT_MASK 0x00000700 +#define CSI_SENS_CONF_DATA_FMT_RGB_YUV444 0L +#define CSI_SENS_CONF_DATA_FMT_YUV422_YUYV 1L +#define CSI_SENS_CONF_DATA_FMT_YUV422_UYVY 2L +#define CSI_SENS_CONF_DATA_FMT_BAYER 3L +#define CSI_SENS_CONF_DATA_FMT_RGB565 4L +#define CSI_SENS_CONF_DATA_FMT_RGB555 5L +#define CSI_SENS_CONF_DATA_FMT_RGB444 6L +#define CSI_SENS_CONF_DATA_FMT_JPEG 7L + +#define CSI_SENS_CONF_VSYNC_POL_SHIFT 0 +#define CSI_SENS_CONF_HSYNC_POL_SHIFT 1 +#define CSI_SENS_CONF_DATA_POL_SHIFT 2 +#define CSI_SENS_CONF_PIX_CLK_POL_SHIFT 3 +#define CSI_SENS_CONF_SENS_PRTCL_MASK 0x00000070 +#define CSI_SENS_CONF_SENS_PRTCL_SHIFT 4 +#define CSI_SENS_CONF_PACK_TIGHT_SHIFT 7 +#define CSI_SENS_CONF_DATA_WIDTH_SHIFT 11 +#define CSI_SENS_CONF_EXT_VSYNC_SHIFT 15 +#define CSI_SENS_CONF_DIVRATIO_SHIFT 16 + +#define CSI_SENS_CONF_DIVRATIO_MASK 0x00ff0000 +#define CSI_SENS_CONF_DATA_DEST_SHIFT 24 +#define CSI_SENS_CONF_DATA_DEST_MASK 0x07000000 +#define CSI_SENS_CONF_JPEG8_EN_SHIFT 27 +#define CSI_SENS_CONF_JPEG_EN_SHIFT 28 +#define CSI_SENS_CONF_FORCE_EOF_SHIFT 29 +#define CSI_SENS_CONF_DATA_EN_POL_SHIFT 31 + +#define CSI_DATA_DEST_IC 2 +#define CSI_DATA_DEST_IDMAC 4 + +#define CSI_CCIR_ERR_DET_EN 0x01000000 +#define CSI_HORI_DOWNSIZE_EN 0x80000000 +#define CSI_VERT_DOWNSIZE_EN 0x40000000 +#define CSI_TEST_GEN_MODE_EN 0x01000000 + +#define CSI_HSC_MASK 0x1fff0000 +#define CSI_HSC_SHIFT 16 +#define CSI_VSC_MASK 0x00000fff +#define CSI_VSC_SHIFT 0 + +#define CSI_TEST_GEN_R_MASK 0x000000ff +#define CSI_TEST_GEN_R_SHIFT 0 +#define CSI_TEST_GEN_G_MASK 0x0000ff00 +#define CSI_TEST_GEN_G_SHIFT 8 +#define CSI_TEST_GEN_B_MASK 0x00ff0000 +#define CSI_TEST_GEN_B_SHIFT 16 + +#define CSI_MAX_RATIO_SKIP_SMFC_MASK 0x00000007 +#define CSI_MAX_RATIO_SKIP_SMFC_SHIFT 0 +#define CSI_SKIP_SMFC_MASK 0x000000f8 +#define CSI_SKIP_SMFC_SHIFT 3 +#define CSI_ID_2_SKIP_MASK 0x00000300 +#define CSI_ID_2_SKIP_SHIFT 8 + +#define CSI_COLOR_FIRST_ROW_MASK 0x00000002 +#define CSI_COLOR_FIRST_COMP_MASK 0x00000001 + +/* MIPI CSI-2 data types */ +#define MIPI_DT_YUV420 0x18 /* YYY.../UYVY.... */ +#define MIPI_DT_YUV420_LEGACY 0x1a /* UYY.../VYY... */ +#define MIPI_DT_YUV422 0x1e /* UYVY... */ +#define MIPI_DT_RGB444 0x20 +#define MIPI_DT_RGB555 0x21 +#define MIPI_DT_RGB565 0x22 +#define MIPI_DT_RGB666 0x23 +#define MIPI_DT_RGB888 0x24 +#define MIPI_DT_RAW6 0x28 +#define MIPI_DT_RAW7 0x29 +#define MIPI_DT_RAW8 0x2a +#define MIPI_DT_RAW10 0x2b +#define MIPI_DT_RAW12 0x2c +#define MIPI_DT_RAW14 0x2d + +/* + * Bitfield of CSI bus signal polarities and modes. + */ +struct ipu_csi_bus_config { + unsigned data_width:4; + unsigned clk_mode:3; + unsigned ext_vsync:1; + unsigned vsync_pol:1; + unsigned hsync_pol:1; + unsigned pixclk_pol:1; + unsigned data_pol:1; + unsigned sens_clksrc:1; + unsigned pack_tight:1; + unsigned force_eof:1; + unsigned data_en_pol:1; + + unsigned data_fmt; + unsigned mipi_dt; +}; + +/* + * Enumeration of CSI data bus widths. + */ +enum ipu_csi_data_width { + IPU_CSI_DATA_WIDTH_4 = 0, + IPU_CSI_DATA_WIDTH_8 = 1, + IPU_CSI_DATA_WIDTH_10 = 3, + IPU_CSI_DATA_WIDTH_12 = 5, + IPU_CSI_DATA_WIDTH_16 = 9, +}; + +/* + * Enumeration of CSI clock modes. + */ +enum ipu_csi_clk_mode { + IPU_CSI_CLK_MODE_GATED_CLK, + IPU_CSI_CLK_MODE_NONGATED_CLK, + IPU_CSI_CLK_MODE_CCIR656_PROGRESSIVE, + IPU_CSI_CLK_MODE_CCIR656_INTERLACED, + IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_DDR, + IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_SDR, + IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_DDR, + IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_SDR, +}; + +static inline u32 ipu_csi_read(struct ipu_csi *csi, unsigned offset) +{ + return readl(csi->base + offset); +} + +static inline void ipu_csi_write(struct ipu_csi *csi, u32 value, + unsigned offset) +{ + writel(value, csi->base + offset); +} + +/* + * Set mclk division ratio for generating test mode mclk. Only used + * for test generator. + */ +static int ipu_csi_set_testgen_mclk(struct ipu_csi *csi, u32 pixel_clk, + u32 ipu_clk) +{ + u32 temp; + u32 div_ratio; + + div_ratio = (ipu_clk / pixel_clk) - 1; + + if (div_ratio > 0xFF || div_ratio < 0) { + dev_err(csi->ipu->dev, + "value of pixel_clk extends normal range\n"); + return -EINVAL; + } + + temp = ipu_csi_read(csi, CSI_SENS_CONF); + temp &= ~CSI_SENS_CONF_DIVRATIO_MASK; + ipu_csi_write(csi, temp | (div_ratio << CSI_SENS_CONF_DIVRATIO_SHIFT), + CSI_SENS_CONF); + + return 0; +} + +/* + * Find the CSI data format and data width for the given V4L2 media + * bus pixel format code. + */ +static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code) +{ + switch (mbus_code) { + case V4L2_MBUS_FMT_BGR565_2X8_BE: + case V4L2_MBUS_FMT_BGR565_2X8_LE: + case V4L2_MBUS_FMT_RGB565_2X8_BE: + case V4L2_MBUS_FMT_RGB565_2X8_LE: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_RGB565; + cfg->mipi_dt = MIPI_DT_RGB565; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_RGB444_2X8_PADHI_BE: + case V4L2_MBUS_FMT_RGB444_2X8_PADHI_LE: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_RGB444; + cfg->mipi_dt = MIPI_DT_RGB444; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_RGB555_2X8_PADHI_BE: + case V4L2_MBUS_FMT_RGB555_2X8_PADHI_LE: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_RGB555; + cfg->mipi_dt = MIPI_DT_RGB555; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_UYVY8_2X8: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_YUV422_UYVY; + cfg->mipi_dt = MIPI_DT_YUV422; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_YUYV8_2X8: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_YUV422_YUYV; + cfg->mipi_dt = MIPI_DT_YUV422; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_UYVY8_1X16: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_YUV422_UYVY; + cfg->mipi_dt = MIPI_DT_YUV422; + cfg->data_width = IPU_CSI_DATA_WIDTH_16; + break; + case V4L2_MBUS_FMT_YUYV8_1X16: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_YUV422_YUYV; + cfg->mipi_dt = MIPI_DT_YUV422; + cfg->data_width = IPU_CSI_DATA_WIDTH_16; + break; + case V4L2_MBUS_FMT_SBGGR8_1X8: + case V4L2_MBUS_FMT_SGBRG8_1X8: + case V4L2_MBUS_FMT_SGRBG8_1X8: + case V4L2_MBUS_FMT_SRGGB8_1X8: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; + cfg->mipi_dt = MIPI_DT_RAW8; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8: + case V4L2_MBUS_FMT_SGBRG10_DPCM8_1X8: + case V4L2_MBUS_FMT_SGRBG10_DPCM8_1X8: + case V4L2_MBUS_FMT_SRGGB10_DPCM8_1X8: + case V4L2_MBUS_FMT_SBGGR10_2X8_PADHI_BE: + case V4L2_MBUS_FMT_SBGGR10_2X8_PADHI_LE: + case V4L2_MBUS_FMT_SBGGR10_2X8_PADLO_BE: + case V4L2_MBUS_FMT_SBGGR10_2X8_PADLO_LE: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; + cfg->mipi_dt = MIPI_DT_RAW10; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + case V4L2_MBUS_FMT_SBGGR10_1X10: + case V4L2_MBUS_FMT_SGBRG10_1X10: + case V4L2_MBUS_FMT_SGRBG10_1X10: + case V4L2_MBUS_FMT_SRGGB10_1X10: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; + cfg->mipi_dt = MIPI_DT_RAW10; + cfg->data_width = IPU_CSI_DATA_WIDTH_10; + break; + case V4L2_MBUS_FMT_SBGGR12_1X12: + case V4L2_MBUS_FMT_SGBRG12_1X12: + case V4L2_MBUS_FMT_SGRBG12_1X12: + case V4L2_MBUS_FMT_SRGGB12_1X12: + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_BAYER; + cfg->mipi_dt = MIPI_DT_RAW12; + cfg->data_width = IPU_CSI_DATA_WIDTH_12; + break; + case V4L2_MBUS_FMT_JPEG_1X8: + /* TODO */ + cfg->data_fmt = CSI_SENS_CONF_DATA_FMT_JPEG; + cfg->mipi_dt = MIPI_DT_RAW8; + cfg->data_width = IPU_CSI_DATA_WIDTH_8; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* + * Fill a CSI bus config struct from mbus_config and mbus_framefmt. + */ +static void fill_csi_bus_cfg(struct ipu_csi_bus_config *csicfg, + struct v4l2_mbus_config *mbus_cfg, + struct v4l2_mbus_framefmt *mbus_fmt) +{ + memset(csicfg, 0, sizeof(*csicfg)); + + mbus_code_to_bus_cfg(csicfg, mbus_fmt->code); + + switch (mbus_cfg->type) { + case V4L2_MBUS_PARALLEL: + csicfg->ext_vsync = 1; + csicfg->vsync_pol = (mbus_cfg->flags & + V4L2_MBUS_VSYNC_ACTIVE_LOW) ? 1 : 0; + csicfg->hsync_pol = (mbus_cfg->flags & + V4L2_MBUS_HSYNC_ACTIVE_LOW) ? 1 : 0; + csicfg->pixclk_pol = (mbus_cfg->flags & + V4L2_MBUS_PCLK_SAMPLE_FALLING) ? 1 : 0; + csicfg->clk_mode = IPU_CSI_CLK_MODE_GATED_CLK; + break; + case V4L2_MBUS_BT656: + csicfg->ext_vsync = 0; + if (V4L2_FIELD_HAS_BOTH(mbus_fmt->field)) + csicfg->clk_mode = IPU_CSI_CLK_MODE_CCIR656_INTERLACED; + else + csicfg->clk_mode = IPU_CSI_CLK_MODE_CCIR656_PROGRESSIVE; + break; + case V4L2_MBUS_CSI2: + /* + * MIPI CSI-2 requires non gated clock mode, all other + * parameters are not applicable for MIPI CSI-2 bus. + */ + csicfg->clk_mode = IPU_CSI_CLK_MODE_NONGATED_CLK; + break; + default: + /* will never get here, keep compiler quiet */ + break; + } +} + +int ipu_csi_init_interface(struct ipu_csi *csi, + struct v4l2_mbus_config *mbus_cfg, + struct v4l2_mbus_framefmt *mbus_fmt) +{ + struct ipu_csi_bus_config cfg; + unsigned long flags; + u32 data = 0; + + fill_csi_bus_cfg(&cfg, mbus_cfg, mbus_fmt); + + /* Set the CSI_SENS_CONF register remaining fields */ + data |= cfg.data_width << CSI_SENS_CONF_DATA_WIDTH_SHIFT | + cfg.data_fmt << CSI_SENS_CONF_DATA_FMT_SHIFT | + cfg.data_pol << CSI_SENS_CONF_DATA_POL_SHIFT | + cfg.vsync_pol << CSI_SENS_CONF_VSYNC_POL_SHIFT | + cfg.hsync_pol << CSI_SENS_CONF_HSYNC_POL_SHIFT | + cfg.pixclk_pol << CSI_SENS_CONF_PIX_CLK_POL_SHIFT | + cfg.ext_vsync << CSI_SENS_CONF_EXT_VSYNC_SHIFT | + cfg.clk_mode << CSI_SENS_CONF_SENS_PRTCL_SHIFT | + cfg.pack_tight << CSI_SENS_CONF_PACK_TIGHT_SHIFT | + cfg.force_eof << CSI_SENS_CONF_FORCE_EOF_SHIFT | + cfg.data_en_pol << CSI_SENS_CONF_DATA_EN_POL_SHIFT; + + spin_lock_irqsave(&csi->lock, flags); + + ipu_csi_write(csi, data, CSI_SENS_CONF); + + /* Setup sensor frame size */ + ipu_csi_write(csi, + (mbus_fmt->width - 1) | ((mbus_fmt->height - 1) << 16), + CSI_SENS_FRM_SIZE); + + /* Set CCIR registers */ + + switch (cfg.clk_mode) { + case IPU_CSI_CLK_MODE_CCIR656_PROGRESSIVE: + ipu_csi_write(csi, 0x40030, CSI_CCIR_CODE_1); + ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3); + break; + case IPU_CSI_CLK_MODE_CCIR656_INTERLACED: + if (mbus_fmt->width == 720 && mbus_fmt->height == 576) { + /* + * PAL case + * + * Field0BlankEnd = 0x6, Field0BlankStart = 0x2, + * Field0ActiveEnd = 0x4, Field0ActiveStart = 0 + * Field1BlankEnd = 0x7, Field1BlankStart = 0x3, + * Field1ActiveEnd = 0x5, Field1ActiveStart = 0x1 + */ + ipu_csi_write(csi, 0x40596 | CSI_CCIR_ERR_DET_EN, + CSI_CCIR_CODE_1); + ipu_csi_write(csi, 0xD07DF, CSI_CCIR_CODE_2); + ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3); + + } else if (mbus_fmt->width == 720 && mbus_fmt->height == 480) { + /* + * NTSC case + * + * Field0BlankEnd = 0x7, Field0BlankStart = 0x3, + * Field0ActiveEnd = 0x5, Field0ActiveStart = 0x1 + * Field1BlankEnd = 0x6, Field1BlankStart = 0x2, + * Field1ActiveEnd = 0x4, Field1ActiveStart = 0 + */ + ipu_csi_write(csi, 0xD07DF | CSI_CCIR_ERR_DET_EN, + CSI_CCIR_CODE_1); + ipu_csi_write(csi, 0x40596, CSI_CCIR_CODE_2); + ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3); + } else { + dev_err(csi->ipu->dev, + "Unsupported CCIR656 interlaced video mode\n"); + spin_unlock_irqrestore(&csi->lock, flags); + return -EINVAL; + } + break; + case IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_DDR: + case IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_SDR: + case IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_DDR: + case IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_SDR: + ipu_csi_write(csi, 0x40030 | CSI_CCIR_ERR_DET_EN, + CSI_CCIR_CODE_1); + ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3); + break; + case IPU_CSI_CLK_MODE_GATED_CLK: + case IPU_CSI_CLK_MODE_NONGATED_CLK: + ipu_csi_write(csi, 0, CSI_CCIR_CODE_1); + break; + } + + dev_dbg(csi->ipu->dev, "CSI_SENS_CONF = 0x%08X\n", + ipu_csi_read(csi, CSI_SENS_CONF)); + dev_dbg(csi->ipu->dev, "CSI_ACT_FRM_SIZE = 0x%08X\n", + ipu_csi_read(csi, CSI_ACT_FRM_SIZE)); + + spin_unlock_irqrestore(&csi->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_init_interface); + +bool ipu_csi_is_interlaced(struct ipu_csi *csi) +{ + unsigned long flags; + u32 sensor_protocol; + + spin_lock_irqsave(&csi->lock, flags); + sensor_protocol = + (ipu_csi_read(csi, CSI_SENS_CONF) & + CSI_SENS_CONF_SENS_PRTCL_MASK) >> + CSI_SENS_CONF_SENS_PRTCL_SHIFT; + spin_unlock_irqrestore(&csi->lock, flags); + + switch (sensor_protocol) { + case IPU_CSI_CLK_MODE_GATED_CLK: + case IPU_CSI_CLK_MODE_NONGATED_CLK: + case IPU_CSI_CLK_MODE_CCIR656_PROGRESSIVE: + case IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_DDR: + case IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_SDR: + return false; + case IPU_CSI_CLK_MODE_CCIR656_INTERLACED: + case IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_DDR: + case IPU_CSI_CLK_MODE_CCIR1120_INTERLACED_SDR: + return true; + default: + dev_err(csi->ipu->dev, + "CSI %d sensor protocol unsupported\n", csi->id); + return false; + } +} +EXPORT_SYMBOL_GPL(ipu_csi_is_interlaced); + +void ipu_csi_get_window(struct ipu_csi *csi, struct v4l2_rect *w) +{ + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&csi->lock, flags); + + reg = ipu_csi_read(csi, CSI_ACT_FRM_SIZE); + w->width = (reg & 0xFFFF) + 1; + w->height = (reg >> 16 & 0xFFFF) + 1; + + reg = ipu_csi_read(csi, CSI_OUT_FRM_CTRL); + w->left = (reg & CSI_HSC_MASK) >> CSI_HSC_SHIFT; + w->top = (reg & CSI_VSC_MASK) >> CSI_VSC_SHIFT; + + spin_unlock_irqrestore(&csi->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_csi_get_window); + +void ipu_csi_set_window(struct ipu_csi *csi, struct v4l2_rect *w) +{ + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&csi->lock, flags); + + ipu_csi_write(csi, (w->width - 1) | ((w->height - 1) << 16), + CSI_ACT_FRM_SIZE); + + reg = ipu_csi_read(csi, CSI_OUT_FRM_CTRL); + reg &= ~(CSI_HSC_MASK | CSI_VSC_MASK); + reg |= ((w->top << CSI_VSC_SHIFT) | (w->left << CSI_HSC_SHIFT)); + ipu_csi_write(csi, reg, CSI_OUT_FRM_CTRL); + + spin_unlock_irqrestore(&csi->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_csi_set_window); + +void ipu_csi_set_test_generator(struct ipu_csi *csi, bool active, + u32 r_value, u32 g_value, u32 b_value, + u32 pix_clk) +{ + unsigned long flags; + u32 ipu_clk = clk_get_rate(csi->clk_ipu); + u32 temp; + + spin_lock_irqsave(&csi->lock, flags); + + temp = ipu_csi_read(csi, CSI_TST_CTRL); + + if (active == false) { + temp &= ~CSI_TEST_GEN_MODE_EN; + ipu_csi_write(csi, temp, CSI_TST_CTRL); + } else { + /* Set sensb_mclk div_ratio */ + ipu_csi_set_testgen_mclk(csi, pix_clk, ipu_clk); + + temp &= ~(CSI_TEST_GEN_R_MASK | CSI_TEST_GEN_G_MASK | + CSI_TEST_GEN_B_MASK); + temp |= CSI_TEST_GEN_MODE_EN; + temp |= (r_value << CSI_TEST_GEN_R_SHIFT) | + (g_value << CSI_TEST_GEN_G_SHIFT) | + (b_value << CSI_TEST_GEN_B_SHIFT); + ipu_csi_write(csi, temp, CSI_TST_CTRL); + } + + spin_unlock_irqrestore(&csi->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_csi_set_test_generator); + +int ipu_csi_set_mipi_datatype(struct ipu_csi *csi, u32 vc, + struct v4l2_mbus_framefmt *mbus_fmt) +{ + struct ipu_csi_bus_config cfg; + unsigned long flags; + u32 temp; + + if (vc > 3) + return -EINVAL; + + mbus_code_to_bus_cfg(&cfg, mbus_fmt->code); + + spin_lock_irqsave(&csi->lock, flags); + + temp = ipu_csi_read(csi, CSI_MIPI_DI); + temp &= ~(0xff << (vc * 8)); + temp |= (cfg.mipi_dt << (vc * 8)); + ipu_csi_write(csi, temp, CSI_MIPI_DI); + + spin_unlock_irqrestore(&csi->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_set_mipi_datatype); + +int ipu_csi_set_skip_smfc(struct ipu_csi *csi, u32 skip, + u32 max_ratio, u32 id) +{ + unsigned long flags; + u32 temp; + + if (max_ratio > 5 || id > 3) + return -EINVAL; + + spin_lock_irqsave(&csi->lock, flags); + + temp = ipu_csi_read(csi, CSI_SKIP); + temp &= ~(CSI_MAX_RATIO_SKIP_SMFC_MASK | CSI_ID_2_SKIP_MASK | + CSI_SKIP_SMFC_MASK); + temp |= (max_ratio << CSI_MAX_RATIO_SKIP_SMFC_SHIFT) | + (id << CSI_ID_2_SKIP_SHIFT) | + (skip << CSI_SKIP_SMFC_SHIFT); + ipu_csi_write(csi, temp, CSI_SKIP); + + spin_unlock_irqrestore(&csi->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_set_skip_smfc); + +int ipu_csi_set_dest(struct ipu_csi *csi, enum ipu_csi_dest csi_dest) +{ + unsigned long flags; + u32 csi_sens_conf, dest; + + if (csi_dest == IPU_CSI_DEST_IDMAC) + dest = CSI_DATA_DEST_IDMAC; + else + dest = CSI_DATA_DEST_IC; /* IC or VDIC */ + + spin_lock_irqsave(&csi->lock, flags); + + csi_sens_conf = ipu_csi_read(csi, CSI_SENS_CONF); + csi_sens_conf &= ~CSI_SENS_CONF_DATA_DEST_MASK; + csi_sens_conf |= (dest << CSI_SENS_CONF_DATA_DEST_SHIFT); + ipu_csi_write(csi, csi_sens_conf, CSI_SENS_CONF); + + spin_unlock_irqrestore(&csi->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_set_dest); + +int ipu_csi_enable(struct ipu_csi *csi) +{ + ipu_module_enable(csi->ipu, csi->module); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_enable); + +int ipu_csi_disable(struct ipu_csi *csi) +{ + ipu_module_disable(csi->ipu, csi->module); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_csi_disable); + +struct ipu_csi *ipu_csi_get(struct ipu_soc *ipu, int id) +{ + unsigned long flags; + struct ipu_csi *csi, *ret; + + if (id > 1) + return ERR_PTR(-EINVAL); + + csi = ipu->csi_priv[id]; + ret = csi; + + spin_lock_irqsave(&csi->lock, flags); + + if (csi->inuse) { + ret = ERR_PTR(-EBUSY); + goto unlock; + } + + csi->inuse = true; +unlock: + spin_unlock_irqrestore(&csi->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_csi_get); + +void ipu_csi_put(struct ipu_csi *csi) +{ + unsigned long flags; + + spin_lock_irqsave(&csi->lock, flags); + csi->inuse = false; + spin_unlock_irqrestore(&csi->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_csi_put); + +int ipu_csi_init(struct ipu_soc *ipu, struct device *dev, int id, + unsigned long base, u32 module, struct clk *clk_ipu) +{ + struct ipu_csi *csi; + + if (id > 1) + return -ENODEV; + + csi = devm_kzalloc(dev, sizeof(*csi), GFP_KERNEL); + if (!csi) + return -ENOMEM; + + ipu->csi_priv[id] = csi; + + spin_lock_init(&csi->lock); + csi->module = module; + csi->id = id; + csi->clk_ipu = clk_ipu; + csi->base = devm_ioremap(dev, base, PAGE_SIZE); + if (!csi->base) + return -ENOMEM; + + dev_dbg(dev, "CSI%d base: 0x%08lx remapped to %p\n", + id, base, csi->base); + csi->ipu = ipu; + + return 0; +} + +void ipu_csi_exit(struct ipu_soc *ipu, int id) +{ +} + +void ipu_csi_dump(struct ipu_csi *csi) +{ + dev_dbg(csi->ipu->dev, "CSI_SENS_CONF: %08x\n", + ipu_csi_read(csi, CSI_SENS_CONF)); + dev_dbg(csi->ipu->dev, "CSI_SENS_FRM_SIZE: %08x\n", + ipu_csi_read(csi, CSI_SENS_FRM_SIZE)); + dev_dbg(csi->ipu->dev, "CSI_ACT_FRM_SIZE: %08x\n", + ipu_csi_read(csi, CSI_ACT_FRM_SIZE)); + dev_dbg(csi->ipu->dev, "CSI_OUT_FRM_CTRL: %08x\n", + ipu_csi_read(csi, CSI_OUT_FRM_CTRL)); + dev_dbg(csi->ipu->dev, "CSI_TST_CTRL: %08x\n", + ipu_csi_read(csi, CSI_TST_CTRL)); + dev_dbg(csi->ipu->dev, "CSI_CCIR_CODE_1: %08x\n", + ipu_csi_read(csi, CSI_CCIR_CODE_1)); + dev_dbg(csi->ipu->dev, "CSI_CCIR_CODE_2: %08x\n", + ipu_csi_read(csi, CSI_CCIR_CODE_2)); + dev_dbg(csi->ipu->dev, "CSI_CCIR_CODE_3: %08x\n", + ipu_csi_read(csi, CSI_CCIR_CODE_3)); + dev_dbg(csi->ipu->dev, "CSI_MIPI_DI: %08x\n", + ipu_csi_read(csi, CSI_MIPI_DI)); + dev_dbg(csi->ipu->dev, "CSI_SKIP: %08x\n", + ipu_csi_read(csi, CSI_SKIP)); +} +EXPORT_SYMBOL_GPL(ipu_csi_dump); diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h index 1a5c55c05fe8..9b274f1259e1 100644 --- a/drivers/gpu/ipu-v3/ipu-prv.h +++ b/drivers/gpu/ipu-v3/ipu-prv.h @@ -157,6 +157,7 @@ struct ipuv3_channel { }; struct ipu_cpmem; +struct ipu_csi; struct ipu_dc_priv; struct ipu_dmfc_priv; struct ipu_di; @@ -189,6 +190,7 @@ struct ipu_soc { struct ipu_dp_priv *dp_priv; struct ipu_dmfc_priv *dmfc_priv; struct ipu_di *di_priv[2]; + struct ipu_csi *csi_priv[2]; struct ipu_smfc_priv *smfc_priv; }; @@ -211,6 +213,10 @@ int ipu_module_disable(struct ipu_soc *ipu, u32 mask); bool ipu_idmac_channel_busy(struct ipu_soc *ipu, unsigned int chno); int ipu_wait_interrupt(struct ipu_soc *ipu, int irq, int ms); +int ipu_csi_init(struct ipu_soc *ipu, struct device *dev, int id, + unsigned long base, u32 module, struct clk *clk_ipu); +void ipu_csi_exit(struct ipu_soc *ipu, int id); + int ipu_di_init(struct ipu_soc *ipu, struct device *dev, int id, unsigned long base, u32 module, struct clk *ipu_clk); void ipu_di_exit(struct ipu_soc *ipu, int id); diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index f80fe13b0d4d..6d254275192b 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -16,6 +16,7 @@ #include #include #include +#include struct ipu_soc; @@ -61,6 +62,15 @@ struct ipu_di_signal_cfg { u8 vsync_pin; }; +/* + * Enumeration of CSI destinations + */ +enum ipu_csi_dest { + IPU_CSI_DEST_IDMAC, /* to memory via SMFC */ + IPU_CSI_DEST_IC, /* to Image Converter */ + IPU_CSI_DEST_VDIC, /* to VDIC */ +}; + enum ipu_color_space { IPUV3_COLORSPACE_RGB, IPUV3_COLORSPACE_YUV, @@ -211,8 +221,26 @@ int ipu_dp_set_global_alpha(struct ipu_dp *dp, bool enable, u8 alpha, /* * IPU CMOS Sensor Interface (csi) functions */ -int ipu_csi_enable(struct ipu_soc *ipu, int csi); -int ipu_csi_disable(struct ipu_soc *ipu, int csi); +struct ipu_csi; +int ipu_csi_init_interface(struct ipu_csi *csi, + struct v4l2_mbus_config *mbus_cfg, + struct v4l2_mbus_framefmt *mbus_fmt); +bool ipu_csi_is_interlaced(struct ipu_csi *csi); +void ipu_csi_get_window(struct ipu_csi *csi, struct v4l2_rect *w); +void ipu_csi_set_window(struct ipu_csi *csi, struct v4l2_rect *w); +void ipu_csi_set_test_generator(struct ipu_csi *csi, bool active, + u32 r_value, u32 g_value, u32 b_value, + u32 pix_clk); +int ipu_csi_set_mipi_datatype(struct ipu_csi *csi, u32 vc, + struct v4l2_mbus_framefmt *mbus_fmt); +int ipu_csi_set_skip_smfc(struct ipu_csi *csi, u32 skip, + u32 max_ratio, u32 id); +int ipu_csi_set_dest(struct ipu_csi *csi, enum ipu_csi_dest csi_dest); +int ipu_csi_enable(struct ipu_csi *csi); +int ipu_csi_disable(struct ipu_csi *csi); +struct ipu_csi *ipu_csi_get(struct ipu_soc *ipu, int id); +void ipu_csi_put(struct ipu_csi *csi); +void ipu_csi_dump(struct ipu_csi *csi); /* * IPU Sensor Multiple FIFO Controller (SMFC) functions From 1aa8ea0d2bd5d4ba7b5d2b132a02157bc1fb9793 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Mon, 11 Aug 2014 13:04:50 +0200 Subject: [PATCH 215/640] gpu: ipu-v3: Add Image Converter unit Adds the Image Converter (IC) unit. Signed-off-by: Steve Longerbeam Condensed the three CSC setup functions into a single one that uses static tables to set up the CSC task parameters. Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/Makefile | 2 +- drivers/gpu/ipu-v3/ipu-common.c | 19 +- drivers/gpu/ipu-v3/ipu-ic.c | 778 ++++++++++++++++++++++++++++++++ drivers/gpu/ipu-v3/ipu-prv.h | 6 + include/video/imx-ipu-v3.h | 45 ++ 5 files changed, 848 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/ipu-v3/ipu-ic.c diff --git a/drivers/gpu/ipu-v3/Makefile b/drivers/gpu/ipu-v3/Makefile index d22bd06caa6d..107ec236a4a6 100644 --- a/drivers/gpu/ipu-v3/Makefile +++ b/drivers/gpu/ipu-v3/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_IMX_IPUV3_CORE) += imx-ipu-v3.o imx-ipu-v3-objs := ipu-common.o ipu-cpmem.o ipu-csi.o ipu-dc.o ipu-di.o \ - ipu-dp.o ipu-dmfc.o ipu-smfc.o + ipu-dp.o ipu-dmfc.o ipu-ic.o ipu-smfc.o diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 511c364231a2..312eef6ffcad 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -429,6 +429,7 @@ struct ipu_devtype { unsigned long tpm_ofs; unsigned long csi0_ofs; unsigned long csi1_ofs; + unsigned long ic_ofs; unsigned long disp0_ofs; unsigned long disp1_ofs; unsigned long dc_tmpl_ofs; @@ -444,6 +445,7 @@ static struct ipu_devtype ipu_type_imx51 = { .tpm_ofs = 0x1f060000, .csi0_ofs = 0x1f030000, .csi1_ofs = 0x1f038000, + .ic_ofs = 0x1f020000, .disp0_ofs = 0x1e040000, .disp1_ofs = 0x1e048000, .dc_tmpl_ofs = 0x1f080000, @@ -459,6 +461,7 @@ static struct ipu_devtype ipu_type_imx53 = { .tpm_ofs = 0x07060000, .csi0_ofs = 0x07030000, .csi1_ofs = 0x07038000, + .ic_ofs = 0x07020000, .disp0_ofs = 0x06040000, .disp1_ofs = 0x06048000, .dc_tmpl_ofs = 0x07080000, @@ -474,6 +477,7 @@ static struct ipu_devtype ipu_type_imx6q = { .tpm_ofs = 0x00360000, .csi0_ofs = 0x00230000, .csi1_ofs = 0x00238000, + .ic_ofs = 0x00220000, .disp0_ofs = 0x00240000, .disp1_ofs = 0x00248000, .dc_tmpl_ofs = 0x00380000, @@ -518,8 +522,16 @@ static int ipu_submodules_init(struct ipu_soc *ipu, goto err_csi_1; } + ret = ipu_ic_init(ipu, dev, + ipu_base + devtype->ic_ofs, + ipu_base + devtype->tpm_ofs); + if (ret) { + unit = "ic"; + goto err_ic; + } + ret = ipu_di_init(ipu, dev, 0, ipu_base + devtype->disp0_ofs, - IPU_CONF_DI0_EN, ipu_clk); + IPU_CONF_DI0_EN, ipu_clk); if (ret) { unit = "di0"; goto err_di_0; @@ -572,6 +584,8 @@ err_dc: err_di_1: ipu_di_exit(ipu, 0); err_di_0: + ipu_ic_exit(ipu); +err_ic: ipu_csi_exit(ipu, 1); err_csi_1: ipu_csi_exit(ipu, 0); @@ -654,6 +668,7 @@ static void ipu_submodules_exit(struct ipu_soc *ipu) ipu_dc_exit(ipu); ipu_di_exit(ipu, 1); ipu_di_exit(ipu, 0); + ipu_ic_exit(ipu); ipu_csi_exit(ipu, 1); ipu_csi_exit(ipu, 0); ipu_cpmem_exit(ipu); @@ -879,6 +894,8 @@ static int ipu_probe(struct platform_device *pdev) ipu_base + devtype->csi0_ofs); dev_dbg(&pdev->dev, "csi1: 0x%08lx\n", ipu_base + devtype->csi1_ofs); + dev_dbg(&pdev->dev, "ic: 0x%08lx\n", + ipu_base + devtype->ic_ofs); dev_dbg(&pdev->dev, "disp0: 0x%08lx\n", ipu_base + devtype->disp0_ofs); dev_dbg(&pdev->dev, "disp1: 0x%08lx\n", diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c new file mode 100644 index 000000000000..ad75588e1629 --- /dev/null +++ b/drivers/gpu/ipu-v3/ipu-ic.c @@ -0,0 +1,778 @@ +/* + * Copyright (C) 2012-2014 Mentor Graphics Inc. + * Copyright 2005-2012 Freescale Semiconductor, Inc. All Rights Reserved. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include +#include +#include +#include +#include +#include +#include +#include "ipu-prv.h" + +/* IC Register Offsets */ +#define IC_CONF 0x0000 +#define IC_PRP_ENC_RSC 0x0004 +#define IC_PRP_VF_RSC 0x0008 +#define IC_PP_RSC 0x000C +#define IC_CMBP_1 0x0010 +#define IC_CMBP_2 0x0014 +#define IC_IDMAC_1 0x0018 +#define IC_IDMAC_2 0x001C +#define IC_IDMAC_3 0x0020 +#define IC_IDMAC_4 0x0024 + +/* IC Register Fields */ +#define IC_CONF_PRPENC_EN (1 << 0) +#define IC_CONF_PRPENC_CSC1 (1 << 1) +#define IC_CONF_PRPENC_ROT_EN (1 << 2) +#define IC_CONF_PRPVF_EN (1 << 8) +#define IC_CONF_PRPVF_CSC1 (1 << 9) +#define IC_CONF_PRPVF_CSC2 (1 << 10) +#define IC_CONF_PRPVF_CMB (1 << 11) +#define IC_CONF_PRPVF_ROT_EN (1 << 12) +#define IC_CONF_PP_EN (1 << 16) +#define IC_CONF_PP_CSC1 (1 << 17) +#define IC_CONF_PP_CSC2 (1 << 18) +#define IC_CONF_PP_CMB (1 << 19) +#define IC_CONF_PP_ROT_EN (1 << 20) +#define IC_CONF_IC_GLB_LOC_A (1 << 28) +#define IC_CONF_KEY_COLOR_EN (1 << 29) +#define IC_CONF_RWS_EN (1 << 30) +#define IC_CONF_CSI_MEM_WR_EN (1 << 31) + +#define IC_IDMAC_1_CB0_BURST_16 (1 << 0) +#define IC_IDMAC_1_CB1_BURST_16 (1 << 1) +#define IC_IDMAC_1_CB2_BURST_16 (1 << 2) +#define IC_IDMAC_1_CB3_BURST_16 (1 << 3) +#define IC_IDMAC_1_CB4_BURST_16 (1 << 4) +#define IC_IDMAC_1_CB5_BURST_16 (1 << 5) +#define IC_IDMAC_1_CB6_BURST_16 (1 << 6) +#define IC_IDMAC_1_CB7_BURST_16 (1 << 7) +#define IC_IDMAC_1_PRPENC_ROT_MASK (0x7 << 11) +#define IC_IDMAC_1_PRPENC_ROT_OFFSET 11 +#define IC_IDMAC_1_PRPVF_ROT_MASK (0x7 << 14) +#define IC_IDMAC_1_PRPVF_ROT_OFFSET 14 +#define IC_IDMAC_1_PP_ROT_MASK (0x7 << 17) +#define IC_IDMAC_1_PP_ROT_OFFSET 17 +#define IC_IDMAC_1_PP_FLIP_RS (1 << 22) +#define IC_IDMAC_1_PRPVF_FLIP_RS (1 << 21) +#define IC_IDMAC_1_PRPENC_FLIP_RS (1 << 20) + +#define IC_IDMAC_2_PRPENC_HEIGHT_MASK (0x3ff << 0) +#define IC_IDMAC_2_PRPENC_HEIGHT_OFFSET 0 +#define IC_IDMAC_2_PRPVF_HEIGHT_MASK (0x3ff << 10) +#define IC_IDMAC_2_PRPVF_HEIGHT_OFFSET 10 +#define IC_IDMAC_2_PP_HEIGHT_MASK (0x3ff << 20) +#define IC_IDMAC_2_PP_HEIGHT_OFFSET 20 + +#define IC_IDMAC_3_PRPENC_WIDTH_MASK (0x3ff << 0) +#define IC_IDMAC_3_PRPENC_WIDTH_OFFSET 0 +#define IC_IDMAC_3_PRPVF_WIDTH_MASK (0x3ff << 10) +#define IC_IDMAC_3_PRPVF_WIDTH_OFFSET 10 +#define IC_IDMAC_3_PP_WIDTH_MASK (0x3ff << 20) +#define IC_IDMAC_3_PP_WIDTH_OFFSET 20 + +struct ic_task_regoffs { + u32 rsc; + u32 tpmem_csc[2]; +}; + +struct ic_task_bitfields { + u32 ic_conf_en; + u32 ic_conf_rot_en; + u32 ic_conf_cmb_en; + u32 ic_conf_csc1_en; + u32 ic_conf_csc2_en; + u32 ic_cmb_galpha_bit; +}; + +static const struct ic_task_regoffs ic_task_reg[IC_NUM_TASKS] = { + [IC_TASK_ENCODER] = { + .rsc = IC_PRP_ENC_RSC, + .tpmem_csc = {0x2008, 0}, + }, + [IC_TASK_VIEWFINDER] = { + .rsc = IC_PRP_VF_RSC, + .tpmem_csc = {0x4028, 0x4040}, + }, + [IC_TASK_POST_PROCESSOR] = { + .rsc = IC_PP_RSC, + .tpmem_csc = {0x6060, 0x6078}, + }, +}; + +static const struct ic_task_bitfields ic_task_bit[IC_NUM_TASKS] = { + [IC_TASK_ENCODER] = { + .ic_conf_en = IC_CONF_PRPENC_EN, + .ic_conf_rot_en = IC_CONF_PRPENC_ROT_EN, + .ic_conf_cmb_en = 0, /* NA */ + .ic_conf_csc1_en = IC_CONF_PRPENC_CSC1, + .ic_conf_csc2_en = 0, /* NA */ + .ic_cmb_galpha_bit = 0, /* NA */ + }, + [IC_TASK_VIEWFINDER] = { + .ic_conf_en = IC_CONF_PRPVF_EN, + .ic_conf_rot_en = IC_CONF_PRPVF_ROT_EN, + .ic_conf_cmb_en = IC_CONF_PRPVF_CMB, + .ic_conf_csc1_en = IC_CONF_PRPVF_CSC1, + .ic_conf_csc2_en = IC_CONF_PRPVF_CSC2, + .ic_cmb_galpha_bit = 0, + }, + [IC_TASK_POST_PROCESSOR] = { + .ic_conf_en = IC_CONF_PP_EN, + .ic_conf_rot_en = IC_CONF_PP_ROT_EN, + .ic_conf_cmb_en = IC_CONF_PP_CMB, + .ic_conf_csc1_en = IC_CONF_PP_CSC1, + .ic_conf_csc2_en = IC_CONF_PP_CSC2, + .ic_cmb_galpha_bit = 8, + }, +}; + +struct ipu_ic_priv; + +struct ipu_ic { + enum ipu_ic_task task; + const struct ic_task_regoffs *reg; + const struct ic_task_bitfields *bit; + + enum ipu_color_space in_cs, g_in_cs; + enum ipu_color_space out_cs; + bool graphics; + bool rotation; + bool in_use; + + struct ipu_ic_priv *priv; +}; + +struct ipu_ic_priv { + void __iomem *base; + void __iomem *tpmem_base; + spinlock_t lock; + struct ipu_soc *ipu; + int use_count; + struct ipu_ic task[IC_NUM_TASKS]; +}; + +static inline u32 ipu_ic_read(struct ipu_ic *ic, unsigned offset) +{ + return readl(ic->priv->base + offset); +} + +static inline void ipu_ic_write(struct ipu_ic *ic, u32 value, unsigned offset) +{ + writel(value, ic->priv->base + offset); +} + +struct ic_csc_params { + s16 coeff[3][3]; /* signed 9-bit integer coefficients */ + s16 offset[3]; /* signed 11+2-bit fixed point offset */ + u8 scale:2; /* scale coefficients * 2^(scale-1) */ + bool sat:1; /* saturate to (16, 235(Y) / 240(U, V)) */ +}; + +/* + * Y = R * .299 + G * .587 + B * .114; + * U = R * -.169 + G * -.332 + B * .500 + 128.; + * V = R * .500 + G * -.419 + B * -.0813 + 128.; + */ +static const struct ic_csc_params ic_csc_rgb2ycbcr = { + .coeff = { + { 77, 150, 29 }, + { 469, 427, 128 }, + { 128, 405, 491 }, + }, + .offset = { 0, 512, 512 }, + .scale = 1, +}; + +/* transparent RGB->RGB matrix for graphics combining */ +static const struct ic_csc_params ic_csc_rgb2rgb = { + .coeff = { + { 128, 0, 0 }, + { 0, 128, 0 }, + { 0, 0, 128 }, + }, + .scale = 2, +}; + +/* + * R = (1.164 * (Y - 16)) + (1.596 * (Cr - 128)); + * G = (1.164 * (Y - 16)) - (0.392 * (Cb - 128)) - (0.813 * (Cr - 128)); + * B = (1.164 * (Y - 16)) + (2.017 * (Cb - 128); + */ +static const struct ic_csc_params ic_csc_ycbcr2rgb = { + .coeff = { + { 149, 0, 204 }, + { 149, 462, 408 }, + { 149, 255, 0 }, + }, + .offset = { -446, 266, -554 }, + .scale = 2, +}; + +static int init_csc(struct ipu_ic *ic, + enum ipu_color_space inf, + enum ipu_color_space outf, + int csc_index) +{ + struct ipu_ic_priv *priv = ic->priv; + const struct ic_csc_params *params; + u32 __iomem *base; + const u16 (*c)[3]; + const u16 *a; + u32 param; + + base = (u32 __iomem *) + (priv->tpmem_base + ic->reg->tpmem_csc[csc_index]); + + if (inf == IPUV3_COLORSPACE_YUV && outf == IPUV3_COLORSPACE_RGB) + params = &ic_csc_ycbcr2rgb; + else if (inf == IPUV3_COLORSPACE_RGB && outf == IPUV3_COLORSPACE_YUV) + params = &ic_csc_rgb2ycbcr; + else if (inf == IPUV3_COLORSPACE_RGB && outf == IPUV3_COLORSPACE_RGB) + params = &ic_csc_rgb2rgb; + else { + dev_err(priv->ipu->dev, "Unsupported color space conversion\n"); + return -EINVAL; + } + + /* Cast to unsigned */ + c = (const u16 (*)[3])params->coeff; + a = (const u16 *)params->offset; + + param = ((a[0] & 0x1f) << 27) | ((c[0][0] & 0x1ff) << 18) | + ((c[1][1] & 0x1ff) << 9) | (c[2][2] & 0x1ff); + writel(param, base++); + + param = ((a[0] & 0x1fe0) >> 5) | (params->scale << 8) | + (params->sat << 9); + writel(param, base++); + + param = ((a[1] & 0x1f) << 27) | ((c[0][1] & 0x1ff) << 18) | + ((c[1][0] & 0x1ff) << 9) | (c[2][0] & 0x1ff); + writel(param, base++); + + param = ((a[1] & 0x1fe0) >> 5); + writel(param, base++); + + param = ((a[2] & 0x1f) << 27) | ((c[0][2] & 0x1ff) << 18) | + ((c[1][2] & 0x1ff) << 9) | (c[2][1] & 0x1ff); + writel(param, base++); + + param = ((a[2] & 0x1fe0) >> 5); + writel(param, base++); + + return 0; +} + +static int calc_resize_coeffs(struct ipu_ic *ic, + u32 in_size, u32 out_size, + u32 *resize_coeff, + u32 *downsize_coeff) +{ + struct ipu_ic_priv *priv = ic->priv; + struct ipu_soc *ipu = priv->ipu; + u32 temp_size, temp_downsize; + + /* + * Input size cannot be more than 4096, and output size cannot + * be more than 1024 + */ + if (in_size > 4096) { + dev_err(ipu->dev, "Unsupported resize (in_size > 4096)\n"); + return -EINVAL; + } + if (out_size > 1024) { + dev_err(ipu->dev, "Unsupported resize (out_size > 1024)\n"); + return -EINVAL; + } + + /* Cannot downsize more than 8:1 */ + if ((out_size << 3) < in_size) { + dev_err(ipu->dev, "Unsupported downsize\n"); + return -EINVAL; + } + + /* Compute downsizing coefficient */ + temp_downsize = 0; + temp_size = in_size; + while (((temp_size > 1024) || (temp_size >= out_size * 2)) && + (temp_downsize < 2)) { + temp_size >>= 1; + temp_downsize++; + } + *downsize_coeff = temp_downsize; + + /* + * compute resizing coefficient using the following equation: + * resize_coeff = M * (SI - 1) / (SO - 1) + * where M = 2^13, SI = input size, SO = output size + */ + *resize_coeff = (8192L * (temp_size - 1)) / (out_size - 1); + if (*resize_coeff >= 16384L) { + dev_err(ipu->dev, "Warning! Overflow on resize coeff.\n"); + *resize_coeff = 0x3FFF; + } + + return 0; +} + +void ipu_ic_task_enable(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + u32 ic_conf; + + spin_lock_irqsave(&priv->lock, flags); + + ic_conf = ipu_ic_read(ic, IC_CONF); + + ic_conf |= ic->bit->ic_conf_en; + + if (ic->rotation) + ic_conf |= ic->bit->ic_conf_rot_en; + + if (ic->in_cs != ic->out_cs) + ic_conf |= ic->bit->ic_conf_csc1_en; + + if (ic->graphics) { + ic_conf |= ic->bit->ic_conf_cmb_en; + ic_conf |= ic->bit->ic_conf_csc1_en; + + if (ic->g_in_cs != ic->out_cs) + ic_conf |= ic->bit->ic_conf_csc2_en; + } + + ipu_ic_write(ic, ic_conf, IC_CONF); + + spin_unlock_irqrestore(&priv->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_ic_task_enable); + +void ipu_ic_task_disable(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + u32 ic_conf; + + spin_lock_irqsave(&priv->lock, flags); + + ic_conf = ipu_ic_read(ic, IC_CONF); + + ic_conf &= ~(ic->bit->ic_conf_en | + ic->bit->ic_conf_csc1_en | + ic->bit->ic_conf_rot_en); + if (ic->bit->ic_conf_csc2_en) + ic_conf &= ~ic->bit->ic_conf_csc2_en; + if (ic->bit->ic_conf_cmb_en) + ic_conf &= ~ic->bit->ic_conf_cmb_en; + + ipu_ic_write(ic, ic_conf, IC_CONF); + + ic->rotation = ic->graphics = false; + + spin_unlock_irqrestore(&priv->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_ic_task_disable); + +int ipu_ic_task_graphics_init(struct ipu_ic *ic, + enum ipu_color_space in_g_cs, + bool galpha_en, u32 galpha, + bool colorkey_en, u32 colorkey) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + u32 reg, ic_conf; + int ret = 0; + + if (ic->task == IC_TASK_ENCODER) + return -EINVAL; + + spin_lock_irqsave(&priv->lock, flags); + + ic_conf = ipu_ic_read(ic, IC_CONF); + + if (!(ic_conf & ic->bit->ic_conf_csc1_en)) { + /* need transparent CSC1 conversion */ + ret = init_csc(ic, IPUV3_COLORSPACE_RGB, + IPUV3_COLORSPACE_RGB, 0); + if (ret) + goto unlock; + } + + ic->g_in_cs = in_g_cs; + + if (ic->g_in_cs != ic->out_cs) { + ret = init_csc(ic, ic->g_in_cs, ic->out_cs, 1); + if (ret) + goto unlock; + } + + if (galpha_en) { + ic_conf |= IC_CONF_IC_GLB_LOC_A; + reg = ipu_ic_read(ic, IC_CMBP_1); + reg &= ~(0xff << ic->bit->ic_cmb_galpha_bit); + reg |= (galpha << ic->bit->ic_cmb_galpha_bit); + ipu_ic_write(ic, reg, IC_CMBP_1); + } else + ic_conf &= ~IC_CONF_IC_GLB_LOC_A; + + if (colorkey_en) { + ic_conf |= IC_CONF_KEY_COLOR_EN; + ipu_ic_write(ic, colorkey, IC_CMBP_2); + } else + ic_conf &= ~IC_CONF_KEY_COLOR_EN; + + ipu_ic_write(ic, ic_conf, IC_CONF); + + ic->graphics = true; +unlock: + spin_unlock_irqrestore(&priv->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_ic_task_graphics_init); + +int ipu_ic_task_init(struct ipu_ic *ic, + int in_width, int in_height, + int out_width, int out_height, + enum ipu_color_space in_cs, + enum ipu_color_space out_cs) +{ + struct ipu_ic_priv *priv = ic->priv; + u32 reg, downsize_coeff, resize_coeff; + unsigned long flags; + int ret = 0; + + /* Setup vertical resizing */ + ret = calc_resize_coeffs(ic, in_height, out_height, + &resize_coeff, &downsize_coeff); + if (ret) + return ret; + + reg = (downsize_coeff << 30) | (resize_coeff << 16); + + /* Setup horizontal resizing */ + ret = calc_resize_coeffs(ic, in_width, out_width, + &resize_coeff, &downsize_coeff); + if (ret) + return ret; + + reg |= (downsize_coeff << 14) | resize_coeff; + + spin_lock_irqsave(&priv->lock, flags); + + ipu_ic_write(ic, reg, ic->reg->rsc); + + /* Setup color space conversion */ + ic->in_cs = in_cs; + ic->out_cs = out_cs; + + if (ic->in_cs != ic->out_cs) { + ret = init_csc(ic, ic->in_cs, ic->out_cs, 0); + if (ret) + goto unlock; + } + +unlock: + spin_unlock_irqrestore(&priv->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_ic_task_init); + +int ipu_ic_task_idma_init(struct ipu_ic *ic, struct ipuv3_channel *channel, + u32 width, u32 height, int burst_size, + enum ipu_rotate_mode rot) +{ + struct ipu_ic_priv *priv = ic->priv; + struct ipu_soc *ipu = priv->ipu; + u32 ic_idmac_1, ic_idmac_2, ic_idmac_3; + u32 temp_rot = bitrev8(rot) >> 5; + bool need_hor_flip = false; + unsigned long flags; + int ret = 0; + + if ((burst_size != 8) && (burst_size != 16)) { + dev_err(ipu->dev, "Illegal burst length for IC\n"); + return -EINVAL; + } + + width--; + height--; + + if (temp_rot & 0x2) /* Need horizontal flip */ + need_hor_flip = true; + + spin_lock_irqsave(&priv->lock, flags); + + ic_idmac_1 = ipu_ic_read(ic, IC_IDMAC_1); + ic_idmac_2 = ipu_ic_read(ic, IC_IDMAC_2); + ic_idmac_3 = ipu_ic_read(ic, IC_IDMAC_3); + + switch (channel->num) { + case IPUV3_CHANNEL_IC_PP_MEM: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB2_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB2_BURST_16; + + if (need_hor_flip) + ic_idmac_1 |= IC_IDMAC_1_PP_FLIP_RS; + else + ic_idmac_1 &= ~IC_IDMAC_1_PP_FLIP_RS; + + ic_idmac_2 &= ~IC_IDMAC_2_PP_HEIGHT_MASK; + ic_idmac_2 |= height << IC_IDMAC_2_PP_HEIGHT_OFFSET; + + ic_idmac_3 &= ~IC_IDMAC_3_PP_WIDTH_MASK; + ic_idmac_3 |= width << IC_IDMAC_3_PP_WIDTH_OFFSET; + break; + case IPUV3_CHANNEL_MEM_IC_PP: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB5_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB5_BURST_16; + break; + case IPUV3_CHANNEL_MEM_ROT_PP: + ic_idmac_1 &= ~IC_IDMAC_1_PP_ROT_MASK; + ic_idmac_1 |= temp_rot << IC_IDMAC_1_PP_ROT_OFFSET; + break; + case IPUV3_CHANNEL_MEM_IC_PRP_VF: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB6_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB6_BURST_16; + break; + case IPUV3_CHANNEL_IC_PRP_ENC_MEM: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB0_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB0_BURST_16; + + if (need_hor_flip) + ic_idmac_1 |= IC_IDMAC_1_PRPENC_FLIP_RS; + else + ic_idmac_1 &= ~IC_IDMAC_1_PRPENC_FLIP_RS; + + ic_idmac_2 &= ~IC_IDMAC_2_PRPENC_HEIGHT_MASK; + ic_idmac_2 |= height << IC_IDMAC_2_PRPENC_HEIGHT_OFFSET; + + ic_idmac_3 &= ~IC_IDMAC_3_PRPENC_WIDTH_MASK; + ic_idmac_3 |= width << IC_IDMAC_3_PRPENC_WIDTH_OFFSET; + break; + case IPUV3_CHANNEL_MEM_ROT_ENC: + ic_idmac_1 &= ~IC_IDMAC_1_PRPENC_ROT_MASK; + ic_idmac_1 |= temp_rot << IC_IDMAC_1_PRPENC_ROT_OFFSET; + break; + case IPUV3_CHANNEL_IC_PRP_VF_MEM: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB1_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB1_BURST_16; + + if (need_hor_flip) + ic_idmac_1 |= IC_IDMAC_1_PRPVF_FLIP_RS; + else + ic_idmac_1 &= ~IC_IDMAC_1_PRPVF_FLIP_RS; + + ic_idmac_2 &= ~IC_IDMAC_2_PRPVF_HEIGHT_MASK; + ic_idmac_2 |= height << IC_IDMAC_2_PRPVF_HEIGHT_OFFSET; + + ic_idmac_3 &= ~IC_IDMAC_3_PRPVF_WIDTH_MASK; + ic_idmac_3 |= width << IC_IDMAC_3_PRPVF_WIDTH_OFFSET; + break; + case IPUV3_CHANNEL_MEM_ROT_VF: + ic_idmac_1 &= ~IC_IDMAC_1_PRPVF_ROT_MASK; + ic_idmac_1 |= temp_rot << IC_IDMAC_1_PRPVF_ROT_OFFSET; + break; + case IPUV3_CHANNEL_G_MEM_IC_PRP_VF: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB3_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB3_BURST_16; + break; + case IPUV3_CHANNEL_G_MEM_IC_PP: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB4_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB4_BURST_16; + break; + case IPUV3_CHANNEL_VDI_MEM_IC_VF: + if (burst_size == 16) + ic_idmac_1 |= IC_IDMAC_1_CB7_BURST_16; + else + ic_idmac_1 &= ~IC_IDMAC_1_CB7_BURST_16; + break; + default: + goto unlock; + } + + ipu_ic_write(ic, ic_idmac_1, IC_IDMAC_1); + ipu_ic_write(ic, ic_idmac_2, IC_IDMAC_2); + ipu_ic_write(ic, ic_idmac_3, IC_IDMAC_3); + + if (rot >= IPU_ROTATE_90_RIGHT) + ic->rotation = true; + +unlock: + spin_unlock_irqrestore(&priv->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_ic_task_idma_init); + +int ipu_ic_enable(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + u32 module = IPU_CONF_IC_EN; + + spin_lock_irqsave(&priv->lock, flags); + + if (ic->rotation) + module |= IPU_CONF_ROT_EN; + + if (!priv->use_count) + ipu_module_enable(priv->ipu, module); + + priv->use_count++; + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_ic_enable); + +int ipu_ic_disable(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + u32 module = IPU_CONF_IC_EN | IPU_CONF_ROT_EN; + + spin_lock_irqsave(&priv->lock, flags); + + priv->use_count--; + + if (!priv->use_count) + ipu_module_disable(priv->ipu, module); + + if (priv->use_count < 0) + priv->use_count = 0; + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_ic_disable); + +struct ipu_ic *ipu_ic_get(struct ipu_soc *ipu, enum ipu_ic_task task) +{ + struct ipu_ic_priv *priv = ipu->ic_priv; + unsigned long flags; + struct ipu_ic *ic, *ret; + + if (task >= IC_NUM_TASKS) + return ERR_PTR(-EINVAL); + + ic = &priv->task[task]; + + spin_lock_irqsave(&priv->lock, flags); + + if (ic->in_use) { + ret = ERR_PTR(-EBUSY); + goto unlock; + } + + ic->in_use = true; + ret = ic; + +unlock: + spin_unlock_irqrestore(&priv->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_ic_get); + +void ipu_ic_put(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + ic->in_use = false; + spin_unlock_irqrestore(&priv->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_ic_put); + +int ipu_ic_init(struct ipu_soc *ipu, struct device *dev, + unsigned long base, unsigned long tpmem_base) +{ + struct ipu_ic_priv *priv; + int i; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + ipu->ic_priv = priv; + + spin_lock_init(&priv->lock); + priv->base = devm_ioremap(dev, base, PAGE_SIZE); + if (!priv->base) + return -ENOMEM; + priv->tpmem_base = devm_ioremap(dev, tpmem_base, SZ_64K); + if (!priv->tpmem_base) + return -ENOMEM; + + dev_dbg(dev, "IC base: 0x%08lx remapped to %p\n", base, priv->base); + + priv->ipu = ipu; + + for (i = 0; i < IC_NUM_TASKS; i++) { + priv->task[i].task = i; + priv->task[i].priv = priv; + priv->task[i].reg = &ic_task_reg[i]; + priv->task[i].bit = &ic_task_bit[i]; + } + + return 0; +} + +void ipu_ic_exit(struct ipu_soc *ipu) +{ +} + +void ipu_ic_dump(struct ipu_ic *ic) +{ + struct ipu_ic_priv *priv = ic->priv; + struct ipu_soc *ipu = priv->ipu; + + dev_dbg(ipu->dev, "IC_CONF = \t0x%08X\n", + ipu_ic_read(ic, IC_CONF)); + dev_dbg(ipu->dev, "IC_PRP_ENC_RSC = \t0x%08X\n", + ipu_ic_read(ic, IC_PRP_ENC_RSC)); + dev_dbg(ipu->dev, "IC_PRP_VF_RSC = \t0x%08X\n", + ipu_ic_read(ic, IC_PRP_VF_RSC)); + dev_dbg(ipu->dev, "IC_PP_RSC = \t0x%08X\n", + ipu_ic_read(ic, IC_PP_RSC)); + dev_dbg(ipu->dev, "IC_CMBP_1 = \t0x%08X\n", + ipu_ic_read(ic, IC_CMBP_1)); + dev_dbg(ipu->dev, "IC_CMBP_2 = \t0x%08X\n", + ipu_ic_read(ic, IC_CMBP_2)); + dev_dbg(ipu->dev, "IC_IDMAC_1 = \t0x%08X\n", + ipu_ic_read(ic, IC_IDMAC_1)); + dev_dbg(ipu->dev, "IC_IDMAC_2 = \t0x%08X\n", + ipu_ic_read(ic, IC_IDMAC_2)); + dev_dbg(ipu->dev, "IC_IDMAC_3 = \t0x%08X\n", + ipu_ic_read(ic, IC_IDMAC_3)); + dev_dbg(ipu->dev, "IC_IDMAC_4 = \t0x%08X\n", + ipu_ic_read(ic, IC_IDMAC_4)); +} +EXPORT_SYMBOL_GPL(ipu_ic_dump); diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h index 9b274f1259e1..1596a4f52faf 100644 --- a/drivers/gpu/ipu-v3/ipu-prv.h +++ b/drivers/gpu/ipu-v3/ipu-prv.h @@ -161,6 +161,7 @@ struct ipu_csi; struct ipu_dc_priv; struct ipu_dmfc_priv; struct ipu_di; +struct ipu_ic_priv; struct ipu_smfc_priv; struct ipu_devtype; @@ -191,6 +192,7 @@ struct ipu_soc { struct ipu_dmfc_priv *dmfc_priv; struct ipu_di *di_priv[2]; struct ipu_csi *csi_priv[2]; + struct ipu_ic_priv *ic_priv; struct ipu_smfc_priv *smfc_priv; }; @@ -217,6 +219,10 @@ int ipu_csi_init(struct ipu_soc *ipu, struct device *dev, int id, unsigned long base, u32 module, struct clk *clk_ipu); void ipu_csi_exit(struct ipu_soc *ipu, int id); +int ipu_ic_init(struct ipu_soc *ipu, struct device *dev, + unsigned long base, unsigned long tpmem_base); +void ipu_ic_exit(struct ipu_soc *ipu); + int ipu_di_init(struct ipu_soc *ipu, struct device *dev, int id, unsigned long base, u32 module, struct clk *ipu_clk); void ipu_di_exit(struct ipu_soc *ipu, int id); diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 6d254275192b..a477814a03af 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -71,6 +71,20 @@ enum ipu_csi_dest { IPU_CSI_DEST_VDIC, /* to VDIC */ }; +/* + * Enumeration of IPU rotation modes + */ +enum ipu_rotate_mode { + IPU_ROTATE_NONE = 0, + IPU_ROTATE_VERT_FLIP, + IPU_ROTATE_HORIZ_FLIP, + IPU_ROTATE_180, + IPU_ROTATE_90_RIGHT, + IPU_ROTATE_90_RIGHT_VFLIP, + IPU_ROTATE_90_RIGHT_HFLIP, + IPU_ROTATE_90_LEFT, +}; + enum ipu_color_space { IPUV3_COLORSPACE_RGB, IPUV3_COLORSPACE_YUV, @@ -242,6 +256,37 @@ struct ipu_csi *ipu_csi_get(struct ipu_soc *ipu, int id); void ipu_csi_put(struct ipu_csi *csi); void ipu_csi_dump(struct ipu_csi *csi); +/* + * IPU Image Converter (ic) functions + */ +enum ipu_ic_task { + IC_TASK_ENCODER, + IC_TASK_VIEWFINDER, + IC_TASK_POST_PROCESSOR, + IC_NUM_TASKS, +}; + +struct ipu_ic; +int ipu_ic_task_init(struct ipu_ic *ic, + int in_width, int in_height, + int out_width, int out_height, + enum ipu_color_space in_cs, + enum ipu_color_space out_cs); +int ipu_ic_task_graphics_init(struct ipu_ic *ic, + enum ipu_color_space in_g_cs, + bool galpha_en, u32 galpha, + bool colorkey_en, u32 colorkey); +void ipu_ic_task_enable(struct ipu_ic *ic); +void ipu_ic_task_disable(struct ipu_ic *ic); +int ipu_ic_task_idma_init(struct ipu_ic *ic, struct ipuv3_channel *channel, + u32 width, u32 height, int burst_size, + enum ipu_rotate_mode rot); +int ipu_ic_enable(struct ipu_ic *ic); +int ipu_ic_disable(struct ipu_ic *ic); +struct ipu_ic *ipu_ic_get(struct ipu_soc *ipu, enum ipu_ic_task task); +void ipu_ic_put(struct ipu_ic *ic); +void ipu_ic_dump(struct ipu_ic *ic); + /* * IPU Sensor Multiple FIFO Controller (SMFC) functions */ From fc4353559e587f5962f22c24ca7e015bdbea1e49 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:33 -0700 Subject: [PATCH 216/640] gpu: ipu-v3: smfc: Move enable/disable to ipu-smfc.c Move the SMFC module enable/disable helpers into the ipu-smfc submodule. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 12 ------------ drivers/gpu/ipu-v3/ipu-smfc.c | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 312eef6ffcad..f5a4e1ac2b50 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -217,18 +217,6 @@ int ipu_module_disable(struct ipu_soc *ipu, u32 mask) } EXPORT_SYMBOL_GPL(ipu_module_disable); -int ipu_smfc_enable(struct ipu_soc *ipu) -{ - return ipu_module_enable(ipu, IPU_CONF_SMFC_EN); -} -EXPORT_SYMBOL_GPL(ipu_smfc_enable); - -int ipu_smfc_disable(struct ipu_soc *ipu) -{ - return ipu_module_disable(ipu, IPU_CONF_SMFC_EN); -} -EXPORT_SYMBOL_GPL(ipu_smfc_disable); - int ipu_idmac_get_current_buffer(struct ipuv3_channel *channel) { struct ipu_soc *ipu = channel->ipu; diff --git a/drivers/gpu/ipu-v3/ipu-smfc.c b/drivers/gpu/ipu-v3/ipu-smfc.c index e4f85ad286fc..87ac624dd7ca 100644 --- a/drivers/gpu/ipu-v3/ipu-smfc.c +++ b/drivers/gpu/ipu-v3/ipu-smfc.c @@ -71,6 +71,18 @@ int ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_ } EXPORT_SYMBOL_GPL(ipu_smfc_map_channel); +int ipu_smfc_enable(struct ipu_soc *ipu) +{ + return ipu_module_enable(ipu, IPU_CONF_SMFC_EN); +} +EXPORT_SYMBOL_GPL(ipu_smfc_enable); + +int ipu_smfc_disable(struct ipu_soc *ipu) +{ + return ipu_module_disable(ipu, IPU_CONF_SMFC_EN); +} +EXPORT_SYMBOL_GPL(ipu_smfc_disable); + int ipu_smfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base) { From 7fafa8f06f9bdf32b806b4612bfe387de8e34125 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:34 -0700 Subject: [PATCH 217/640] gpu: ipu-v3: smfc: Convert to per-channel Convert the smfc object to be specific to a single smfc channel. Add ipu_smfc_{get|put} to retrieve and release a single smfc channel for exclusive use, and add use counter to ipu_smfc_{enable|disable}. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-smfc.c | 132 +++++++++++++++++++++++++++------- include/video/imx-ipu-v3.h | 10 +-- 2 files changed, 112 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-smfc.c b/drivers/gpu/ipu-v3/ipu-smfc.c index 87ac624dd7ca..a6429ca913c1 100644 --- a/drivers/gpu/ipu-v3/ipu-smfc.c +++ b/drivers/gpu/ipu-v3/ipu-smfc.c @@ -21,9 +21,18 @@ #include "ipu-prv.h" +struct ipu_smfc { + struct ipu_smfc_priv *priv; + int chno; + bool inuse; +}; + struct ipu_smfc_priv { void __iomem *base; spinlock_t lock; + struct ipu_soc *ipu; + struct ipu_smfc channel[4]; + int use_count; }; /*SMFC Registers */ @@ -31,75 +40,146 @@ struct ipu_smfc_priv { #define SMFC_WMC 0x0004 #define SMFC_BS 0x0008 -int ipu_smfc_set_burstsize(struct ipu_soc *ipu, int channel, int burstsize) +int ipu_smfc_set_burstsize(struct ipu_smfc *smfc, int burstsize) { - struct ipu_smfc_priv *smfc = ipu->smfc_priv; + struct ipu_smfc_priv *priv = smfc->priv; unsigned long flags; u32 val, shift; - spin_lock_irqsave(&smfc->lock, flags); + spin_lock_irqsave(&priv->lock, flags); - shift = channel * 4; - val = readl(smfc->base + SMFC_BS); + shift = smfc->chno * 4; + val = readl(priv->base + SMFC_BS); val &= ~(0xf << shift); val |= burstsize << shift; - writel(val, smfc->base + SMFC_BS); + writel(val, priv->base + SMFC_BS); - spin_unlock_irqrestore(&smfc->lock, flags); + spin_unlock_irqrestore(&priv->lock, flags); return 0; } EXPORT_SYMBOL_GPL(ipu_smfc_set_burstsize); -int ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_id) +int ipu_smfc_map_channel(struct ipu_smfc *smfc, int csi_id, int mipi_id) { - struct ipu_smfc_priv *smfc = ipu->smfc_priv; + struct ipu_smfc_priv *priv = smfc->priv; unsigned long flags; u32 val, shift; - spin_lock_irqsave(&smfc->lock, flags); + spin_lock_irqsave(&priv->lock, flags); - shift = channel * 3; - val = readl(smfc->base + SMFC_MAP); + shift = smfc->chno * 3; + val = readl(priv->base + SMFC_MAP); val &= ~(0x7 << shift); val |= ((csi_id << 2) | mipi_id) << shift; - writel(val, smfc->base + SMFC_MAP); + writel(val, priv->base + SMFC_MAP); - spin_unlock_irqrestore(&smfc->lock, flags); + spin_unlock_irqrestore(&priv->lock, flags); return 0; } EXPORT_SYMBOL_GPL(ipu_smfc_map_channel); -int ipu_smfc_enable(struct ipu_soc *ipu) +int ipu_smfc_enable(struct ipu_smfc *smfc) { - return ipu_module_enable(ipu, IPU_CONF_SMFC_EN); + struct ipu_smfc_priv *priv = smfc->priv; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + if (!priv->use_count) + ipu_module_enable(priv->ipu, IPU_CONF_SMFC_EN); + + priv->use_count++; + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; } EXPORT_SYMBOL_GPL(ipu_smfc_enable); -int ipu_smfc_disable(struct ipu_soc *ipu) +int ipu_smfc_disable(struct ipu_smfc *smfc) { - return ipu_module_disable(ipu, IPU_CONF_SMFC_EN); + struct ipu_smfc_priv *priv = smfc->priv; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + priv->use_count--; + + if (!priv->use_count) + ipu_module_disable(priv->ipu, IPU_CONF_SMFC_EN); + + if (priv->use_count < 0) + priv->use_count = 0; + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; } EXPORT_SYMBOL_GPL(ipu_smfc_disable); +struct ipu_smfc *ipu_smfc_get(struct ipu_soc *ipu, unsigned int chno) +{ + struct ipu_smfc_priv *priv = ipu->smfc_priv; + struct ipu_smfc *smfc, *ret; + unsigned long flags; + + if (chno >= 4) + return ERR_PTR(-EINVAL); + + smfc = &priv->channel[chno]; + ret = smfc; + + spin_lock_irqsave(&priv->lock, flags); + + if (smfc->inuse) { + ret = ERR_PTR(-EBUSY); + goto unlock; + } + + smfc->inuse = true; +unlock: + spin_unlock_irqrestore(&priv->lock, flags); + return ret; +} +EXPORT_SYMBOL_GPL(ipu_smfc_get); + +void ipu_smfc_put(struct ipu_smfc *smfc) +{ + struct ipu_smfc_priv *priv = smfc->priv; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + smfc->inuse = false; + spin_unlock_irqrestore(&priv->lock, flags); +} +EXPORT_SYMBOL_GPL(ipu_smfc_put); + int ipu_smfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base) { - struct ipu_smfc_priv *smfc; + struct ipu_smfc_priv *priv; + int i; - smfc = devm_kzalloc(dev, sizeof(*smfc), GFP_KERNEL); - if (!smfc) + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) return -ENOMEM; - ipu->smfc_priv = smfc; - spin_lock_init(&smfc->lock); + ipu->smfc_priv = priv; + spin_lock_init(&priv->lock); + priv->ipu = ipu; - smfc->base = devm_ioremap(dev, base, PAGE_SIZE); - if (!smfc->base) + priv->base = devm_ioremap(dev, base, PAGE_SIZE); + if (!priv->base) return -ENOMEM; - pr_debug("%s: ioremap 0x%08lx -> %p\n", __func__, base, smfc->base); + for (i = 0; i < 4; i++) { + priv->channel[i].priv = priv; + priv->channel[i].chno = i; + } + + pr_debug("%s: ioremap 0x%08lx -> %p\n", __func__, base, priv->base); return 0; } diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index a477814a03af..a695ee83e4e1 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -290,10 +290,12 @@ void ipu_ic_dump(struct ipu_ic *ic); /* * IPU Sensor Multiple FIFO Controller (SMFC) functions */ -int ipu_smfc_enable(struct ipu_soc *ipu); -int ipu_smfc_disable(struct ipu_soc *ipu); -int ipu_smfc_map_channel(struct ipu_soc *ipu, int channel, int csi_id, int mipi_id); -int ipu_smfc_set_burstsize(struct ipu_soc *ipu, int channel, int burstsize); +struct ipu_smfc *ipu_smfc_get(struct ipu_soc *ipu, unsigned int chno); +void ipu_smfc_put(struct ipu_smfc *smfc); +int ipu_smfc_enable(struct ipu_smfc *smfc); +int ipu_smfc_disable(struct ipu_smfc *smfc); +int ipu_smfc_map_channel(struct ipu_smfc *smfc, int csi_id, int mipi_id); +int ipu_smfc_set_burstsize(struct ipu_smfc *smfc, int burstsize); enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); From a2be35e3320b27c84488729e9fb56a62e74d65fa Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:35 -0700 Subject: [PATCH 218/640] gpu: ipu-v3: smfc: Add ipu_smfc_set_watermark() Adds ipu_smfc_set_watermark() which programs a channel's SMFC FIFO levels at which the watermark signal is set and cleared. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-smfc.c | 20 ++++++++++++++++++++ include/video/imx-ipu-v3.h | 1 + 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-smfc.c b/drivers/gpu/ipu-v3/ipu-smfc.c index a6429ca913c1..6ca9b43ce25a 100644 --- a/drivers/gpu/ipu-v3/ipu-smfc.c +++ b/drivers/gpu/ipu-v3/ipu-smfc.c @@ -80,6 +80,26 @@ int ipu_smfc_map_channel(struct ipu_smfc *smfc, int csi_id, int mipi_id) } EXPORT_SYMBOL_GPL(ipu_smfc_map_channel); +int ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level) +{ + struct ipu_smfc_priv *priv = smfc->priv; + unsigned long flags; + u32 val, shift; + + spin_lock_irqsave(&priv->lock, flags); + + shift = smfc->chno * 6 + (smfc->chno > 1 ? 4 : 0); + val = readl(priv->base + SMFC_WMC); + val &= ~(0x3f << shift); + val |= ((clr_level << 3) | set_level) << shift; + writel(val, priv->base + SMFC_WMC); + + spin_unlock_irqrestore(&priv->lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_smfc_set_watermark); + int ipu_smfc_enable(struct ipu_smfc *smfc) { struct ipu_smfc_priv *priv = smfc->priv; diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index a695ee83e4e1..49e5954ac033 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -296,6 +296,7 @@ int ipu_smfc_enable(struct ipu_smfc *smfc); int ipu_smfc_disable(struct ipu_smfc *smfc); int ipu_smfc_map_channel(struct ipu_smfc *smfc, int csi_id, int mipi_id); int ipu_smfc_set_burstsize(struct ipu_smfc *smfc, int burstsize); +int ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level); enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); From ae0e9708b30b3eebe5a58e4d055eb49a73d641dd Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:36 -0700 Subject: [PATCH 219/640] gpu: ipu-v3: Add ipu_mbus_code_to_colorspace() Add ipu_mbus_code_to_colorspace() to find ipu_color_space from a media bus pixel format code. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 13 +++++++++++++ include/video/imx-ipu-v3.h | 1 + 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index f5a4e1ac2b50..49ee990b4d1f 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -101,6 +101,19 @@ enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat) } EXPORT_SYMBOL_GPL(ipu_pixelformat_to_colorspace); +enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code) +{ + switch (mbus_code & 0xf000) { + case 0x1000: + return IPUV3_COLORSPACE_RGB; + case 0x2000: + return IPUV3_COLORSPACE_YUV; + default: + return IPUV3_COLORSPACE_UNKNOWN; + } +} +EXPORT_SYMBOL_GPL(ipu_mbus_code_to_colorspace); + struct ipuv3_channel *ipu_idmac_get(struct ipu_soc *ipu, unsigned num) { struct ipuv3_channel *channel; diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 49e5954ac033..7c97ccaf39f6 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -300,6 +300,7 @@ int ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level); enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); +enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code); struct ipu_client_platformdata { int csi; From f835f386a119c3f78f5acb93e86a4f025211739a Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:37 -0700 Subject: [PATCH 220/640] gpu: ipu-v3: Add rotation mode conversion utilities Add two functions: - ipu_degrees_to_rot_mode(): converts a degrees, hflip, and vflip setting to an IPU rotation mode. - ipu_rot_mode_to_degrees(): converts an IPU rotation mode with given hflip and vflip settings to degrees. Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 64 +++++++++++++++++++++++++++++++++ include/video/imx-ipu-v3.h | 4 +++ 2 files changed, 68 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 49ee990b4d1f..a1d42eed5d06 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -114,6 +114,70 @@ enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code) } EXPORT_SYMBOL_GPL(ipu_mbus_code_to_colorspace); +int ipu_degrees_to_rot_mode(enum ipu_rotate_mode *mode, int degrees, + bool hflip, bool vflip) +{ + u32 r90, vf, hf; + + switch (degrees) { + case 0: + vf = hf = r90 = 0; + break; + case 90: + vf = hf = 0; + r90 = 1; + break; + case 180: + vf = hf = 1; + r90 = 0; + break; + case 270: + vf = hf = r90 = 1; + break; + default: + return -EINVAL; + } + + hf ^= (u32)hflip; + vf ^= (u32)vflip; + + *mode = (enum ipu_rotate_mode)((r90 << 2) | (hf << 1) | vf); + return 0; +} +EXPORT_SYMBOL_GPL(ipu_degrees_to_rot_mode); + +int ipu_rot_mode_to_degrees(int *degrees, enum ipu_rotate_mode mode, + bool hflip, bool vflip) +{ + u32 r90, vf, hf; + + r90 = ((u32)mode >> 2) & 0x1; + hf = ((u32)mode >> 1) & 0x1; + vf = ((u32)mode >> 0) & 0x1; + hf ^= (u32)hflip; + vf ^= (u32)vflip; + + switch ((enum ipu_rotate_mode)((r90 << 2) | (hf << 1) | vf)) { + case IPU_ROTATE_NONE: + *degrees = 0; + break; + case IPU_ROTATE_90_RIGHT: + *degrees = 90; + break; + case IPU_ROTATE_180: + *degrees = 180; + break; + case IPU_ROTATE_90_LEFT: + *degrees = 270; + break; + default: + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(ipu_rot_mode_to_degrees); + struct ipuv3_channel *ipu_idmac_get(struct ipu_soc *ipu, unsigned num) { struct ipuv3_channel *channel; diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 7c97ccaf39f6..3562698528bd 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -301,6 +301,10 @@ int ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level); enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code); +int ipu_degrees_to_rot_mode(enum ipu_rotate_mode *mode, int degrees, + bool hflip, bool vflip); +int ipu_rot_mode_to_degrees(int *degrees, enum ipu_rotate_mode mode, + bool hflip, bool vflip); struct ipu_client_platformdata { int csi; From 4cea940d34319fb5d5e2f4d554e23f766c228e90 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:38 -0700 Subject: [PATCH 221/640] gpu: ipu-v3: Add helper function checking if pixfmt is planar Add simple helper function returning true if passed pixel format is one of supported planar ones. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-common.c | 12 ++++++++++++ include/video/imx-ipu-v3.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index a1d42eed5d06..18563c240f10 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -101,6 +101,18 @@ enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat) } EXPORT_SYMBOL_GPL(ipu_pixelformat_to_colorspace); +bool ipu_pixelformat_is_planar(u32 pixelformat) +{ + switch (pixelformat) { + case V4L2_PIX_FMT_YUV420: + case V4L2_PIX_FMT_YVU420: + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(ipu_pixelformat_is_planar); + enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code) { switch (mbus_code & 0xf000) { diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 3562698528bd..ecb01f843aee 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -301,6 +301,7 @@ int ipu_smfc_set_watermark(struct ipu_smfc *smfc, u32 set_level, u32 clr_level); enum ipu_color_space ipu_drm_fourcc_to_colorspace(u32 drm_fourcc); enum ipu_color_space ipu_pixelformat_to_colorspace(u32 pixelformat); enum ipu_color_space ipu_mbus_code_to_colorspace(u32 mbus_code); +bool ipu_pixelformat_is_planar(u32 pixelformat); int ipu_degrees_to_rot_mode(enum ipu_rotate_mode *mode, int degrees, bool hflip, bool vflip); int ipu_rot_mode_to_degrees(int *degrees, enum ipu_rotate_mode mode, From a4cd8f229ff71db0c95c0d96381d4fb9239fdb19 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 25 Jun 2014 18:05:39 -0700 Subject: [PATCH 222/640] gpu: ipu-v3: Move IDMAC channel names to imx-ipu-v3.h Move the IDMAC channel names to imx-ipu-v3.h, to make the names available outside IPU. Add a couple new channels in the process (async display BG/FG, channels 24 and 29). Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-prv.h | 25 ------------------------- include/video/imx-ipu-v3.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/ipu-v3/ipu-prv.h b/drivers/gpu/ipu-v3/ipu-prv.h index 1596a4f52faf..7f08a461c929 100644 --- a/drivers/gpu/ipu-v3/ipu-prv.h +++ b/drivers/gpu/ipu-v3/ipu-prv.h @@ -24,31 +24,6 @@ struct ipu_soc; #include