From 6911a9b8ae8b2a1dab4dfda9c2bd20f7ca2961d6 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Thu, 2 Apr 2009 11:24:54 -0700 Subject: [PATCH 1/8] drm/i915: Implement batch and ring buffer dumping We create a debugfs node (i915_ringbuffer_data) to expose a hex dump of the ring buffer itself. We also expose another debugfs node (i915_ringbuffer_info) with information on the state (i.e. head, tail addresses) of the ringbuffer. For batchbuffer dumping, we look at the device's active_list, dumping each object which has I915_GEM_DOMAIN_COMMAND in its read domains. This is all exposed through the dri/i915_batchbuffers debugfs file with a header for each object (giving the objects gtt_offset so that it can be matched against the offset given in the BATCH_BUFFER_START command. Signed-off-by: Ben Gamari Signed-off-by: Carl Worth Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 8 +-- drivers/gpu/drm/i915/i915_gem_debugfs.c | 93 +++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 317b1223e091..efcd610d4fca 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -635,6 +635,8 @@ int i915_gem_attach_phys_object(struct drm_device *dev, void i915_gem_detach_phys_object(struct drm_device *dev, struct drm_gem_object *obj); void i915_gem_free_all_phys_object(struct drm_device *dev); +int i915_gem_object_get_pages(struct drm_gem_object *obj); +void i915_gem_object_put_pages(struct drm_gem_object *obj); /* i915_gem_tiling.c */ void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1449b452cc63..33ab07b0d712 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -43,8 +43,6 @@ static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, uint64_t offset, 
uint64_t size); static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj); -static int i915_gem_object_get_pages(struct drm_gem_object *obj); -static void i915_gem_object_put_pages(struct drm_gem_object *obj); static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); @@ -1285,7 +1283,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, return 0; } -static void +void i915_gem_object_put_pages(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv = obj->driver_private; @@ -1884,7 +1882,7 @@ i915_gem_evict_everything(struct drm_device *dev) return ret; } -static int +int i915_gem_object_get_pages(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv = obj->driver_private; @@ -3243,7 +3241,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec_offset = exec_list[args->buffer_count - 1].offset; #if WATCH_EXEC - i915_gem_dump_object(object_list[args->buffer_count - 1], + i915_gem_dump_object(batch_obj, args->batch_len, __func__, ~0); diff --git a/drivers/gpu/drm/i915/i915_gem_debugfs.c b/drivers/gpu/drm/i915/i915_gem_debugfs.c index a1ac0c5e7307..986f1082c596 100644 --- a/drivers/gpu/drm/i915/i915_gem_debugfs.c +++ b/drivers/gpu/drm/i915/i915_gem_debugfs.c @@ -234,6 +234,96 @@ static int i915_hws_info(struct seq_file *m, void *data) return 0; } +static void i915_dump_pages(struct seq_file *m, struct page **pages, int page_count) +{ + int page, i; + uint32_t *mem; + + for (page = 0; page < page_count; page++) { + mem = kmap(pages[page]); + for (i = 0; i < PAGE_SIZE; i += 4) + seq_printf(m, "%08x : %08x\n", i, mem[i / 4]); + kunmap(pages[page]); + } +} + +static int i915_batchbuffer_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct 
drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; + int ret; + + spin_lock(&dev_priv->mm.active_list_lock); + + list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) { + obj = obj_priv->obj; + if (obj->read_domains & I915_GEM_DOMAIN_COMMAND) { + ret = i915_gem_object_get_pages(obj); + if (ret) { + DRM_ERROR("Failed to get pages: %d\n", ret); + spin_unlock(&dev_priv->mm.active_list_lock); + return ret; + } + + seq_printf(m, "--- gtt_offset = 0x%08x\n", obj_priv->gtt_offset); + i915_dump_pages(m, obj_priv->pages, obj->size / PAGE_SIZE); + + i915_gem_object_put_pages(obj); + } + } + + spin_unlock(&dev_priv->mm.active_list_lock); + + return 0; +} + +static int i915_ringbuffer_data(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + u8 *virt; + uint32_t *ptr, off; + + if (!dev_priv->ring.ring_obj) { + seq_printf(m, "No ringbuffer setup\n"); + return 0; + } + + virt = dev_priv->ring.virtual_start; + + for (off = 0; off < dev_priv->ring.Size; off += 4) { + ptr = (uint32_t *)(virt + off); + seq_printf(m, "%08x : %08x\n", off, *ptr); + } + + return 0; +} + +static int i915_ringbuffer_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + unsigned int head, tail, mask; + + head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + tail = I915_READ(PRB0_TAIL) & TAIL_ADDR; + mask = dev_priv->ring.tail_mask; + + seq_printf(m, "RingHead : %08x\n", head); + seq_printf(m, "RingTail : %08x\n", tail); + seq_printf(m, "RingMask : %08x\n", mask); + seq_printf(m, "RingSize : %08lx\n", dev_priv->ring.Size); + seq_printf(m, "Acthd : %08x\n", I915_READ(IS_I965G(dev) ? 
ACTHD_I965 : ACTHD)); + + return 0; +} + + static struct drm_info_list i915_gem_debugfs_list[] = { {"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST}, {"i915_gem_flushing", i915_gem_object_list_info, 0, (void *) FLUSHING_LIST}, @@ -243,6 +333,9 @@ static struct drm_info_list i915_gem_debugfs_list[] = { {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, {"i915_gem_interrupt", i915_interrupt_info, 0}, {"i915_gem_hws", i915_hws_info, 0}, + {"i915_ringbuffer_data", i915_ringbuffer_data, 0}, + {"i915_ringbuffer_info", i915_ringbuffer_info, 0}, + {"i915_batchbuffers", i915_batchbuffer_info, 0}, }; #define I915_GEM_DEBUGFS_ENTRIES ARRAY_SIZE(i915_gem_debugfs_list) From 6115707be0e85a9b825f10e95143cb705b87fef8 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 3 Apr 2009 15:24:43 +0800 Subject: [PATCH 2/8] drm/i915: Fix a mismerge of the IGD patch (new .find_pll hooks missed) Signed-off-by: Shaohua Li Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 64773ce52964..c2c8e95ff14d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -367,6 +367,7 @@ static const intel_limit_t intel_limits[] = { .p1 = { .min = I9XX_P1_MIN, .max = I9XX_P1_MAX }, .p2 = { .dot_limit = I9XX_P2_SDVO_DAC_SLOW_LIMIT, .p2_slow = I9XX_P2_SDVO_DAC_SLOW, .p2_fast = I9XX_P2_SDVO_DAC_FAST }, + .find_pll = intel_find_best_PLL, }, { /* INTEL_LIMIT_IGD_LVDS */ .dot = { .min = I9XX_DOT_MIN, .max = I9XX_DOT_MAX }, @@ -380,6 +381,7 @@ static const intel_limit_t intel_limits[] = { /* IGD only supports single-channel mode. 
*/ .p2 = { .dot_limit = I9XX_P2_LVDS_SLOW_LIMIT, .p2_slow = I9XX_P2_LVDS_SLOW, .p2_fast = I9XX_P2_LVDS_SLOW }, + .find_pll = intel_find_best_PLL, }, }; From 9dff6af860d6b7f661d4360eb859837afaca0a1b Mon Sep 17 00:00:00 2001 From: Ma Ling Date: Thu, 2 Apr 2009 13:13:26 +0800 Subject: [PATCH 3/8] drm/i915: sync hdmi detection by hdmi identifier with 2D Currently we detect HDMI monitor by hardware detection, but if an HDMI-DVI adapter is used to connect a DVI monitor, hardware detection will incorrectly take monitor as HDMI. HDMI spec says any device containing IEEE registration identifier will be treated as HDMI device. The patch intends to detect HDMI monitor by drm_detect_hdmi_monitor function which follows that rule. Signed-off-by: Ma Ling Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_hdmi.c | 23 ++++++++++++++++++++--- drivers/gpu/drm/i915/intel_sdvo.c | 22 ++++++++++++++++++++-- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index b06a4a3ff08d..550374225388 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -38,7 +38,7 @@ struct intel_hdmi_priv { u32 sdvox_reg; u32 save_SDVOX; - int has_hdmi_sink; + bool has_hdmi_sink; }; static void intel_hdmi_mode_set(struct drm_encoder *encoder, @@ -128,6 +128,22 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder, return true; } +static void +intel_hdmi_sink_detect(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_hdmi_priv *hdmi_priv = intel_output->dev_priv; + struct edid *edid = NULL; + + edid = drm_get_edid(&intel_output->base, + &intel_output->ddc_bus->adapter); + if (edid != NULL) { + hdmi_priv->has_hdmi_sink = drm_detect_hdmi_monitor(edid); + kfree(edid); + intel_output->base.display_info.raw_edid = NULL; + } +} + static enum drm_connector_status intel_hdmi_detect(struct drm_connector *connector) { @@ 
-158,9 +174,10 @@ intel_hdmi_detect(struct drm_connector *connector) return connector_status_unknown; } - if ((I915_READ(PORT_HOTPLUG_STAT) & bit) != 0) + if ((I915_READ(PORT_HOTPLUG_STAT) & bit) != 0) { + intel_hdmi_sink_detect(connector); return connector_status_connected; - else + } else return connector_status_disconnected; } diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 7b31f55f55c8..9913651c1e17 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1357,6 +1357,23 @@ void intel_sdvo_set_hotplug(struct drm_connector *connector, int on) intel_sdvo_read_response(intel_output, &response, 2); } +static void +intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + struct edid *edid = NULL; + + intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus); + edid = drm_get_edid(&intel_output->base, + &intel_output->ddc_bus->adapter); + if (edid != NULL) { + sdvo_priv->is_hdmi = drm_detect_hdmi_monitor(edid); + kfree(edid); + intel_output->base.display_info.raw_edid = NULL; + } +} + static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector) { u8 response[2]; @@ -1371,9 +1388,10 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect if (status != SDVO_CMD_STATUS_SUCCESS) return connector_status_unknown; - if ((response[0] != 0) || (response[1] != 0)) + if ((response[0] != 0) || (response[1] != 0)) { + intel_sdvo_hdmi_sink_detect(connector); return connector_status_connected; - else + } else return connector_status_disconnected; } From 5b40f871158da7aaccff442645dae8b97c2e4d50 Mon Sep 17 00:00:00 2001 From: Ferenc Wagner Date: Mon, 6 Apr 2009 14:55:09 +0200 Subject: [PATCH 4/8] drm/i915: indicate framebuffer restore key in SysRq help message At the same time, bring the action message closer to the 
usual format. Signed-off-by: Ferenc Wagner Acked-by: Jesse Barnes Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/intel_fb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index b7f0ebe9f810..3e094beecb99 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -864,8 +864,8 @@ static void intelfb_sysrq(int dummy1, struct tty_struct *dummy3) static struct sysrq_key_op sysrq_intelfb_restore_op = { .handler = intelfb_sysrq, - .help_msg = "force fb", - .action_msg = "force restore of fb console", + .help_msg = "force-fb(G)", + .action_msg = "Restore framebuffer console", }; int intelfb_probe(struct drm_device *dev) From 2bc43b5cf5158a26fa1328234795abed2dff5275 Mon Sep 17 00:00:00 2001 From: Florian Mickler Date: Mon, 6 Apr 2009 22:55:41 +0200 Subject: [PATCH 5/8] drm/i915: Fix use of uninitialized var in 40a5f0de i915_gem_put_relocs_to_user returned an uninitialized value which got returned to userspace. This caused libdrm in my setup to never get out of a do{}while() loop retrying i915_gem_execbuffer. result was hanging X, overheating of cpu and 2-3gb of logfile-spam. This patch addresses the issue by 1. initializing vars in this file where necessary 2. 
correcting wrongly interpreted return values of copy_[from/to]_user Signed-off-by: Florian Mickler [anholt: cleanups of unnecessary changes, consistency in APIs] Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 34 +++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 33ab07b0d712..6f7d0e27036f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -141,15 +141,18 @@ fast_shmem_read(struct page **pages, int length) { char __iomem *vaddr; - int ret; + int unwritten; vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0); if (vaddr == NULL) return -ENOMEM; - ret = __copy_to_user_inatomic(data, vaddr + page_offset, length); + unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length); kunmap_atomic(vaddr, KM_USER0); - return ret; + if (unwritten) + return -EFAULT; + + return 0; } static inline int @@ -3000,13 +3003,13 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, drm_free(*relocs, reloc_count * sizeof(**relocs), DRM_MEM_DRIVER); *relocs = NULL; - return ret; + return -EFAULT; } reloc_index += exec_list[i].relocation_count; } - return ret; + return 0; } static int @@ -3015,23 +3018,28 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, struct drm_i915_gem_relocation_entry *relocs) { uint32_t reloc_count = 0, i; - int ret; + int ret = 0; for (i = 0; i < buffer_count; i++) { struct drm_i915_gem_relocation_entry __user *user_relocs; + int unwritten; user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; - if (ret == 0) { - ret = copy_to_user(user_relocs, - &relocs[reloc_count], - exec_list[i].relocation_count * - sizeof(*relocs)); + unwritten = copy_to_user(user_relocs, + &relocs[reloc_count], + exec_list[i].relocation_count * + sizeof(*relocs)); + + if (unwritten) { + ret = -EFAULT; + goto err; } reloc_count += 
exec_list[i].relocation_count; } +err: drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER); return ret; @@ -3306,10 +3314,12 @@ err: (uintptr_t) args->buffers_ptr, exec_list, sizeof(*exec_list) * args->buffer_count); - if (ret) + if (ret) { + ret = -EFAULT; DRM_ERROR("failed to copy %d exec entries " "back to user (%d)\n", args->buffer_count, ret); + } } /* Copy the updated relocations out regardless of current error From e5e9ecde63ba365b510df0f4a9cb3b048a0ad785 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 7 Apr 2009 16:01:22 -0700 Subject: [PATCH 6/8] drm/i915: Correctly set the write flag for get_user_pages in pread. Otherwise, the results of our read didn't show up when we were faulting in the page being read into (as happened with a testcase reading into a big stack area). Likely accounts for some conformance test failures. Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6f7d0e27036f..3a1189d94a9a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -287,7 +287,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, down_read(&mm->mmap_sem); pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, - num_pages, 0, 0, user_pages, NULL); + num_pages, 1, 0, user_pages, NULL); up_read(&mm->mmap_sem); if (pinned_pages < num_pages) { ret = -EFAULT; From 280b713b5b0fd84cf2469098aee88acbb5de859c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 12 Mar 2009 16:56:27 -0700 Subject: [PATCH 7/8] drm/i915: Allow tiling of objects with bit 17 swizzling by the CPU. Save the bit 17 state of the pages when freeing the page list, and reswizzle them if necessary when rebinding the pages (in case they were swapped out). 
Since we have userland with expectations that the swizzle enums let it pread and pwrite contents accurately, we can't expose a new swizzle enum for bit 17 (which it would have to GTT map to handle), so we handle it down in pread and pwrite by swizzling the copy when bit 17 of the page address is set. Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.h | 5 + drivers/gpu/drm/i915/i915_gem.c | 130 ++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_tiling.c | 111 ++++++++++++++++++++- include/drm/i915_drm.h | 3 + 4 files changed, 235 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index efcd610d4fca..bccd4146d55c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -446,6 +446,9 @@ struct drm_i915_gem_object { uint32_t tiling_mode; uint32_t stride; + /** Record of address bit 17 of each page at last unbind. */ + long *bit_17; + /** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */ uint32_t agp_type; @@ -640,6 +643,8 @@ void i915_gem_object_put_pages(struct drm_gem_object *obj); /* i915_gem_tiling.c */ void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); +void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); +void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj); /* i915_gem_debug.c */ void i915_gem_dump_object(struct drm_gem_object *obj, int len, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3a1189d94a9a..6dca9fc7c1db 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -155,6 +155,15 @@ fast_shmem_read(struct page **pages, return 0; } +static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj) +{ + drm_i915_private_t *dev_priv = obj->dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + + return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && + obj_priv->tiling_mode != 
I915_TILING_NONE; +} + static inline int slow_shmem_copy(struct page *dst_page, int dst_offset, @@ -182,6 +191,64 @@ slow_shmem_copy(struct page *dst_page, return 0; } +static inline int +slow_shmem_bit17_copy(struct page *gpu_page, + int gpu_offset, + struct page *cpu_page, + int cpu_offset, + int length, + int is_read) +{ + char *gpu_vaddr, *cpu_vaddr; + + /* Use the unswizzled path if this page isn't affected. */ + if ((page_to_phys(gpu_page) & (1 << 17)) == 0) { + if (is_read) + return slow_shmem_copy(cpu_page, cpu_offset, + gpu_page, gpu_offset, length); + else + return slow_shmem_copy(gpu_page, gpu_offset, + cpu_page, cpu_offset, length); + } + + gpu_vaddr = kmap_atomic(gpu_page, KM_USER0); + if (gpu_vaddr == NULL) + return -ENOMEM; + + cpu_vaddr = kmap_atomic(cpu_page, KM_USER1); + if (cpu_vaddr == NULL) { + kunmap_atomic(gpu_vaddr, KM_USER0); + return -ENOMEM; + } + + /* Copy the data, XORing A6 with A17 (1). The user already knows he's + * XORing with the other bits (A9 for Y, A9 and A10 for X) + */ + while (length > 0) { + int cacheline_end = ALIGN(gpu_offset + 1, 64); + int this_length = min(cacheline_end - gpu_offset, length); + int swizzled_gpu_offset = gpu_offset ^ 64; + + if (is_read) { + memcpy(cpu_vaddr + cpu_offset, + gpu_vaddr + swizzled_gpu_offset, + this_length); + } else { + memcpy(gpu_vaddr + swizzled_gpu_offset, + cpu_vaddr + cpu_offset, + this_length); + } + cpu_offset += this_length; + gpu_offset += this_length; + length -= this_length; + } + + kunmap_atomic(cpu_vaddr, KM_USER1); + kunmap_atomic(gpu_vaddr, KM_USER0); + + return 0; +} + /** * This is the fast shmem pread path, which attempts to copy_from_user directly * from the backing pages of the object to the user's address space. 
On a @@ -270,6 +337,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, int page_length; int ret; uint64_t data_ptr = args->data_ptr; + int do_bit17_swizzling; remain = args->size; @@ -294,6 +362,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, goto fail_put_user_pages; } + do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + mutex_lock(&dev->struct_mutex); ret = i915_gem_object_get_pages(obj); @@ -328,11 +398,20 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - ret = slow_shmem_copy(user_pages[data_page_index], - data_page_offset, - obj_priv->pages[shmem_page_index], - shmem_page_offset, - page_length); + if (do_bit17_swizzling) { + ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length, + 1); + } else { + ret = slow_shmem_copy(user_pages[data_page_index], + data_page_offset, + obj_priv->pages[shmem_page_index], + shmem_page_offset, + page_length); + } if (ret) goto fail_put_pages; @@ -384,9 +463,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); - if (ret != 0) + if (i915_gem_object_needs_bit17_swizzle(obj)) { ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv); + } else { + ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); + if (ret != 0) + ret = i915_gem_shmem_pread_slow(dev, obj, args, + file_priv); + } drm_gem_object_unreference(obj); @@ -728,6 +812,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, int page_length; int ret; uint64_t data_ptr = args->data_ptr; + int do_bit17_swizzling; remain = args->size; @@ -752,6 +837,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, goto 
fail_put_user_pages; } + do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + mutex_lock(&dev->struct_mutex); ret = i915_gem_object_get_pages(obj); @@ -786,11 +873,20 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], - shmem_page_offset, - user_pages[data_page_index], - data_page_offset, - page_length); + if (do_bit17_swizzling) { + ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length, + 0); + } else { + ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length); + } if (ret) goto fail_put_pages; @@ -855,6 +951,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file_priv); } + } else if (i915_gem_object_needs_bit17_swizzle(obj)) { + ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv); } else { ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv); if (ret == -EFAULT) { @@ -1298,6 +1396,9 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) if (--obj_priv->pages_refcount != 0) return; + if (obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_save_bit_17_swizzle(obj); + for (i = 0; i < page_count; i++) if (obj_priv->pages[i] != NULL) { if (obj_priv->dirty) @@ -1923,6 +2024,10 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) } obj_priv->pages[i] = page; } + + if (obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_do_bit_17_swizzle(obj); + return 0; } @@ -3601,6 +3706,7 @@ void i915_gem_free_object(struct drm_gem_object *obj) i915_gem_free_mmap_offset(obj); drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER); + kfree(obj_priv->bit_17); drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); } diff 
--git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 6be3f927c86a..f27e523c764f 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -25,6 +25,8 @@ * */ +#include "linux/string.h" +#include "linux/bitops.h" #include "drmP.h" #include "drm.h" #include "i915_drm.h" @@ -127,8 +129,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) swizzle_y = I915_BIT_6_SWIZZLE_9_11; } else { /* Bit 17 swizzling by the CPU in addition. */ - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; + swizzle_y = I915_BIT_6_SWIZZLE_9_17; } break; } @@ -288,6 +290,19 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; else args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + + /* Hide bit 17 swizzling from the user. This prevents old Mesa + * from aborting the application on sw fallbacks to bit 17, + * and we use the pread/pwrite bit17 paths to swizzle for it. + * If there was a user that was relying on the swizzle + * information for drm_intel_bo_map()ed reads/writes this would + * break it, but we don't have any of those. + */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + /* If we can't handle the swizzling, make it untiled. 
*/ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { args->tiling_mode = I915_TILING_NONE; @@ -354,8 +369,100 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, DRM_ERROR("unknown tiling mode\n"); } + /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); return 0; } + +/** + * Swap every 64 bytes of this page around, to account for it having a new + * bit 17 of its physical address and therefore being interpreted differently + * by the GPU. + */ +static int +i915_gem_swizzle_page(struct page *page) +{ + char *vaddr; + int i; + char temp[64]; + + vaddr = kmap(page); + if (vaddr == NULL) + return -ENOMEM; + + for (i = 0; i < PAGE_SIZE; i += 128) { + memcpy(temp, &vaddr[i], 64); + memcpy(&vaddr[i], &vaddr[i + 64], 64); + memcpy(&vaddr[i + 64], temp, 64); + } + + kunmap(page); + + return 0; +} + +void +i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + int page_count = obj->size >> PAGE_SHIFT; + int i; + + if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17) + return; + + if (obj_priv->bit_17 == NULL) + return; + + for (i = 0; i < page_count; i++) { + char new_bit_17 = page_to_phys(obj_priv->pages[i]) >> 17; + if ((new_bit_17 & 0x1) != + (test_bit(i, obj_priv->bit_17) != 0)) { + int ret = i915_gem_swizzle_page(obj_priv->pages[i]); + if (ret != 0) { + DRM_ERROR("Failed to swizzle page\n"); + return; + } + set_page_dirty(obj_priv->pages[i]); + } + } +} + +void +i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = 
dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + int page_count = obj->size >> PAGE_SHIFT; + int i; + + if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17) + return; + + if (obj_priv->bit_17 == NULL) { + obj_priv->bit_17 = kmalloc(BITS_TO_LONGS(page_count) * + sizeof(long), GFP_KERNEL); + if (obj_priv->bit_17 == NULL) { + DRM_ERROR("Failed to allocate memory for bit 17 " + "record\n"); + return; + } + } + + for (i = 0; i < page_count; i++) { + if (page_to_phys(obj_priv->pages[i]) & (1 << 17)) + __set_bit(i, obj_priv->bit_17); + else + __clear_bit(i, obj_priv->bit_17); + } +} diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 67e3353a56d6..95962fa8398a 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -594,6 +594,9 @@ struct drm_i915_gem_busy { #define I915_BIT_6_SWIZZLE_9_10_11 4 /* Not seen by userland */ #define I915_BIT_6_SWIZZLE_UNKNOWN 5 +/* Seen by userland. */ +#define I915_BIT_6_SWIZZLE_9_17 6 +#define I915_BIT_6_SWIZZLE_9_10_17 7 struct drm_i915_gem_set_tiling { /** Handle of the buffer to have its tiling state updated */ From 68c84342171034120c8a1f6dfb8ef51b14250f11 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 8 Apr 2009 10:58:23 +0800 Subject: [PATCH 8/8] drm/i915: fix scheduling while holding the new active list spinlock regression caused by commit 5e118f4139feafe97e913df67b1f7c1e5083e535: i915_gem_object_move_to_inactive() should be called in task context, as it calls fput(); Signed-off-by: Shaohua Li [anholt: Add more detail to the comment about the lock break that's added] Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6dca9fc7c1db..4642115902d6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1596,8 +1596,19 @@ i915_gem_retire_request(struct drm_device *dev, if 
(obj->write_domain != 0) i915_gem_object_move_to_flushing(obj); - else + else { + /* Take a reference on the object so it won't be + * freed while the spinlock is held. The list + * protection for this spinlock is safe when breaking + * the lock like this since the next thing we do + * is just get the head of the list again. + */ + drm_gem_object_reference(obj); i915_gem_object_move_to_inactive(obj); + spin_unlock(&dev_priv->mm.active_list_lock); + drm_gem_object_unreference(obj); + spin_lock(&dev_priv->mm.active_list_lock); + } } out: spin_unlock(&dev_priv->mm.active_list_lock);