Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 275 |
1 file changed, 181 insertions, 94 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 32e6aade6223..ddc21d4b388d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1146,23 +1146,74 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
 	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
 }
 
-static int __i915_spin_request(struct drm_i915_gem_request *req)
+static unsigned long local_clock_us(unsigned *cpu)
+{
+	unsigned long t;
+
+	/* Cheaply and approximately convert from nanoseconds to microseconds.
+	 * The result and subsequent calculations are also defined in the same
+	 * approximate microseconds units. The principal source of timing
+	 * error here is from the simple truncation.
+	 *
+	 * Note that local_clock() is only defined wrt to the current CPU;
+	 * the comparisons are no longer valid if we switch CPUs. Instead of
+	 * blocking preemption for the entire busywait, we can detect the CPU
+	 * switch and use that as indicator of system load and a reason to
+	 * stop busywaiting, see busywait_stop().
+	 */
+	*cpu = get_cpu();
+	t = local_clock() >> 10;
+	put_cpu();
+
+	return t;
+}
+
+static bool busywait_stop(unsigned long timeout, unsigned cpu)
+{
+	unsigned this_cpu;
+
+	if (time_after(local_clock_us(&this_cpu), timeout))
+		return true;
+
+	return this_cpu != cpu;
+}
+
+static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
 {
 	unsigned long timeout;
+	unsigned cpu;
+
+	/* When waiting for high frequency requests, e.g. during synchronous
+	 * rendering split between the CPU and GPU, the finite amount of time
+	 * required to set up the irq and wait upon it limits the response
+	 * rate. By busywaiting on the request completion for a short while we
+	 * can service the high frequency waits as quick as possible. However,
+	 * if it is a slow request, we want to sleep as quickly as possible.
+	 * The tradeoff between waiting and sleeping is roughly the time it
+	 * takes to sleep on a request, on the order of a microsecond.
+	 */
 
-	if (i915_gem_request_get_ring(req)->irq_refcount)
+	if (req->ring->irq_refcount)
 		return -EBUSY;
 
-	timeout = jiffies + 1;
+	/* Only spin if we know the GPU is processing this request */
+	if (!i915_gem_request_started(req, true))
+		return -EAGAIN;
+
+	timeout = local_clock_us(&cpu) + 5;
 	while (!need_resched()) {
 		if (i915_gem_request_completed(req, true))
 			return 0;
 
-		if (time_after_eq(jiffies, timeout))
+		if (signal_pending_state(state, current))
+			break;
+
+		if (busywait_stop(timeout, cpu))
 			break;
 
 		cpu_relax_lowlatency();
 	}
+
 	if (i915_gem_request_completed(req, false))
 		return 0;
 
@@ -1197,6 +1248,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	const bool irq_test_in_progress =
 		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
+	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
 	DEFINE_WAIT(wait);
 	unsigned long timeout_expire;
 	s64 before, now;
@@ -1229,7 +1281,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	before = ktime_get_raw_ns();
 
 	/* Optimistic spin for the next jiffie before touching IRQs */
-	ret = __i915_spin_request(req);
+	ret = __i915_spin_request(req, state);
 	if (ret == 0)
 		goto out;
 
@@ -1241,8 +1293,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 	for (;;) {
 		struct timer_list timer;
 
-		prepare_to_wait(&ring->irq_queue, &wait,
-				interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(&ring->irq_queue, &wait, state);
 
 		/* We need to check whether any gpu reset happened in between
 		 * the caller grabbing the seqno and now ... */
@@ -1260,7 +1311,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
 			break;
 		}
 
-		if (interruptible && signal_pending(current)) {
+		if (signal_pending_state(state, current)) {
 			ret = -ERESTARTSYS;
 			break;
 		}
@@ -2554,6 +2605,7 @@ void __i915_add_request(struct drm_i915_gem_request *request,
 	request->batch_obj = obj;
 
 	request->emitted_jiffies = jiffies;
+	request->previous_seqno = ring->last_submitted_seqno;
 	ring->last_submitted_seqno = request->seqno;
 	list_add_tail(&request->list, &ring->request_list);
 
@@ -2745,6 +2797,8 @@ static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
 static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 					struct intel_engine_cs *ring)
 {
+	struct intel_ringbuffer *buffer;
+
 	while (!list_empty(&ring->active_list)) {
 		struct drm_i915_gem_object *obj;
 
@@ -2760,18 +2814,16 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	 * are the ones that keep the context and ringbuffer backing objects
 	 * pinned in place.
 	 */
-	while (!list_empty(&ring->execlist_queue)) {
-		struct drm_i915_gem_request *submit_req;
 
-		submit_req = list_first_entry(&ring->execlist_queue,
-				struct drm_i915_gem_request,
-				execlist_link);
-		list_del(&submit_req->execlist_link);
+	if (i915.enable_execlists) {
+		spin_lock_irq(&ring->execlist_lock);
 
-		if (submit_req->ctx != ring->default_context)
-			intel_lr_context_unpin(submit_req);
+		/* list_splice_tail_init checks for empty lists */
+		list_splice_tail_init(&ring->execlist_queue,
+				      &ring->execlist_retired_req_list);
 
-		i915_gem_request_unreference(submit_req);
+		spin_unlock_irq(&ring->execlist_lock);
+		intel_execlists_retire_requests(ring);
 	}
 
 	/*
@@ -2790,6 +2842,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 
 		i915_gem_request_retire(request);
 	}
+
+	/* Having flushed all requests from all queues, we know that all
+	 * ringbuffers must now be empty. However, since we do not reclaim
+	 * all space when retiring the request (to prevent HEADs colliding
+	 * with rapid ringbuffer wraparound) the amount of available space
+	 * upon reset is less than when we start. Do one more pass over
+	 * all the ringbuffers to reset last_retired_head.
+	 */
+	list_for_each_entry(buffer, &ring->buffers, link) {
+		buffer->last_retired_head = buffer->tail;
+		intel_ring_update_space(buffer);
+	}
 }
 
 void i915_gem_reset(struct drm_device *dev)
@@ -2930,6 +2994,10 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (!list_empty(&ring->request_list))
 		return;
 
+	/* we probably should sync with hangcheck here, using cancel_work_sync.
+	 * Also locking seems to be fubar here, ring->request_list is protected
+	 * by dev->struct_mutex. */
+
 	intel_mark_idle(dev);
 
 	if (mutex_trylock(&dev->struct_mutex)) {
@@ -3054,7 +3122,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 		if (ret == 0)
 			ret = __i915_wait_request(req[i], reset_counter, true,
						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
-						  file->driver_priv);
+						  to_rps_client(file));
 		i915_gem_request_unreference__unlocked(req[i]);
 	}
 	return ret;
@@ -3420,7 +3488,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	if (flags & PIN_MAPPABLE)
 		end = min_t(u64, end, dev_priv->gtt.mappable_end);
 	if (flags & PIN_ZONE_4G)
-		end = min_t(u64, end, (1ULL << 32));
+		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
 
 	if (alignment == 0)
 		alignment = flags & PIN_MAPPABLE ? fence_alignment :
@@ -3457,30 +3525,50 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	if (IS_ERR(vma))
 		goto err_unpin;
 
-	if (flags & PIN_HIGH) {
-		search_flag = DRM_MM_SEARCH_BELOW;
-		alloc_flag = DRM_MM_CREATE_TOP;
+	if (flags & PIN_OFFSET_FIXED) {
+		uint64_t offset = flags & PIN_OFFSET_MASK;
+
+		if (offset & (alignment - 1) || offset + size > end) {
+			ret = -EINVAL;
+			goto err_free_vma;
+		}
+		vma->node.start = offset;
+		vma->node.size = size;
+		vma->node.color = obj->cache_level;
+		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		if (ret) {
+			ret = i915_gem_evict_for_vma(vma);
+			if (ret == 0)
+				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+		}
+		if (ret)
+			goto err_free_vma;
 	} else {
-		search_flag = DRM_MM_SEARCH_DEFAULT;
-		alloc_flag = DRM_MM_CREATE_DEFAULT;
-	}
+		if (flags & PIN_HIGH) {
+			search_flag = DRM_MM_SEARCH_BELOW;
+			alloc_flag = DRM_MM_CREATE_TOP;
+		} else {
+			search_flag = DRM_MM_SEARCH_DEFAULT;
+			alloc_flag = DRM_MM_CREATE_DEFAULT;
+		}
 
 search_free:
-	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
-						  size, alignment,
-						  obj->cache_level,
-						  start, end,
-						  search_flag,
-						  alloc_flag);
-	if (ret) {
-		ret = i915_gem_evict_something(dev, vm, size, alignment,
-					       obj->cache_level,
-					       start, end,
-					       flags);
-		if (ret == 0)
-			goto search_free;
+		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+							  size, alignment,
+							  obj->cache_level,
+							  start, end,
+							  search_flag,
+							  alloc_flag);
+		if (ret) {
+			ret = i915_gem_evict_something(dev, vm, size, alignment,
+						       obj->cache_level,
+						       start, end,
+						       flags);
+			if (ret == 0)
+				goto search_free;
 
-		goto err_free_vma;
+			goto err_free_vma;
+		}
 	}
 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
 		ret = -EINVAL;
@@ -3834,7 +3922,7 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 		 * cacheline, whereas normally such cachelines would get
 		 * invalidated.
 		 */
-		if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)
+		if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
 			return -ENODEV;
 
 		level = I915_CACHE_LLC;
@@ -3877,17 +3965,11 @@ rpm_put:
 int
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
-				     struct intel_engine_cs *pipelined,
-				     struct drm_i915_gem_request **pipelined_request,
 				     const struct i915_ggtt_view *view)
 {
 	u32 old_read_domains, old_write_domain;
 	int ret;
 
-	ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
-	if (ret)
-		return ret;
-
 	/* Mark the pin_display early so that we account for the
 	 * display coherency whilst setting up the cache domains.
 	 */
@@ -4077,9 +4159,36 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
 	    vma->node.start < (flags & PIN_OFFSET_MASK))
 		return true;
 
+	if (flags & PIN_OFFSET_FIXED &&
+	    vma->node.start != (flags & PIN_OFFSET_MASK))
+		return true;
+
 	return false;
 }
 
+void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	bool mappable, fenceable;
+	u32 fence_size, fence_alignment;
+
+	fence_size = i915_gem_get_gtt_size(obj->base.dev,
+					   obj->base.size,
+					   obj->tiling_mode);
+	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
+						     obj->base.size,
+						     obj->tiling_mode,
+						     true);
+
+	fenceable = (vma->node.size == fence_size &&
+		     (vma->node.start & (fence_alignment - 1)) == 0);
+
+	mappable = (vma->node.start + fence_size <=
+		    to_i915(obj->base.dev)->gtt.mappable_end);
+
+	obj->map_and_fenceable = mappable && fenceable;
+}
+
 static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
@@ -4147,25 +4256,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
 	    (bound ^ vma->bound) & GLOBAL_BIND) {
-		bool mappable, fenceable;
-		u32 fence_size, fence_alignment;
-
-		fence_size = i915_gem_get_gtt_size(obj->base.dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-
-		fenceable = (vma->node.size == fence_size &&
-			     (vma->node.start & (fence_alignment - 1)) == 0);
-
-		mappable = (vma->node.start + fence_size <=
-			    dev_priv->gtt.mappable_end);
-
-		obj->map_and_fenceable = mappable && fenceable;
-
+		__i915_vma_set_map_and_fenceable(vma);
 		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
 	}
 
@@ -4484,10 +4575,8 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 {
 	struct i915_vma *vma;
 	list_for_each_entry(vma, &obj->vma_list, vma_link) {
-		if (i915_is_ggtt(vma->vm) &&
-		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
-			continue;
-		if (vma->vm == vm)
+		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
+		    vma->vm == vm)
 			return vma;
 	}
 	return NULL;
@@ -4576,7 +4665,6 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
 	struct intel_engine_cs *ring = req->ring;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
 	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
 	int i, ret;
 
@@ -4592,10 +4680,10 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
 	 * here because no other code should access these registers other than
 	 * at initialization time.
 	 */
-	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
+	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, reg_base + i);
-		intel_ring_emit(ring, remap_info[i/4]);
+		intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
+		intel_ring_emit(ring, remap_info[i]);
 	}
 
 	intel_ring_advance(ring);
@@ -4763,18 +4851,9 @@ i915_gem_init_hw(struct drm_device *dev)
 	if (HAS_GUC_UCODE(dev)) {
 		ret = intel_guc_ucode_load(dev);
 		if (ret) {
-			/*
-			 * If we got an error and GuC submission is enabled, map
-			 * the error to -EIO so the GPU will be declared wedged.
-			 * OTOH, if we didn't intend to use the GuC anyway, just
-			 * discard the error and carry on.
-			 */
-			DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
-				  i915.enable_guc_submission ? "" :
-				  " (ignored)");
-			ret = i915.enable_guc_submission ? -EIO : 0;
-			if (ret)
-				goto out;
+			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
+			ret = -EIO;
+			goto out;
 		}
 	}
 
@@ -4837,14 +4916,6 @@ int i915_gem_init(struct drm_device *dev)
 
 	mutex_lock(&dev->struct_mutex);
 
-	if (IS_VALLEYVIEW(dev)) {
-		/* VLVA0 (potential hack), BIOS isn't actually waking us */
-		I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
-		if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
-			      VLV_GTLC_ALLOWWAKEACK), 10))
-			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
-	}
-
 	if (!i915.enable_execlists) {
 		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
 		dev_priv->gt.init_rings = i915_gem_init_rings;
@@ -4962,7 +5033,7 @@ i915_gem_load(struct drm_device *dev)
 	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
 
-	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
+	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev))
 		dev_priv->num_fence_regs = 32;
 	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) ||
 		 IS_I945GM(dev) || IS_G33(dev))
 		dev_priv->num_fence_regs = 16;
@@ -5183,6 +5254,21 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
 	return false;
 }
 
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
+{
+	struct page *page;
+
+	/* Only default objects have per-page dirty tracking */
+	if (WARN_ON(obj->ops != &i915_gem_object_ops))
+		return NULL;
+
+	page = i915_gem_object_get_page(obj, n);
+	set_page_dirty(page);
+	return page;
+}
+
 /* Allocate a new GEM object and fill it with the supplied data */
 struct drm_i915_gem_object *
 i915_gem_object_create_from_data(struct drm_device *dev,
@@ -5208,6 +5294,7 @@ i915_gem_object_create_from_data(struct drm_device *dev,
 	i915_gem_object_pin_pages(obj);
 	sg = obj->pages;
 	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
+	obj->dirty = 1;		/* Backing store is now out of date */
 	i915_gem_object_unpin_pages(obj);
 
 	if (WARN_ON(bytes != size)) {
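Note: the following is a small, self-contained userspace sketch, not part of the patch above, illustrating the bounded busy-wait technique that local_clock_us() and busywait_stop() implement in the hunk at -1146: spin on a completion flag for a few microseconds, but give up as soon as the time budget is exceeded or the thread migrates to another CPU, treating the migration as a sign of system load. All names here (clock_us, spin_wait_us, done, budget_us) are illustrative only.

#define _GNU_SOURCE
#include <sched.h>        /* sched_getcpu() */
#include <stdatomic.h>
#include <stdbool.h>
#include <time.h>         /* clock_gettime() */

/* Approximate monotonic time in microseconds, recording the current CPU. */
static unsigned long clock_us(int *cpu)
{
	struct timespec ts;

	*cpu = sched_getcpu();
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000UL + ts.tv_nsec / 1000;
}

/* Busy-wait on *done for at most budget_us microseconds; stop early if the
 * budget runs out or the thread is moved to a different CPU. */
static bool spin_wait_us(atomic_bool *done, unsigned long budget_us)
{
	int cpu, this_cpu;
	unsigned long timeout = clock_us(&cpu) + budget_us;

	for (;;) {
		if (atomic_load_explicit(done, memory_order_acquire))
			return true;    /* completed within the busy-wait budget */
		if (clock_us(&this_cpu) > timeout || this_cpu != cpu)
			return false;   /* over budget or migrated: sleep instead */
	}
}

A caller would try spin_wait_us(&done, 5) first and fall back to a heavyweight sleep/wakeup path only when it returns false, mirroring how __i915_spin_request() is attempted before prepare_to_wait() in __i915_wait_request().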