about | summary | refs | log | tree | commit | diff
path: root/drivers/gpu/drm/i915/i915_gem_fence_reg.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_fence_reg.c')
-rw-r--r-- drivers/gpu/drm/i915/i915_gem_fence_reg.c 156
1 files changed, 65 insertions, 91 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
index e037e94792f3..3084f52e3372 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c
@@ -210,6 +210,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
struct i915_vma *vma)
{
intel_wakeref_t wakeref;
+ struct i915_vma *old;
int ret;
if (vma) {
@@ -229,49 +230,55 @@ static int fence_update(struct drm_i915_fence_reg *fence,
return ret;
}
- if (fence->vma) {
- struct i915_vma *old = fence->vma;
-
+ old = xchg(&fence->vma, NULL);
+ if (old) {
ret = i915_active_request_retire(&old->last_fence,
&old->obj->base.dev->struct_mutex);
- if (ret)
+ if (ret) {
+ fence->vma = old;
return ret;
+ }
i915_vma_flush_writes(old);
- }
- if (fence->vma && fence->vma != vma) {
- /* Ensure that all userspace CPU access is completed before
+ /*
+ * Ensure that all userspace CPU access is completed before
* stealing the fence.
*/
- GEM_BUG_ON(fence->vma->fence != fence);
- i915_vma_revoke_mmap(fence->vma);
-
- fence->vma->fence = NULL;
- fence->vma = NULL;
+ if (old != vma) {
+ GEM_BUG_ON(old->fence != fence);
+ i915_vma_revoke_mmap(old);
+ old->fence = NULL;
+ }
list_move(&fence->link, &fence->i915->mm.fence_list);
}
- /* We only need to update the register itself if the device is awake.
+ /*
+ * We only need to update the register itself if the device is awake.
* If the device is currently powered down, we will defer the write
* to the runtime resume, see i915_gem_restore_fences().
+ *
+ * This only works for removing the fence register, on acquisition
+ * the caller must hold the rpm wakeref. The fence register must
+ * be cleared before we can use any other fences to ensure that
+ * the new fences do not overlap the elided clears, confusing HW.
*/
wakeref = intel_runtime_pm_get_if_in_use(fence->i915);
- if (wakeref) {
- fence_write(fence, vma);
- intel_runtime_pm_put(fence->i915, wakeref);
+ if (!wakeref) {
+ GEM_BUG_ON(vma);
+ return 0;
}
- if (vma) {
- if (fence->vma != vma) {
- vma->fence = fence;
- fence->vma = vma;
- }
+ WRITE_ONCE(fence->vma, vma);
+ fence_write(fence, vma);
+ if (vma) {
+ vma->fence = fence;
list_move_tail(&fence->link, &fence->i915->mm.fence_list);
}
+ intel_runtime_pm_put(fence->i915, wakeref);
return 0;
}
@@ -436,32 +443,6 @@ void i915_unreserve_fence(struct drm_i915_fence_reg *fence)
}
/**
- * i915_gem_revoke_fences - revoke fence state
- * @dev_priv: i915 device private
- *
- * Removes all GTT mmappings via the fence registers. This forces any user
- * of the fence to reacquire that fence before continuing with their access.
- * One use is during GPU reset where the fence register is lost and we need to
- * revoke concurrent userspace access via GTT mmaps until the hardware has been
- * reset and the fence registers have been restored.
- */
-void i915_gem_revoke_fences(struct drm_i915_private *dev_priv)
-{
- int i;
-
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
- for (i = 0; i < dev_priv->num_fence_regs; i++) {
- struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];
-
- GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
-
- if (fence->vma)
- i915_vma_revoke_mmap(fence->vma);
- }
-}
-
-/**
* i915_gem_restore_fences - restore fence state
* @dev_priv: i915 device private
*
@@ -473,9 +454,10 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
{
int i;
+ rcu_read_lock(); /* keep obj alive as we dereference */
for (i = 0; i < dev_priv->num_fence_regs; i++) {
struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
- struct i915_vma *vma = reg->vma;
+ struct i915_vma *vma = READ_ONCE(reg->vma);
GEM_BUG_ON(vma && vma->fence != reg);
@@ -483,18 +465,12 @@ void i915_gem_restore_fences(struct drm_i915_private *dev_priv)
* Commit delayed tiling changes if we have an object still
* attached to the fence, otherwise just clear the fence.
*/
- if (vma && !i915_gem_object_is_tiled(vma->obj)) {
- GEM_BUG_ON(!reg->dirty);
- GEM_BUG_ON(i915_vma_has_userfault(vma));
-
- list_move(&reg->link, &dev_priv->mm.fence_list);
- vma->fence = NULL;
+ if (vma && !i915_gem_object_is_tiled(vma->obj))
vma = NULL;
- }
fence_write(reg, vma);
- reg->vma = vma;
}
+ rcu_read_unlock();
}
/**
@@ -609,8 +585,38 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
*/
swizzle_x = I915_BIT_6_SWIZZLE_NONE;
swizzle_y = I915_BIT_6_SWIZZLE_NONE;
- } else if (IS_MOBILE(dev_priv) ||
- IS_I915G(dev_priv) || IS_I945G(dev_priv)) {
+ } else if (IS_G45(dev_priv) || IS_I965G(dev_priv) || IS_G33(dev_priv)) {
+ /* The 965, G33, and newer, have a very flexible memory
+ * configuration. It will enable dual-channel mode
+ * (interleaving) on as much memory as it can, and the GPU
+ * will additionally sometimes enable different bit 6
+ * swizzling for tiled objects from the CPU.
+ *
+ * Here's what I found on the G965:
+ * slot fill memory size swizzling
+ * 0A 0B 1A 1B 1-ch 2-ch
+ * 512 0 0 0 512 0 O
+ * 512 0 512 0 16 1008 X
+ * 512 0 0 512 16 1008 X
+ * 0 512 0 512 16 1008 X
+ * 1024 1024 1024 0 2048 1024 O
+ *
+ * We could probably detect this based on either the DRB
+ * matching, which was the case for the swizzling required in
+ * the table above, or from the 1-ch value being less than
+ * the minimum size of a rank.
+ *
+ * Reports indicate that the swizzling actually
+ * varies depending upon page placement inside the
+ * channels, i.e. we see swizzled pages where the
+ * banks of memory are paired and unswizzled on the
+ * uneven portion, so leave that as unknown.
+ */
+ if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
+ swizzle_x = I915_BIT_6_SWIZZLE_9_10;
+ swizzle_y = I915_BIT_6_SWIZZLE_9;
+ }
+ } else {
u32 dcc;
/* On 9xx chipsets, channel interleave by the CPU is
@@ -660,37 +666,6 @@ i915_gem_detect_bit_6_swizzle(struct drm_i915_private *dev_priv)
swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
}
- } else {
- /* The 965, G33, and newer, have a very flexible memory
- * configuration. It will enable dual-channel mode
- * (interleaving) on as much memory as it can, and the GPU
- * will additionally sometimes enable different bit 6
- * swizzling for tiled objects from the CPU.
- *
- * Here's what I found on the G965:
- * slot fill memory size swizzling
- * 0A 0B 1A 1B 1-ch 2-ch
- * 512 0 0 0 512 0 O
- * 512 0 512 0 16 1008 X
- * 512 0 0 512 16 1008 X
- * 0 512 0 512 16 1008 X
- * 1024 1024 1024 0 2048 1024 O
- *
- * We could probably detect this based on either the DRB
- * matching, which was the case for the swizzling required in
- * the table above, or from the 1-ch value being less than
- * the minimum size of a rank.
- *
- * Reports indicate that the swizzling actually
- * varies depending upon page placement inside the
- * channels, i.e. we see swizzled pages where the
- * banks of memory are paired and unswizzled on the
- * uneven portion, so leave that as unknown.
- */
- if (I915_READ16(C0DRB3) == I915_READ16(C1DRB3)) {
- swizzle_x = I915_BIT_6_SWIZZLE_9_10;
- swizzle_y = I915_BIT_6_SWIZZLE_9;
- }
}
if (swizzle_x == I915_BIT_6_SWIZZLE_UNKNOWN ||
@@ -790,8 +765,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj,
int i;
if (obj->bit_17 == NULL) {
- obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
- sizeof(long), GFP_KERNEL);
+ obj->bit_17 = bitmap_zalloc(page_count, GFP_KERNEL);
if (obj->bit_17 == NULL) {
DRM_ERROR("Failed to allocate memory for bit 17 "
"record\n");