aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_timeline.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-08 21:35:19 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-08 21:35:19 -0700
commita2d635decbfa9c1e4ae15cb05b68b2559f7f827c (patch)
tree1c3766c35215450ff9e4228efed578d5e6ba65d1 /drivers/gpu/drm/i915/i915_timeline.c
parent89c3b37af87ec183b666d83428cb28cc421671a6 (diff)
parenteb85d03e01c3e9f3b0ba7282b2e3515a635decb2 (diff)
Merge tag 'drm-next-2019-05-09' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "This has two exciting community drivers for ARM Mali accelerators. Since ARM has never been open source friendly on the GPU side of the house, the community has had to create open source drivers for the Mali GPUs. Lima covers the older t4xx and panfrost the newer 6xx/7xx series. Well done to all involved and hopefully this will help ARM head in the right direction. There is also now the ability if you don't have any of the legacy drivers enabled (pre-KMS) to remove all the pre-KMS support code from the core drm, this saves 10% or so in codesize on my machine. i915 also enable Icelake/Elkhart Lake Gen11 GPUs by default, vboxvideo moves out of staging. There are also some rcar-du patches which crossover with media tree but all should be acked by Mauro. Summary: uapi changes: - Colorspace connector property - fourcc - new YUV formts - timeline sync objects initially merged - expose FB_DAMAGE_CLIPS to atomic userspace new drivers: - vboxvideo: moved out of staging - aspeed: ASPEED SoC BMC chip display support - lima: ARM Mali4xx GPU acceleration driver support - panfrost: ARM Mali6xx/7xx Midgard/Bitfrost acceleration driver support core: - component helper docs - unplugging fixes - devm device init - MIPI/DSI rate control - shmem backed gem objects - connector, display_info, edid_quirks cleanups - dma_buf fence chain support - 64-bit dma-fence seqno comparison fixes - move initial fb config code to core - gem fence array helpers for Lima - ability to remove legacy support code if no drivers requires it (removes 10% of drm.ko size) - lease fixes ttm: - unified DRM_FILE_PAGE_OFFSET handling - Account for kernel allocations in kernel zone only panel: - OSD070T1718-19TS panel support - panel-tpo-td028ttec1 backlight support - Ronbo RB070D30 MIPI/DSI - Feiyang FY07024DI26A30-D MIPI-DSI panel - Rocktech jh057n00900 MIPI-DSI panel i915: - Comet Lake (Gen9) PCI IDs - Updated Icelake PCI IDs - Elkhartlake (Gen11) support - DP MST property addtions - plane and watermark fixes - Icelake port sync and VEBOX disable fixes - struct_mutex usage reduction - Icelake gamma fix - GuC reset fixes - make mmap more asynchronous - sound display power well race fixes - DDI/MIPI-DSI clocks for Icelake - Icelake RPS frequency changing support - Icelake workarounds amdgpu: - Use HMM for userptr - vega20 experimental smu11 support - RAS support for vega20 - BACO support for vega12 + fixes for vega20 - reworked IH interrupt handling - amdkfd RAS support - Freesync improvements - initial timeline sync object support - DC Z ordering fixes - NV12 planes support - colorspace properties for planes= - eDP opts if eDP already initialized nouveau: - misc fixes etnaviv: - misc fixes msm: - GPU zap shader support expansion - robustness ABI addition exynos: - Logging cleanups tegra: - Shared reset fix - CPU cache maintenance fix cirrus: - driver rewritten using simple helpers meson: - G12A support vmwgfx: - Resource dirtying management improvements - Userspace logging improvements virtio: - PRIME fixes rockchip: - rk3066 hdmi support sun4i: - DSI burst mode support vc4: - load tracker to detect underflow v3d: - v3d v4.2 support malidp: - initial Mali D71 support in komeda driver tfp410: - omap related improvement omapdrm: - drm bridge/panel support - drop some omap specific panels rcar-du: - Display writeback support" * tag 'drm-next-2019-05-09' of git://anongit.freedesktop.org/drm/drm: (1507 commits) drm/msm/a6xx: No zap shader is not an error drm/cma-helper: Fix drm_gem_cma_free_object() drm: Fix timestamp docs for variable refresh properties. drm/komeda: Mark the local functions as static drm/komeda: Fixed warning: Function parameter or member not described drm/komeda: Expose bus_width to Komeda-CORE drm/komeda: Add sysfs attribute: core_id and config_id drm: add non-desktop quirk for Valve HMDs drm/panfrost: Show stored feature registers drm/panfrost: Don't scream about deferred probe drm/panfrost: Disable PM on probe failure drm/panfrost: Set DMA masks earlier drm/panfrost: Add sanity checks to submit IOCTL drm/etnaviv: initialize idle mask before querying the HW db drm: introduce a capability flag for syncobj timeline support drm: report consistent errors when checking syncobj capibility drm/nouveau/nouveau: forward error generated while resuming objects tree drm/nouveau/fb/ramgk104: fix spelling mistake "sucessfully" -> "successfully" drm/nouveau/i2c: Disable i2c bus access after ->fini() drm/nouveau: Remove duplicate ACPI_VIDEO_NOTIFY_PROBE definition ...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_timeline.c')
-rw-r--r--drivers/gpu/drm/i915/i915_timeline.c301
1 files changed, 272 insertions, 29 deletions
diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c
index b2202d2e58a2..5fbea0892f33 100644
--- a/drivers/gpu/drm/i915/i915_timeline.c
+++ b/drivers/gpu/drm/i915/i915_timeline.c
@@ -6,19 +6,32 @@
#include "i915_drv.h"
-#include "i915_timeline.h"
+#include "i915_active.h"
#include "i915_syncmap.h"
+#include "i915_timeline.h"
+
+#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
+#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
struct i915_timeline_hwsp {
- struct i915_vma *vma;
+ struct i915_gt_timelines *gt;
struct list_head free_link;
+ struct i915_vma *vma;
u64 free_bitmap;
};
-static inline struct i915_timeline_hwsp *
-i915_timeline_hwsp(const struct i915_timeline *tl)
+struct i915_timeline_cacheline {
+ struct i915_active active;
+ struct i915_timeline_hwsp *hwsp;
+ void *vaddr;
+#define CACHELINE_BITS 6
+#define CACHELINE_FREE CACHELINE_BITS
+};
+
+static inline struct drm_i915_private *
+hwsp_to_i915(struct i915_timeline_hwsp *hwsp)
{
- return tl->hwsp_ggtt->private;
+ return container_of(hwsp->gt, struct drm_i915_private, gt.timelines);
}
static struct i915_vma *__hwsp_alloc(struct drm_i915_private *i915)
@@ -71,6 +84,7 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
vma->private = hwsp;
hwsp->vma = vma;
hwsp->free_bitmap = ~0ull;
+ hwsp->gt = gt;
spin_lock(&gt->hwsp_lock);
list_add(&hwsp->free_link, &gt->hwsp_free_list);
@@ -88,14 +102,9 @@ hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline)
return hwsp->vma;
}
-static void hwsp_free(struct i915_timeline *timeline)
+static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline)
{
- struct i915_gt_timelines *gt = &timeline->i915->gt.timelines;
- struct i915_timeline_hwsp *hwsp;
-
- hwsp = i915_timeline_hwsp(timeline);
- if (!hwsp) /* leave global HWSP alone! */
- return;
+ struct i915_gt_timelines *gt = hwsp->gt;
spin_lock(&gt->hwsp_lock);
@@ -103,7 +112,8 @@ static void hwsp_free(struct i915_timeline *timeline)
if (!hwsp->free_bitmap)
list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
- hwsp->free_bitmap |= BIT_ULL(timeline->hwsp_offset / CACHELINE_BYTES);
+ GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
+ hwsp->free_bitmap |= BIT_ULL(cacheline);
/* And if no one is left using it, give the page back to the system */
if (hwsp->free_bitmap == ~0ull) {
@@ -115,9 +125,78 @@ static void hwsp_free(struct i915_timeline *timeline)
spin_unlock(&gt->hwsp_lock);
}
+static void __idle_cacheline_free(struct i915_timeline_cacheline *cl)
+{
+ GEM_BUG_ON(!i915_active_is_idle(&cl->active));
+
+ i915_gem_object_unpin_map(cl->hwsp->vma->obj);
+ i915_vma_put(cl->hwsp->vma);
+ __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
+
+ i915_active_fini(&cl->active);
+ kfree(cl);
+}
+
+static void __cacheline_retire(struct i915_active *active)
+{
+ struct i915_timeline_cacheline *cl =
+ container_of(active, typeof(*cl), active);
+
+ i915_vma_unpin(cl->hwsp->vma);
+ if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
+ __idle_cacheline_free(cl);
+}
+
+static struct i915_timeline_cacheline *
+cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline)
+{
+ struct i915_timeline_cacheline *cl;
+ void *vaddr;
+
+ GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));
+
+ cl = kmalloc(sizeof(*cl), GFP_KERNEL);
+ if (!cl)
+ return ERR_PTR(-ENOMEM);
+
+ vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ kfree(cl);
+ return ERR_CAST(vaddr);
+ }
+
+ i915_vma_get(hwsp->vma);
+ cl->hwsp = hwsp;
+ cl->vaddr = page_pack_bits(vaddr, cacheline);
+
+ i915_active_init(hwsp_to_i915(hwsp), &cl->active, __cacheline_retire);
+
+ return cl;
+}
+
+static void cacheline_acquire(struct i915_timeline_cacheline *cl)
+{
+ if (cl && i915_active_acquire(&cl->active))
+ __i915_vma_pin(cl->hwsp->vma);
+}
+
+static void cacheline_release(struct i915_timeline_cacheline *cl)
+{
+ if (cl)
+ i915_active_release(&cl->active);
+}
+
+static void cacheline_free(struct i915_timeline_cacheline *cl)
+{
+ GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
+ cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
+
+ if (i915_active_is_idle(&cl->active))
+ __idle_cacheline_free(cl);
+}
+
int i915_timeline_init(struct drm_i915_private *i915,
struct i915_timeline *timeline,
- const char *name,
struct i915_vma *hwsp)
{
void *vaddr;
@@ -133,37 +212,47 @@ int i915_timeline_init(struct drm_i915_private *i915,
BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
timeline->i915 = i915;
- timeline->name = name;
timeline->pin_count = 0;
timeline->has_initial_breadcrumb = !hwsp;
+ timeline->hwsp_cacheline = NULL;
- timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
if (!hwsp) {
+ struct i915_timeline_cacheline *cl;
unsigned int cacheline;
hwsp = hwsp_alloc(timeline, &cacheline);
if (IS_ERR(hwsp))
return PTR_ERR(hwsp);
+ cl = cacheline_alloc(hwsp->private, cacheline);
+ if (IS_ERR(cl)) {
+ __idle_hwsp_free(hwsp->private, cacheline);
+ return PTR_ERR(cl);
+ }
+
+ timeline->hwsp_cacheline = cl;
timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
- }
- timeline->hwsp_ggtt = i915_vma_get(hwsp);
- vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
- if (IS_ERR(vaddr)) {
- hwsp_free(timeline);
- i915_vma_put(hwsp);
- return PTR_ERR(vaddr);
+ vaddr = page_mask_bits(cl->vaddr);
+ } else {
+ timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
+
+ vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
}
timeline->hwsp_seqno =
memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
+ timeline->hwsp_ggtt = i915_vma_get(hwsp);
+ GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
+
timeline->fence_context = dma_fence_context_alloc(1);
spin_lock_init(&timeline->lock);
+ mutex_init(&timeline->mutex);
- INIT_ACTIVE_REQUEST(&timeline->barrier);
INIT_ACTIVE_REQUEST(&timeline->last_request);
INIT_LIST_HEAD(&timeline->requests);
@@ -236,18 +325,19 @@ void i915_timeline_fini(struct i915_timeline *timeline)
{
GEM_BUG_ON(timeline->pin_count);
GEM_BUG_ON(!list_empty(&timeline->requests));
- GEM_BUG_ON(i915_active_request_isset(&timeline->barrier));
i915_syncmap_free(&timeline->sync);
- hwsp_free(timeline);
- i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+ if (timeline->hwsp_cacheline)
+ cacheline_free(timeline->hwsp_cacheline);
+ else
+ i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+
i915_vma_put(timeline->hwsp_ggtt);
}
struct i915_timeline *
i915_timeline_create(struct drm_i915_private *i915,
- const char *name,
struct i915_vma *global_hwsp)
{
struct i915_timeline *timeline;
@@ -257,7 +347,7 @@ i915_timeline_create(struct drm_i915_private *i915,
if (!timeline)
return ERR_PTR(-ENOMEM);
- err = i915_timeline_init(i915, timeline, name, global_hwsp);
+ err = i915_timeline_init(i915, timeline, global_hwsp);
if (err) {
kfree(timeline);
return ERR_PTR(err);
@@ -284,6 +374,7 @@ int i915_timeline_pin(struct i915_timeline *tl)
i915_ggtt_offset(tl->hwsp_ggtt) +
offset_in_page(tl->hwsp_offset);
+ cacheline_acquire(tl->hwsp_cacheline);
timeline_add_to_active(tl);
return 0;
@@ -293,6 +384,157 @@ unpin:
return err;
}
+static u32 timeline_advance(struct i915_timeline *tl)
+{
+ GEM_BUG_ON(!tl->pin_count);
+ GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+
+ return tl->seqno += 1 + tl->has_initial_breadcrumb;
+}
+
+static void timeline_rollback(struct i915_timeline *tl)
+{
+ tl->seqno -= 1 + tl->has_initial_breadcrumb;
+}
+
+static noinline int
+__i915_timeline_get_seqno(struct i915_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno)
+{
+ struct i915_timeline_cacheline *cl;
+ unsigned int cacheline;
+ struct i915_vma *vma;
+ void *vaddr;
+ int err;
+
+ /*
+ * If there is an outstanding GPU reference to this cacheline,
+ * such as it being sampled by a HW semaphore on another timeline,
+ * we cannot wraparound our seqno value (the HW semaphore does
+ * a strict greater-than-or-equals compare, not i915_seqno_passed).
+ * So if the cacheline is still busy, we must detach ourselves
+ * from it and leave it inflight alongside its users.
+ *
+ * However, if nobody is watching and we can guarantee that nobody
+ * will, we could simply reuse the same cacheline.
+ *
+ * if (i915_active_request_is_signaled(&tl->last_request) &&
+ * i915_active_is_signaled(&tl->hwsp_cacheline->active))
+ * return 0;
+ *
+ * That seems unlikely for a busy timeline that needed to wrap in
+ * the first place, so just replace the cacheline.
+ */
+
+ vma = hwsp_alloc(tl, &cacheline);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_rollback;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (err) {
+ __idle_hwsp_free(vma->private, cacheline);
+ goto err_rollback;
+ }
+
+ cl = cacheline_alloc(vma->private, cacheline);
+ if (IS_ERR(cl)) {
+ err = PTR_ERR(cl);
+ __idle_hwsp_free(vma->private, cacheline);
+ goto err_unpin;
+ }
+ GEM_BUG_ON(cl->hwsp->vma != vma);
+
+ /*
+ * Attach the old cacheline to the current request, so that we only
+ * free it after the current request is retired, which ensures that
+ * all writes into the cacheline from previous requests are complete.
+ */
+ err = i915_active_ref(&tl->hwsp_cacheline->active,
+ tl->fence_context, rq);
+ if (err)
+ goto err_cacheline;
+
+ cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
+ cacheline_free(tl->hwsp_cacheline);
+
+ i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
+ i915_vma_put(tl->hwsp_ggtt);
+
+ tl->hwsp_ggtt = i915_vma_get(vma);
+
+ vaddr = page_mask_bits(cl->vaddr);
+ tl->hwsp_offset = cacheline * CACHELINE_BYTES;
+ tl->hwsp_seqno =
+ memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
+
+ tl->hwsp_offset += i915_ggtt_offset(vma);
+
+ cacheline_acquire(cl);
+ tl->hwsp_cacheline = cl;
+
+ *seqno = timeline_advance(tl);
+ GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
+ return 0;
+
+err_cacheline:
+ cacheline_free(cl);
+err_unpin:
+ i915_vma_unpin(vma);
+err_rollback:
+ timeline_rollback(tl);
+ return err;
+}
+
+int i915_timeline_get_seqno(struct i915_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno)
+{
+ *seqno = timeline_advance(tl);
+
+ /* Replace the HWSP on wraparound for HW semaphores */
+ if (unlikely(!*seqno && tl->hwsp_cacheline))
+ return __i915_timeline_get_seqno(tl, rq, seqno);
+
+ return 0;
+}
+
+static int cacheline_ref(struct i915_timeline_cacheline *cl,
+ struct i915_request *rq)
+{
+ return i915_active_ref(&cl->active, rq->fence.context, rq);
+}
+
+int i915_timeline_read_hwsp(struct i915_request *from,
+ struct i915_request *to,
+ u32 *hwsp)
+{
+ struct i915_timeline_cacheline *cl = from->hwsp_cacheline;
+ struct i915_timeline *tl = from->timeline;
+ int err;
+
+ GEM_BUG_ON(to->timeline == tl);
+
+ mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
+ err = i915_request_completed(from);
+ if (!err)
+ err = cacheline_ref(cl, to);
+ if (!err) {
+ if (likely(cl == tl->hwsp_cacheline)) {
+ *hwsp = tl->hwsp_offset;
+ } else { /* across a seqno wrap, recover the original offset */
+ *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
+ ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
+ CACHELINE_BYTES;
+ }
+ }
+ mutex_unlock(&tl->mutex);
+
+ return err;
+}
+
void i915_timeline_unpin(struct i915_timeline *tl)
{
GEM_BUG_ON(!tl->pin_count);
@@ -300,6 +542,7 @@ void i915_timeline_unpin(struct i915_timeline *tl)
return;
timeline_remove_from_active(tl);
+ cacheline_release(tl->hwsp_cacheline);
/*
* Since this timeline is idle, all bariers upon which we were waiting