aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_reg.h
diff options
context:
space:
mode:
authorRobert Bragg <robert@sixbynine.org>2017-06-13 12:23:03 +0100
committerBen Widawsky <ben@bwidawsk.net>2017-06-14 12:31:57 -0700
commit19f81df2859eb10e92d68991cefa39f826dea013 (patch)
tree9cc888e18aef9c9fedc69f2eabb91e1590761c9d /drivers/gpu/drm/i915/i915_reg.h
parent5182f646c7615ede9a9ba3ecd241f6cbe16829dc (diff)
drm/i915/perf: Add OA unit support for Gen 8+
Enables access to OA unit metrics for BDW, CHV, SKL and BXT which all share (more-or-less) the same OA unit design. Of particular note in comparison to Haswell: some OA unit HW config state has become per-context state and as a consequence it is somewhat more complicated to manage synchronous state changes from the cpu while there's no guarantee of what context (if any) is currently actively running on the gpu. The periodic sampling frequency which can be particularly useful for system-wide analysis (as opposed to command stream synchronised MI_REPORT_PERF_COUNT commands) is perhaps the most surprising state to have become per-context save and restored (while the OABUFFER destination is still a shared, system-wide resource). This support for gen8+ takes care to consider a number of timing challenges involved in synchronously updating per-context state primarily by programming all config state from the cpu and updating all current and saved contexts synchronously while the OA unit is still disabled. The driver intentionally avoids depending on command streamer programming to update OA state considering the lack of synchronization between the automatic loading of OACTXCONTROL state (that includes the periodic sampling state and enable state) on context restore and the parsing of any general purpose BB the driver can control. I.e. this implementation is careful to avoid the possibility of a context restore temporarily enabling any out-of-date periodic sampling state. In addition to the risk of transiently-out-of-date state being loaded automatically; there are also internal HW latencies involved in the loading of MUX configurations which would be difficult to account for from the command streamer (and we only want to enable the unit when once the MUX configuration is complete). Since the Gen8+ OA unit design no longer supports clock gating the unit off for a single given context (which effectively stopped any progress of counters while any other context was running) and instead supports tagging OA reports with a context ID for filtering on the CPU, it means we can no longer hide the system-wide progress of counters from a non-privileged application only interested in metrics for its own context. Although we could theoretically try and subtract the progress of other contexts before forwarding reports via read() we aren't in a position to filter reports captured via MI_REPORT_PERF_COUNT commands. As a result, for Gen8+, we always require the dev.i915.perf_stream_paranoid to be unset for any access to OA metrics if not root. v5: Drain submitted requests when enabling metric set to ensure no lite-restore erases the context image we just updated (Lionel) v6: In addition to drain, switch to kernel context & update all context in place (Chris) v7: Add missing mutex_unlock() if switching to kernel context fails (Matthew) v8: Simplify OA period/flex-eu-counters programming by using the batchbuffer instead of modifying ctx-image (Lionel) v9: Back to updating the context image (due to erroneous testing, batchbuffer programming the OA unit doesn't actually work) (Lionel) Pin context before updating context image (Chris) Drop MMIO programming now that we switch to a kernel context with right values in initial context image (Chris) v10: Just pin_map the contexts we want to modify or let the configuration happen on first use (Chris) v11: Update kernel context OA config through the batchbuffer rather than on the fly ctx-image update (Lionel) v12: Rework OA context registers update again by swithing away from user contexts and reconfiguring the kernel context through the batchbuffer and updating all the other contexts' context image. Also take care to lock slice/subslice configuration when OA is on. (Lionel) v13: Request rpcs updates on all engine when updating the OA config (Lionel) v14: Drop any kind of rpcs management now that we monitor sseu configuration changes in a later patch (Lionel) Remove usleep after programming the NOA configs on Gen8+, this doesn't seem to be needed (Lionel) v15: Respect coding style for block comments (Chris) v16: Add missing i915_add_request() in case we fail to emit OA configuration (Matthew) Signed-off-by: Robert Bragg <robert@sixbynine.org> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Matthew Auld <matthew.auld@intel.com> \o/ Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_reg.h')
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h22
1 files changed, 22 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 88e4707f571d..bd535f12db18 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -656,6 +656,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GEN8_OACTXID _MMIO(0x2364)
+#define GEN8_OA_DEBUG _MMIO(0x2B04)
+#define GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS (1<<5)
+#define GEN9_OA_DEBUG_INCLUDE_CLK_RATIO (1<<6)
+#define GEN9_OA_DEBUG_DISABLE_GO_1_0_REPORTS (1<<2)
+#define GEN9_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1<<1)
+
#define GEN8_OACONTROL _MMIO(0x2B00)
#define GEN8_OA_REPORT_FORMAT_A12 (0<<2)
#define GEN8_OA_REPORT_FORMAT_A12_B8_C8 (2<<2)
@@ -677,6 +683,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GEN7_OABUFFER_STOP_RESUME_ENABLE (1<<1)
#define GEN7_OABUFFER_RESUME (1<<0)
+#define GEN8_OABUFFER_UDW _MMIO(0x23b4)
#define GEN8_OABUFFER _MMIO(0x2b14)
#define GEN7_OASTATUS1 _MMIO(0x2364)
@@ -695,7 +702,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define GEN8_OASTATUS_REPORT_LOST (1<<0)
#define GEN8_OAHEADPTR _MMIO(0x2B0C)
+#define GEN8_OAHEADPTR_MASK 0xffffffc0
#define GEN8_OATAILPTR _MMIO(0x2B10)
+#define GEN8_OATAILPTR_MASK 0xffffffc0
#define OABUFFER_SIZE_128K (0<<3)
#define OABUFFER_SIZE_256K (1<<3)
@@ -708,7 +717,17 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define OA_MEM_SELECT_GGTT (1<<0)
+/*
+ * Flexible, Aggregate EU Counter Registers.
+ * Note: these aren't contiguous
+ */
#define EU_PERF_CNTL0 _MMIO(0xe458)
+#define EU_PERF_CNTL1 _MMIO(0xe558)
+#define EU_PERF_CNTL2 _MMIO(0xe658)
+#define EU_PERF_CNTL3 _MMIO(0xe758)
+#define EU_PERF_CNTL4 _MMIO(0xe45c)
+#define EU_PERF_CNTL5 _MMIO(0xe55c)
+#define EU_PERF_CNTL6 _MMIO(0xe65c)
#define GDT_CHICKEN_BITS _MMIO(0x9840)
#define GT_NOA_ENABLE 0x00000080
@@ -2494,6 +2513,9 @@ enum skl_disp_power_wells {
#define GEN8_RC_SEMA_IDLE_MSG_DISABLE (1 << 12)
#define GEN8_FF_DOP_CLOCK_GATE_DISABLE (1<<10)
+#define GEN6_RCS_PWR_FSM _MMIO(0x22ac)
+#define GEN9_RCS_FE_FSM2 _MMIO(0x22a4)
+
/* Fuse readout registers for GT */
#define CHV_FUSE_GT _MMIO(VLV_DISPLAY_BASE + 0x2168)
#define CHV_FGT_DISABLE_SS0 (1 << 10)