diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 272 |
1 files changed, 156 insertions, 116 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 0db9f488da7e..0f2976507e48 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1580,10 +1580,10 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @se_num: shader engine to address * @sh_num: sh block to address + * @instance: Certain registers are instanced per SE or SH. + * 0xffffffff means broadcast to all SEs or SHs (CIK). * - * Select which SE, SH combinations to address. Certain - * registers are instanced per SE or SH. 0xffffffff means - * broadcast to all SEs or SHs (CIK). + * Select which SE, SH combinations to address. */ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) @@ -1779,8 +1779,6 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev, * gfx_v7_0_setup_rb - setup the RBs on the asic * * @adev: amdgpu_device pointer - * @se_num: number of SEs (shader engines) for the asic - * @sh_per_se: number of SH blocks per SE for the asic * * Configures per-SE/SH RB registers (CIK). */ @@ -1841,6 +1839,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +#define DEFAULT_SH_MEM_BASES (0x6000) /** * gfx_v7_0_init_compute_vmid - gart enable * @@ -1849,9 +1848,6 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) * Initialize compute vmid sh_mem registers * */ -#define DEFAULT_SH_MEM_BASES (0x6000) -#define FIRST_COMPUTE_VMID (8) -#define LAST_COMPUTE_VMID (16) static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -1869,7 +1865,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT; mutex_lock(&adev->srbm_mutex); - for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { cik_srbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32(mmSH_MEM_CONFIG, sh_mem_config); @@ -1879,6 +1875,33 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + + /* Initialize all compute VMIDs to have no GDS, GWS, or OA + access. These should be enabled by FW for target VMIDs. */ + for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { + WREG32(amdgpu_gds_reg_offset[i].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[i].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[i].gws, 0); + WREG32(amdgpu_gds_reg_offset[i].oa, 0); + } +} + +static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); + } } static void gfx_v7_0_config_init(struct amdgpu_device *adev) @@ -1959,6 +1982,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); gfx_v7_0_init_compute_vmid(adev); + gfx_v7_0_init_gds_vmid(adev); WREG32(mmSX_DEBUG_1, 0x20); @@ -2025,30 +2049,9 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev) udelay(50); } -/* - * GPU scratch registers helpers function. - */ -/** - * gfx_v7_0_scratch_init - setup driver info for CP scratch regs - * - * @adev: amdgpu_device pointer - * - * Set up the number and offset of the CP scratch registers. - * NOTE: use of CP scratch registers is a legacy inferface and - * is not used by default on newer asics (r6xx+). On newer asics, - * memory buffers are used for fences rather than scratch regs. - */ -static void gfx_v7_0_scratch_init(struct amdgpu_device *adev) -{ - adev->gfx.scratch.num_reg = 8; - adev->gfx.scratch.reg_base = mmSCRATCH_REG0; - adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; -} - /** * gfx_v7_0_ring_test_ring - basic gfx ring test * - * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information * * Allocate a scratch register and write to it using the gfx ring (CIK). @@ -2059,44 +2062,35 @@ static void gfx_v7_0_scratch_init(struct amdgpu_device *adev) static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t scratch; uint32_t tmp = 0; unsigned i; int r; - r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) - return r; - - WREG32(scratch, 0xCAFEDEAD); + WREG32(mmSCRATCH_REG0, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) - goto error_free_scratch; + return r; amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(scratch); + tmp = RREG32(mmSCRATCH_REG0); if (tmp == 0xDEADBEEF) break; udelay(1); } if (i >= adev->usec_timeout) r = -ETIMEDOUT; - -error_free_scratch: - amdgpu_gfx_scratch_free(adev, scratch); return r; } /** - * gfx_v7_0_ring_emit_hdp - emit an hdp flush on the cp + * gfx_v7_0_ring_emit_hdp_flush - emit an hdp flush on the cp * - * @adev: amdgpu_device pointer - * @ridx: amdgpu ring index + * @ring: amdgpu_ring structure holding ring information * * Emits an hdp flush on the cp. */ @@ -2145,10 +2139,12 @@ static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) /** * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring * - * @adev: amdgpu_device pointer - * @fence: amdgpu fence object + * @ring: amdgpu_ring structure holding ring information + * @addr: address + * @seq: sequence number + * @flags: fence related flags * - * Emits a fence sequnce number on the gfx ring and flushes + * Emits a fence sequence number on the gfx ring and flushes * GPU caches. */ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, @@ -2186,10 +2182,12 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, /** * gfx_v7_0_ring_emit_fence_compute - emit a fence on the compute ring * - * @adev: amdgpu_device pointer - * @fence: amdgpu fence object + * @ring: amdgpu_ring structure holding ring information + * @addr: address + * @seq: sequence number + * @flags: fence related flags * - * Emits a fence sequnce number on the compute ring and flushes + * Emits a fence sequence number on the compute ring and flushes * GPU caches. */ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, @@ -2216,15 +2214,17 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, * IB stuff */ /** - * gfx_v7_0_ring_emit_ib - emit an IB (Indirect Buffer) on the ring + * gfx_v7_0_ring_emit_ib_gfx - emit an IB (Indirect Buffer) on the ring * * @ring: amdgpu_ring structure holding ring information + * @job: job to retrieve vmid from * @ib: amdgpu indirect buffer object + * @flags: options (AMDGPU_HAVE_CTX_SWITCH) * * Emits an DE (drawing engine) or CE (constant engine) IB * on the gfx ring. IBs are usually generated by userspace * acceleration drivers and submitted to the kernel for - * sheduling on the ring. This function schedules the IB + * scheduling on the ring. This function schedules the IB * on the gfx ring for execution by the GPU. */ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, @@ -2316,6 +2316,7 @@ static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) * gfx_v7_0_ring_test_ib - basic ring IB test * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Allocate an IB and execute it on the gfx ring (CIK). * Provides a basic gfx ring test to verify that IBs are working. @@ -2326,53 +2327,46 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; struct dma_fence *f = NULL; - uint32_t scratch; uint32_t tmp = 0; long r; - r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) - return r; - - WREG32(scratch, 0xCAFEDEAD); + WREG32(mmSCRATCH_REG0, 0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 256, &ib); + r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); if (r) - goto err1; + return r; ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); + ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START; ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) - goto err2; + goto error; r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { r = -ETIMEDOUT; - goto err2; + goto error; } else if (r < 0) { - goto err2; + goto error; } - tmp = RREG32(scratch); + tmp = RREG32(mmSCRATCH_REG0); if (tmp == 0xDEADBEEF) r = 0; else r = -EINVAL; -err2: +error: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); -err1: - amdgpu_gfx_scratch_free(adev, scratch); return r; } /* * CP. - * On CIK, gfx and compute now have independant command processors. + * On CIK, gfx and compute now have independent command processors. * * GFX * Gfx consists of a single ring and can process both gfx jobs and @@ -2403,15 +2397,12 @@ err1: */ static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { - int i; - - if (enable) { + if (enable) WREG32(mmCP_ME_CNTL, 0); - } else { - WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK)); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].sched.ready = false; - } + else + WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | + CP_ME_CNTL__PFP_HALT_MASK | + CP_ME_CNTL__CE_HALT_MASK)); udelay(50); } @@ -2604,7 +2595,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); /* set the wb address wether it's enabled or not */ - rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + rptr_addr = ring->rptr_gpu_addr; WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); @@ -2629,7 +2620,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring) { - return ring->adev->wb.wb[ring->rptr_offs]; + return *ring->rptr_cpu_addr; } static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) @@ -2650,7 +2641,7 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ - return ring->adev->wb.wb[ring->wptr_offs]; + return *ring->wptr_cpu_addr; } static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) @@ -2658,7 +2649,7 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* XXX check if swapping is necessary on BE */ - adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } @@ -2672,15 +2663,11 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) */ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) { - int i; - - if (enable) { + if (enable) WREG32(mmCP_MEC_CNTL, 0); - } else { - WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); - for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].sched.ready = false; - } + else + WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | + CP_MEC_CNTL__MEC_ME2_HALT_MASK)); udelay(50); } @@ -2958,12 +2945,12 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + wb_gpu_addr = ring->wptr_gpu_addr; mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + wb_gpu_addr = ring->rptr_gpu_addr; mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; mqd->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; @@ -3017,7 +3004,7 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, mqd->cp_hqd_active = 1; } -int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) { uint32_t tmp; uint32_t mqd_reg; @@ -3173,9 +3160,9 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev) } /** - * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP + * gfx_v7_0_ring_emit_pipeline_sync - cik vm flush using the CP * - * @ring: the ring to emmit the commands to + * @ring: the ring to emit the commands to * * Sync the command pipeline with the PFP. E.g. wait for everything * to be completed. @@ -3197,7 +3184,7 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 4); /* poll interval */ if (usepfp) { - /* synce CE with ME to prevent CE fetch CEIB before context switch done */ + /* sync CE with ME to prevent CE fetch CEIB before context switch done */ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); amdgpu_ring_write(ring, 0); amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); @@ -3214,7 +3201,9 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) /** * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP * - * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using the CP (CIK). @@ -3318,6 +3307,10 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) return r; } + /* init spm vmid with 0xf */ + if (adev->gfx.rlc.funcs->update_spm_vmid) + adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + return 0; } @@ -3542,6 +3535,22 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) return 0; } +static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +{ + u32 data; + + amdgpu_gfx_off_ctrl(adev, false); + + data = RREG32(mmRLC_SPM_VMID); + + data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK; + data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT; + + WREG32(mmRLC_SPM_VMID, data); + + amdgpu_gfx_off_ctrl(adev, true); +} + static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) { u32 data, orig, tmp, tmp2; @@ -4157,6 +4166,7 @@ static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); + dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, @@ -4193,7 +4203,8 @@ static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { .resume = gfx_v7_0_rlc_resume, .stop = gfx_v7_0_rlc_stop, .reset = gfx_v7_0_rlc_reset, - .start = gfx_v7_0_rlc_start + .start = gfx_v7_0_rlc_start, + .update_spm_vmid = gfx_v7_0_update_spm_vmid }; static int gfx_v7_0_early_init(void *handle) @@ -4201,7 +4212,8 @@ static int gfx_v7_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; + adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), + AMDGPU_MAX_COMPUTE_RINGS); adev->gfx.funcs = &gfx_v7_0_gfx_funcs; adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs; gfx_v7_0_set_ring_funcs(adev); @@ -4230,7 +4242,7 @@ static int gfx_v7_0_late_init(void *handle) static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; - u32 mc_shared_chmap, mc_arb_ramcfg; + u32 mc_arb_ramcfg; u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; u32 tmp; @@ -4307,10 +4319,14 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) break; } - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; + adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); + adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; adev->gfx.config.mem_max_burst_length_bytes = 256; if (adev->flags & AMD_IS_APU) { @@ -4390,7 +4406,8 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); + &adev->gfx.eop_irq, irq_type, + AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) return r; @@ -4436,8 +4453,6 @@ static int gfx_v7_0_sw_init(void *handle) if (r) return r; - gfx_v7_0_scratch_init(adev); - r = gfx_v7_0_init_microcode(adev); if (r) { DRM_ERROR("Failed to load gfx firmware!\n"); @@ -4462,7 +4477,9 @@ static int gfx_v7_0_sw_init(void *handle) ring->ring_obj = NULL; sprintf(ring->name, "gfx"); r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP); + &adev->gfx.eop_irq, + AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, + AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) return r; } @@ -4526,6 +4543,8 @@ static int gfx_v7_0_hw_init(void *handle) gfx_v7_0_constants_init(adev); + /* init CSB */ + adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* init rlc */ r = adev->gfx.rlc.funcs->resume(adev); if (r) @@ -4950,6 +4969,32 @@ static int gfx_v7_0_set_powergating_state(void *handle, return 0; } +static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + +static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5)); + amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA | + PACKET3_TC_ACTION_ENA | + PACKET3_SH_KCACHE_ACTION_ENA | + PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */ + amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */ + amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */ + amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */ + amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ +} + static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .name = "gfx_v7_0", .early_init = gfx_v7_0_early_init, @@ -4982,7 +5027,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */ 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ - 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ + 3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/ + 5, /* SURFACE_SYNC */ .emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */ .emit_ib = gfx_v7_0_ring_emit_ib_gfx, .emit_fence = gfx_v7_0_ring_emit_fence_gfx, @@ -4997,6 +5043,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, .emit_wreg = gfx_v7_0_ring_emit_wreg, .soft_recovery = gfx_v7_0_ring_soft_recovery, + .emit_mem_sync = gfx_v7_0_emit_mem_sync, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { @@ -5013,7 +5060,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { 5 + /* hdp invalidate */ 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */ - 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ + 7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ + 7, /* gfx_v7_0_emit_mem_sync_compute */ .emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */ .emit_ib = gfx_v7_0_ring_emit_ib_compute, .emit_fence = gfx_v7_0_ring_emit_fence_compute, @@ -5026,6 +5074,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v7_0_ring_emit_wreg, + .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute, }; static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) @@ -5131,15 +5180,6 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->lds_size = 64; } -const struct amdgpu_ip_block_version gfx_v7_0_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_GFX, - .major = 7, - .minor = 0, - .rev = 0, - .funcs = &gfx_v7_0_ip_funcs, -}; - const struct amdgpu_ip_block_version gfx_v7_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, |