aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c272
1 files changed, 156 insertions, 116 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 0db9f488da7e..0f2976507e48 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1580,10 +1580,10 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
* @adev: amdgpu_device pointer
* @se_num: shader engine to address
* @sh_num: sh block to address
+ * @instance: Certain registers are instanced per SE or SH.
+ * 0xffffffff means broadcast to all SEs or SHs (CIK).
*
- * Select which SE, SH combinations to address. Certain
- * registers are instanced per SE or SH. 0xffffffff means
- * broadcast to all SEs or SHs (CIK).
+ * Select which SE, SH combinations to address.
*/
static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
u32 se_num, u32 sh_num, u32 instance)
@@ -1779,8 +1779,6 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev,
* gfx_v7_0_setup_rb - setup the RBs on the asic
*
* @adev: amdgpu_device pointer
- * @se_num: number of SEs (shader engines) for the asic
- * @sh_per_se: number of SH blocks per SE for the asic
*
* Configures per-SE/SH RB registers (CIK).
*/
@@ -1841,6 +1839,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
mutex_unlock(&adev->grbm_idx_mutex);
}
+#define DEFAULT_SH_MEM_BASES (0x6000)
/**
* gfx_v7_0_init_compute_vmid - gart enable
*
@@ -1849,9 +1848,6 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
* Initialize compute vmid sh_mem registers
*
*/
-#define DEFAULT_SH_MEM_BASES (0x6000)
-#define FIRST_COMPUTE_VMID (8)
-#define LAST_COMPUTE_VMID (16)
static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
{
int i;
@@ -1869,7 +1865,7 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
sh_mem_config |= MTYPE_NONCACHED << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT;
mutex_lock(&adev->srbm_mutex);
- for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
cik_srbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32(mmSH_MEM_CONFIG, sh_mem_config);
@@ -1879,6 +1875,33 @@ static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev)
}
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ access. These should be enabled by FW for target VMIDs. */
+ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
+ WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[i].oa, 0);
+ }
+}
+
+static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev)
+{
+ int vmid;
+
+ /*
+ * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+ * access. Compute VMIDs should be enabled by FW for target VMIDs,
+ * the driver can enable them for graphics. VMID0 should maintain
+ * access so that HWS firmware can save/restore entries.
+ */
+ for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
+ }
}
static void gfx_v7_0_config_init(struct amdgpu_device *adev)
@@ -1959,6 +1982,7 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
mutex_unlock(&adev->srbm_mutex);
gfx_v7_0_init_compute_vmid(adev);
+ gfx_v7_0_init_gds_vmid(adev);
WREG32(mmSX_DEBUG_1, 0x20);
@@ -2025,30 +2049,9 @@ static void gfx_v7_0_constants_init(struct amdgpu_device *adev)
udelay(50);
}
-/*
- * GPU scratch registers helpers function.
- */
-/**
- * gfx_v7_0_scratch_init - setup driver info for CP scratch regs
- *
- * @adev: amdgpu_device pointer
- *
- * Set up the number and offset of the CP scratch registers.
- * NOTE: use of CP scratch registers is a legacy inferface and
- * is not used by default on newer asics (r6xx+). On newer asics,
- * memory buffers are used for fences rather than scratch regs.
- */
-static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
-{
- adev->gfx.scratch.num_reg = 8;
- adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
- adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
-}
-
/**
* gfx_v7_0_ring_test_ring - basic gfx ring test
*
- * @adev: amdgpu_device pointer
* @ring: amdgpu_ring structure holding ring information
*
* Allocate a scratch register and write to it using the gfx ring (CIK).
@@ -2059,44 +2062,35 @@ static void gfx_v7_0_scratch_init(struct amdgpu_device *adev)
static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- uint32_t scratch;
uint32_t tmp = 0;
unsigned i;
int r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
- goto error_free_scratch;
+ return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+ amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
break;
udelay(1);
}
if (i >= adev->usec_timeout)
r = -ETIMEDOUT;
-
-error_free_scratch:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
/**
- * gfx_v7_0_ring_emit_hdp - emit an hdp flush on the cp
+ * gfx_v7_0_ring_emit_hdp_flush - emit an hdp flush on the cp
*
- * @adev: amdgpu_device pointer
- * @ridx: amdgpu ring index
+ * @ring: amdgpu_ring structure holding ring information
*
* Emits an hdp flush on the cp.
*/
@@ -2145,10 +2139,12 @@ static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
/**
* gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
*
- * @adev: amdgpu_device pointer
- * @fence: amdgpu fence object
+ * @ring: amdgpu_ring structure holding ring information
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
*
- * Emits a fence sequnce number on the gfx ring and flushes
+ * Emits a fence sequence number on the gfx ring and flushes
* GPU caches.
*/
static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
@@ -2186,10 +2182,12 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
/**
* gfx_v7_0_ring_emit_fence_compute - emit a fence on the compute ring
*
- * @adev: amdgpu_device pointer
- * @fence: amdgpu fence object
+ * @ring: amdgpu_ring structure holding ring information
+ * @addr: address
+ * @seq: sequence number
+ * @flags: fence related flags
*
- * Emits a fence sequnce number on the compute ring and flushes
+ * Emits a fence sequence number on the compute ring and flushes
* GPU caches.
*/
static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
@@ -2216,15 +2214,17 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
* IB stuff
*/
/**
- * gfx_v7_0_ring_emit_ib - emit an IB (Indirect Buffer) on the ring
+ * gfx_v7_0_ring_emit_ib_gfx - emit an IB (Indirect Buffer) on the ring
*
* @ring: amdgpu_ring structure holding ring information
+ * @job: job to retrieve vmid from
* @ib: amdgpu indirect buffer object
+ * @flags: options (AMDGPU_HAVE_CTX_SWITCH)
*
* Emits an DE (drawing engine) or CE (constant engine) IB
* on the gfx ring. IBs are usually generated by userspace
* acceleration drivers and submitted to the kernel for
- * sheduling on the ring. This function schedules the IB
+ * scheduling on the ring. This function schedules the IB
* on the gfx ring for execution by the GPU.
*/
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
@@ -2316,6 +2316,7 @@ static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
* gfx_v7_0_ring_test_ib - basic ring IB test
*
* @ring: amdgpu_ring structure holding ring information
+ * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
*
* Allocate an IB and execute it on the gfx ring (CIK).
* Provides a basic gfx ring test to verify that IBs are working.
@@ -2326,53 +2327,46 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct dma_fence *f = NULL;
- uint32_t scratch;
uint32_t tmp = 0;
long r;
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r)
- return r;
-
- WREG32(scratch, 0xCAFEDEAD);
+ WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
- goto err1;
+ return r;
ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
- ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
+ ib.ptr[1] = mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START;
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
- goto err2;
+ goto error;
r = dma_fence_wait_timeout(f, false, timeout);
if (r == 0) {
r = -ETIMEDOUT;
- goto err2;
+ goto error;
} else if (r < 0) {
- goto err2;
+ goto error;
}
- tmp = RREG32(scratch);
+ tmp = RREG32(mmSCRATCH_REG0);
if (tmp == 0xDEADBEEF)
r = 0;
else
r = -EINVAL;
-err2:
+error:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
-err1:
- amdgpu_gfx_scratch_free(adev, scratch);
return r;
}
/*
* CP.
- * On CIK, gfx and compute now have independant command processors.
+ * On CIK, gfx and compute now have independent command processors.
*
* GFX
* Gfx consists of a single ring and can process both gfx jobs and
@@ -2403,15 +2397,12 @@ err1:
*/
static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
-
- if (enable) {
+ if (enable)
WREG32(mmCP_ME_CNTL, 0);
- } else {
- WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
- for (i = 0; i < adev->gfx.num_gfx_rings; i++)
- adev->gfx.gfx_ring[i].sched.ready = false;
- }
+ else
+ WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK |
+ CP_ME_CNTL__PFP_HALT_MASK |
+ CP_ME_CNTL__CE_HALT_MASK));
udelay(50);
}
@@ -2604,7 +2595,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
/* set the wb address wether it's enabled or not */
- rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ rptr_addr = ring->rptr_gpu_addr;
WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
@@ -2629,7 +2620,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
static u64 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring)
{
- return ring->adev->wb.wb[ring->rptr_offs];
+ return *ring->rptr_cpu_addr;
}
static u64 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
@@ -2650,7 +2641,7 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
static u64 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
/* XXX check if swapping is necessary on BE */
- return ring->adev->wb.wb[ring->wptr_offs];
+ return *ring->wptr_cpu_addr;
}
static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
@@ -2658,7 +2649,7 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
/* XXX check if swapping is necessary on BE */
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
+ *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}
@@ -2672,15 +2663,11 @@ static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
*/
static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
- int i;
-
- if (enable) {
+ if (enable)
WREG32(mmCP_MEC_CNTL, 0);
- } else {
- WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
- for (i = 0; i < adev->gfx.num_compute_rings; i++)
- adev->gfx.compute_ring[i].sched.ready = false;
- }
+ else
+ WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
+ CP_MEC_CNTL__MEC_ME2_HALT_MASK));
udelay(50);
}
@@ -2958,12 +2945,12 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+ wb_gpu_addr = ring->wptr_gpu_addr;
mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
/* set the wb address wether it's enabled or not */
- wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+ wb_gpu_addr = ring->rptr_gpu_addr;
mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
mqd->cp_hqd_pq_rptr_report_addr_hi =
upper_32_bits(wb_gpu_addr) & 0xffff;
@@ -3017,7 +3004,7 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
mqd->cp_hqd_active = 1;
}
-int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
+static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
{
uint32_t tmp;
uint32_t mqd_reg;
@@ -3173,9 +3160,9 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
}
/**
- * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP
+ * gfx_v7_0_ring_emit_pipeline_sync - cik vm flush using the CP
*
- * @ring: the ring to emmit the commands to
+ * @ring: the ring to emit the commands to
*
* Sync the command pipeline with the PFP. E.g. wait for everything
* to be completed.
@@ -3197,7 +3184,7 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 4); /* poll interval */
if (usepfp) {
- /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+ /* sync CE with ME to prevent CE fetch CEIB before context switch done */
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
@@ -3214,7 +3201,9 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
/**
* gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP
*
- * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring pointer
+ * @vmid: vmid number to use
+ * @pd_addr: address
*
* Update the page table base and flush the VM TLB
* using the CP (CIK).
@@ -3318,6 +3307,10 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
return r;
}
+ /* init spm vmid with 0xf */
+ if (adev->gfx.rlc.funcs->update_spm_vmid)
+ adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
+
return 0;
}
@@ -3542,6 +3535,22 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
return 0;
}
+static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
+{
+ u32 data;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+
+ data = RREG32(mmRLC_SPM_VMID);
+
+ data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK;
+ data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT;
+
+ WREG32(mmRLC_SPM_VMID, data);
+
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
{
u32 data, orig, tmp, tmp2;
@@ -4157,6 +4166,7 @@ static void gfx_v7_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
}
static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
@@ -4193,7 +4203,8 @@ static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
.resume = gfx_v7_0_rlc_resume,
.stop = gfx_v7_0_rlc_stop,
.reset = gfx_v7_0_rlc_reset,
- .start = gfx_v7_0_rlc_start
+ .start = gfx_v7_0_rlc_start,
+ .update_spm_vmid = gfx_v7_0_update_spm_vmid
};
static int gfx_v7_0_early_init(void *handle)
@@ -4201,7 +4212,8 @@ static int gfx_v7_0_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
+ AMDGPU_MAX_COMPUTE_RINGS);
adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
gfx_v7_0_set_ring_funcs(adev);
@@ -4230,7 +4242,7 @@ static int gfx_v7_0_late_init(void *handle)
static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
{
u32 gb_addr_config;
- u32 mc_shared_chmap, mc_arb_ramcfg;
+ u32 mc_arb_ramcfg;
u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
u32 tmp;
@@ -4307,10 +4319,14 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
break;
}
- mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
+ adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFBANK);
+ adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
+ MC_ARB_RAMCFG, NOOFRANKS);
+
adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
adev->gfx.config.mem_max_burst_length_bytes = 256;
if (adev->flags & AMD_IS_APU) {
@@ -4390,7 +4406,8 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, irq_type);
+ &adev->gfx.eop_irq, irq_type,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
@@ -4436,8 +4453,6 @@ static int gfx_v7_0_sw_init(void *handle)
if (r)
return r;
- gfx_v7_0_scratch_init(adev);
-
r = gfx_v7_0_init_microcode(adev);
if (r) {
DRM_ERROR("Failed to load gfx firmware!\n");
@@ -4462,7 +4477,9 @@ static int gfx_v7_0_sw_init(void *handle)
ring->ring_obj = NULL;
sprintf(ring->name, "gfx");
r = amdgpu_ring_init(adev, ring, 1024,
- &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
+ &adev->gfx.eop_irq,
+ AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
}
@@ -4526,6 +4543,8 @@ static int gfx_v7_0_hw_init(void *handle)
gfx_v7_0_constants_init(adev);
+ /* init CSB */
+ adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
/* init rlc */
r = adev->gfx.rlc.funcs->resume(adev);
if (r)
@@ -4950,6 +4969,32 @@ static int gfx_v7_0_set_powergating_state(void *handle,
return 0;
}
+static void gfx_v7_0_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
+static void gfx_v7_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
+ amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+ PACKET3_TC_ACTION_ENA |
+ PACKET3_SH_KCACHE_ACTION_ENA |
+ PACKET3_SH_ICACHE_ACTION_ENA); /* CP_COHER_CNTL */
+ amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
+ amdgpu_ring_write(ring, 0xff); /* CP_COHER_SIZE_HI */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
+ amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
+ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
+}
+
static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
.name = "gfx_v7_0",
.early_init = gfx_v7_0_early_init,
@@ -4982,7 +5027,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
- 3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+ 3 + 4 + /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
+ 5, /* SURFACE_SYNC */
.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
@@ -4997,6 +5043,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
.soft_recovery = gfx_v7_0_ring_soft_recovery,
+ .emit_mem_sync = gfx_v7_0_emit_mem_sync,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5013,7 +5060,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
5 + /* hdp invalidate */
7 + /* gfx_v7_0_ring_emit_pipeline_sync */
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
- 7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+ 7 + 7 + 7 + /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
+ 7, /* gfx_v7_0_emit_mem_sync_compute */
.emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
.emit_fence = gfx_v7_0_ring_emit_fence_compute,
@@ -5026,6 +5074,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v7_0_ring_emit_wreg,
+ .emit_mem_sync = gfx_v7_0_emit_mem_sync_compute,
};
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5131,15 +5180,6 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
cu_info->lds_size = 64;
}
-const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_GFX,
- .major = 7,
- .minor = 0,
- .rev = 0,
- .funcs = &gfx_v7_0_ip_funcs,
-};
-
const struct amdgpu_ip_block_version gfx_v7_1_ip_block =
{
.type = AMD_IP_BLOCK_TYPE_GFX,