aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 52
1 files changed, 40 insertions, 12 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5d8b30fd4534..cc8ad3831982 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -60,6 +60,7 @@
#include "amdgpu_pm.h"
#include "amdgpu_xgmi.h"
+#include "amdgpu_ras.h"
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -1506,7 +1507,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return -EAGAIN;
}
- adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
+ adev->pm.pp_feature = amdgpu_pp_feature_mask;
+ if (amdgpu_sriov_vf(adev))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
for (i = 0; i < adev->num_ip_blocks; i++) {
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
@@ -1638,6 +1641,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
int i, r;
+ r = amdgpu_ras_init(adev);
+ if (r)
+ return r;
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@@ -1681,6 +1688,13 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
}
}
+ r = amdgpu_ib_pool_init(adev);
+ if (r) {
+ dev_err(adev->dev, "IB initialization failed (%d).\n", r);
+ amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
+ goto init_failed;
+ }
+
r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
if (r)
goto init_failed;
@@ -1869,6 +1883,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
int i, r;
+ amdgpu_ras_pre_fini(adev);
+
if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_remove_device(adev);
@@ -1917,6 +1933,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
amdgpu_free_static_csa(&adev->virt.csa_obj);
amdgpu_device_wb_fini(adev);
amdgpu_device_vram_scratch_fini(adev);
+ amdgpu_ib_pool_fini(adev);
}
r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
@@ -1937,6 +1954,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
adev->ip_blocks[i].status.late_initialized = false;
}
+ amdgpu_ras_fini(adev);
+
if (amdgpu_sriov_vf(adev))
if (amdgpu_virt_release_full_gpu(adev, false))
DRM_ERROR("failed to release exclusive mode on fini\n");
@@ -1999,6 +2018,10 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
+
+ /*set to low pstate by default */
+ amdgpu_xgmi_set_pstate(adev, 0);
+
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
@@ -2369,7 +2392,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
adev->asic_reset_res = amdgpu_asic_reset(adev);
if (adev->asic_reset_res)
- DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
+ DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
adev->asic_reset_res, adev->ddev->unique);
}
@@ -2448,6 +2471,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
mutex_init(&adev->lock_reset);
+ mutex_init(&adev->virt.dpm_mutex);
amdgpu_device_check_arguments(adev);
@@ -2642,13 +2666,6 @@ fence_driver_init:
/* Get a log2 for easy divisions. */
adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
- r = amdgpu_ib_pool_init(adev);
- if (r) {
- dev_err(adev->dev, "IB initialization failed (%d).\n", r);
- amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
- goto failed;
- }
-
amdgpu_fbdev_init(adev);
r = amdgpu_pm_sysfs_init(adev);
@@ -2694,6 +2711,9 @@ fence_driver_init:
goto failed;
}
+ /* must succeed. */
+ amdgpu_ras_post_init(adev);
+
return 0;
failed:
@@ -2726,7 +2746,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
else
drm_atomic_helper_shutdown(adev->ddev);
}
- amdgpu_ib_pool_fini(adev);
amdgpu_fence_driver_fini(adev);
amdgpu_pm_sysfs_fini(adev);
amdgpu_fbdev_fini(adev);
@@ -3165,6 +3184,7 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
/* No need to recover an evicted BO */
if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
+ shadow->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET ||
shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
continue;
@@ -3224,6 +3244,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
if (r)
return r;
+ amdgpu_amdkfd_pre_reset(adev);
+
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
@@ -3243,6 +3265,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
+ amdgpu_amdkfd_post_reset(adev);
error:
amdgpu_virt_init_data_exchange(adev);
@@ -3375,7 +3398,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
r = amdgpu_asic_reset(tmp_adev);
if (r) {
- DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s",
+ DRM_ERROR("ASIC reset failed with error, %d for drm dev, %s",
r, tmp_adev->ddev->unique);
break;
}
@@ -3392,6 +3415,11 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
break;
}
}
+
+ list_for_each_entry(tmp_adev, device_list_handle,
+ gmc.xgmi.head) {
+ amdgpu_ras_reserve_bad_pages(tmp_adev);
+ }
}
}
@@ -3410,7 +3438,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
if (vram_lost) {
- DRM_ERROR("VRAM is lost!\n");
+ DRM_INFO("VRAM is lost due to GPU reset!\n");
atomic_inc(&tmp_adev->vram_lost_counter);
}