summaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/arm/Kbuild17
-rw-r--r--drivers/gpu/arm/Kconfig19
-rw-r--r--drivers/gpu/arm/midgard/Kbuild241
-rw-r--r--drivers/gpu/arm/midgard/Kconfig209
-rw-r--r--drivers/gpu/arm/midgard/Makefile37
-rw-r--r--drivers/gpu/arm/midgard/Makefile.kbase17
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/Kbuild64
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h29
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c22
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h26
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c284
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h24
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c116
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h67
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c124
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c85
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c536
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h62
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h45
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h39
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c471
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c385
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h112
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c1477
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h185
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c1407
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h83
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c296
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h140
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c316
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h44
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c295
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h59
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c63
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h77
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c375
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c179
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h92
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c65
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h40
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c69
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h63
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h555
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c72
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h60
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c1264
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h557
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c532
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c39
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c895
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h227
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c107
-rw-r--r--drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h57
-rw-r--r--drivers/gpu/arm/midgard/docs/Doxyfile126
-rw-r--r--drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot112
-rw-r--r--drivers/gpu/arm/midgard/docs/policy_overview.dot63
-rw-r--r--drivers/gpu/arm/midgard/mali_base_hwconfig_features.h163
-rw-r--r--drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h786
-rw-r--r--drivers/gpu/arm/midgard/mali_base_kernel.h1613
-rw-r--r--drivers/gpu/arm/midgard/mali_base_kernel_sync.h47
-rw-r--r--drivers/gpu/arm/midgard/mali_base_mem_priv.h52
-rw-r--r--drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h24
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase.h499
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c209
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h23
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_cache_policy.c61
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_cache_policy.h45
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_config.c51
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_config.h327
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_config_defaults.h259
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_context.c274
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_core_linux.c3930
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_cpuprops.c125
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_cpuprops.h53
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_debug.c39
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_debug.h164
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c251
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h25
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_defs.h1126
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_device.c659
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c76
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_event.c195
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_gator.h45
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_gator_api.c322
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_gator_api.h219
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h2159
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c98
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h38
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_gpuprops.c296
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_gpuprops.h54
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h91
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_hw.c210
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_hw.h52
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h54
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h37
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h35
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h116
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h283
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h206
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h53
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_instr.c157
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_instr.h85
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_jd.c1726
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c114
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h37
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_jm.c132
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_jm.h106
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_js.c3177
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_js.h1054
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c310
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h158
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_js_defs.h517
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_js_policy.h763
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c297
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h81
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_linux.h43
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem.c1363
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem.h774
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c292
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h32
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c402
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem_linux.c1909
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem_linux.h73
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h45
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c105
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h58
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h33
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mmu.c1653
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h121
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h44
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c188
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_platform_fake.c126
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_pm.c204
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_pm.h171
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h40
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_replay.c1134
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_security.c76
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_security.h52
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_smc.c77
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_smc.h58
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_softjobs.c442
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_sync.c182
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_sync.h91
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_sync_user.c157
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_tlstream.c1695
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_tlstream.h284
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_trace_defs.h264
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c232
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h356
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h134
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_uku.h561
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_utility.c33
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_utility.h37
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_vinstr.c485
-rw-r--r--drivers/gpu/arm/midgard/mali_kbase_vinstr.h102
-rw-r--r--drivers/gpu/arm/midgard/mali_linux_kbase_trace.h201
-rw-r--r--drivers/gpu/arm/midgard/mali_linux_trace.h211
-rw-r--r--drivers/gpu/arm/midgard/mali_malisw.h131
-rw-r--r--drivers/gpu/arm/midgard/mali_midg_regmap.h542
-rw-r--r--drivers/gpu/arm/midgard/mali_timeline.h396
-rw-r--r--drivers/gpu/arm/midgard/mali_uk.h141
-rw-r--r--drivers/gpu/arm/midgard/platform/Kbuild21
-rw-r--r--drivers/gpu/arm/midgard/platform/Kconfig24
-rw-r--r--drivers/gpu/arm/midgard/platform/devicetree/Kbuild16
-rw-r--r--drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c23
-rw-r--r--drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h86
-rw-r--r--drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c107
-rw-r--r--drivers/gpu/arm/midgard/platform/juno_soc/Kbuild19
-rw-r--r--drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c84
-rw-r--r--drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c172
-rw-r--r--drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h94
-rw-r--r--drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h26
-rw-r--r--drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h38
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress/Kbuild18
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h97
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c85
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c210
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h48
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild16
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h86
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c79
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild18
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h88
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c83
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c71
-rw-r--r--drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h28
-rw-r--r--drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h53
-rw-r--r--drivers/gpu/drm/pl111/Kbuild28
-rw-r--r--drivers/gpu/drm/pl111/Kconfig23
-rw-r--r--drivers/gpu/drm/pl111/Makefile32
-rw-r--r--drivers/gpu/drm/pl111/pl111_clcd_ext.h95
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm.h270
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_connector.c170
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_crtc.c449
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_cursor.c331
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_device.c336
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_dma_buf.c625
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_encoder.c107
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_fb.c202
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_funcs.h130
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_gem.c476
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_pl111.c417
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_platform.c151
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_suspend.c43
-rw-r--r--drivers/gpu/drm/pl111/pl111_drm_vma.c308
205 files changed, 58813 insertions, 0 deletions
diff --git a/drivers/gpu/arm/Kbuild b/drivers/gpu/arm/Kbuild
new file mode 100644
index 00000000000..19c7e9a2659
--- /dev/null
+++ b/drivers/gpu/arm/Kbuild
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-$(CONFIG_MALI_MIDGARD) += midgard/
diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig
new file mode 100644
index 00000000000..1f30eb541d6
--- /dev/null
+++ b/drivers/gpu/arm/Kconfig
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+menu "ARM GPU Configuration"
+source "drivers/gpu/arm/midgard/Kconfig"
+endmenu
diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild
new file mode 100644
index 00000000000..f4384983140
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Kbuild
@@ -0,0 +1,241 @@
+#
+# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= "r6p0-02rel0"
+
+# Paths required for build
+KBASE_PATH = $(src)
+KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
+UMP_PATH = $(src)/../../../base
+
+ifeq ($(CONFIG_MALI_ERROR_INJECTION),y)
+MALI_ERROR_INJECT_ON = 1
+endif
+
+# Set up defaults if not defined by build system
+MALI_CUSTOMER_RELEASE ?= 1
+MALI_UNIT_TEST ?= 0
+MALI_KERNEL_TEST_API ?= 0
+MALI_ERROR_INJECT_ON ?= 0
+MALI_MOCK_TEST ?= 0
+MALI_COVERAGE ?= 0
+MALI_INSTRUMENTATION_LEVEL ?= 0
+# This workaround is for what seems to be a compiler bug we observed in
+# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling
+# the "_Pragma" syntax, where an error message is returned:
+#
+# "internal compiler error: unspellable token PRAGMA"
+#
+# This regression has thus far only been seen on the GCC 4.7 compiler bundled
+# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds
+# which are not known to be used with AOSP, is hardcoded to disable the
+# workaround, i.e. set the define to 0.
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0
+
+# Set up our defines, which will be passed to gcc
+DEFINES = \
+ -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \
+ -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \
+ -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
+ -DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \
+ -DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \
+ -DMALI_COVERAGE=$(MALI_COVERAGE) \
+ -DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \
+ -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
+ -DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598)
+
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+else
+# out-of-tree
+DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)
+endif
+
+DEFINES += -I$(srctree)/drivers/staging/android
+
+# Use our defines when compiling
+ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux
+
+SRC := \
+ mali_kbase_device.c \
+ mali_kbase_cache_policy.c \
+ mali_kbase_mem.c \
+ mali_kbase_mmu.c \
+ mali_kbase_jd.c \
+ mali_kbase_jd_debugfs.c \
+ mali_kbase_jm.c \
+ mali_kbase_cpuprops.c \
+ mali_kbase_gpuprops.c \
+ mali_kbase_js.c \
+ mali_kbase_js_ctx_attr.c \
+ mali_kbase_event.c \
+ mali_kbase_context.c \
+ mali_kbase_pm.c \
+ mali_kbase_config.c \
+ mali_kbase_security.c \
+ mali_kbase_instr.c \
+ mali_kbase_vinstr.c \
+ mali_kbase_softjobs.c \
+ mali_kbase_10969_workaround.c \
+ mali_kbase_hw.c \
+ mali_kbase_utility.c \
+ mali_kbase_debug.c \
+ mali_kbase_trace_timeline.c \
+ mali_kbase_gpu_memory_debugfs.c \
+ mali_kbase_mem_linux.c \
+ mali_kbase_core_linux.c \
+ mali_kbase_sync.c \
+ mali_kbase_sync_user.c \
+ mali_kbase_replay.c \
+ mali_kbase_mem_profile_debugfs.c \
+ mali_kbase_mmu_mode_lpae.c \
+ mali_kbase_disjoint_events.c \
+ mali_kbase_gator_api.c \
+ mali_kbase_debug_mem_view.c \
+ mali_kbase_smc.c
+
+ifeq ($(CONFIG_MALI_MIPE_ENABLED),y)
+ SRC += mali_kbase_tlstream.c
+ ifeq ($(MALI_UNIT_TEST),1)
+ SRC += mali_kbase_tlstream_test.c
+ endif
+endif
+
+# Job Scheduler Policy: Completely Fair Scheduler
+SRC += mali_kbase_js_policy_cfs.c
+
+ifeq ($(CONFIG_MACH_MANTA),y)
+ SRC += mali_kbase_mem_alloc_carveout.c
+else
+ SRC += mali_kbase_mem_alloc.c
+endif
+
+ccflags-y += -I$(KBASE_PATH)
+
+# in-tree/out-of-tree logic needs to be slightly different to determine if a file is present
+ifeq ($(KBUILD_EXTMOD),)
+# in-tree
+MALI_METRICS_PATH = $(srctree)/drivers/gpu/arm/midgard
+else
+# out-of-tree
+MALI_METRICS_PATH = $(KBUILD_EXTMOD)
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y)
+ SRC += mali_kbase_platform_fake.c
+
+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y)
+ SRC += platform/vexpress/mali_kbase_config_vexpress.c \
+ platform/vexpress/mali_kbase_cpu_vexpress.c
+ ccflags-y += -I$(src)/platform/vexpress
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y)
+ SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c
+ ccflags-y += -I$(src)/platform/rtsm_ve
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y)
+ SRC += platform/juno/mali_kbase_config_vexpress.c
+ ccflags-y += -I$(src)/platform/juno
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y)
+ SRC += platform/juno_soc/mali_kbase_config_juno_soc.c
+ ccflags-y += -I$(src)/platform/juno_soc
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y)
+ SRC += platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
+ ccflags-y += -I$(src)/platform/vexpress_1xv7_a57
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y)
+ SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \
+ platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
+ ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y)
+ SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \
+ platform/a7_kipling/mali_kbase_cpu_a7_kipling.c
+ ccflags-y += -I$(src)/platform/a7_kipling
+ endif
+
+ ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+ # remove begin and end quotes from the Kconfig string type
+ platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+ MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ ifeq ($(CONFIG_MALI_MIDGARD),m)
+ include $(src)/platform/$(platform_name)/Kbuild
+ else ifeq ($(CONFIG_MALI_MIDGARD),y)
+ obj-$(CONFIG_MALI_MIDGARD) += platform/
+ endif
+ endif
+endif # CONFIG_MALI_PLATFORM_FAKE=y
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name)
+ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR)
+ifeq ($(CONFIG_MALI_MIDGARD),m)
+include $(src)/platform/$(platform_name)/Kbuild
+else ifeq ($(CONFIG_MALI_MIDGARD),y)
+obj-$(CONFIG_MALI_MIDGARD) += platform/
+endif
+endif
+
+ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y)
+ SRC += platform/devicetree/mali_kbase_runtime_pm.c
+ SRC += platform/devicetree/mali_kbase_config_devicetree.c
+ ccflags-y += -I$(src)/platform/devicetree
+endif
+
+# Tell the Linux build system from which .o file to create the kernel module
+obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o
+
+# Tell the Linux build system to enable building of our .c files
+mali_kbase-y := $(SRC:.c=.o)
+
+ifneq ($(wildcard $(src)/internal/Kbuild),)
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+# This include may set MALI_BACKEND_PATH and CONFIG_MALI_BACKEND_REAL
+include $(src)/internal/Kbuild
+mali_kbase-y += $(INTERNAL:.c=.o)
+endif
+endif
+
+MALI_BACKEND_PATH ?= backend
+CONFIG_MALI_BACKEND ?= gpu
+CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND)
+
+ifeq ($(MALI_MOCK_TEST),1)
+ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu)
+# Test functionality
+mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o
+endif
+endif
+
+include $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild
+mali_kbase-y += $(BACKEND:.c=.o)
+
+ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
+subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)
diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig
new file mode 100644
index 00000000000..15430438004
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Kconfig
@@ -0,0 +1,209 @@
+#
+# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+menuconfig MALI_MIDGARD
+ tristate "Mali Midgard series support"
+ default n
+ help
+ Enable this option to build support for a ARM Mali Midgard GPU.
+
+ To compile this driver as a module, choose M here:
+ this will generate a single module, called mali_kbase.
+
+choice
+ prompt "Streamline support"
+ depends on MALI_MIDGARD
+ default MALI_TIMELINE_DISABLED
+ help
+ Select streamline support configuration.
+
+config MALI_TIMELINE_DISABLED
+ bool "Streamline support disabled"
+ help
+ Disable support for ARM Streamline Performance Analyzer.
+
+ Timeline support will not be included in
+ kernel code.
+ Debug stream will not be generated.
+
+config MALI_GATOR_SUPPORT
+ bool "Streamline support via Gator"
+ help
+ Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+ You will need the Gator device driver already loaded before loading this driver when enabling
+ Streamline debug support.
+
+config MALI_MIPE_ENABLED
+ bool "Streamline support via MIPE"
+ help
+ Adds diagnostic support for use with the ARM Streamline Performance Analyzer.
+
+ Stream will be transmitted directly to Mali GPU library.
+ Compatible version of the library is required to read debug stream generated by kernel.
+
+endchoice
+
+config MALI_MIDGARD_DVFS
+ bool "Enable DVFS"
+ depends on MALI_MIDGARD
+ default n
+ help
+ Choose this option to enable DVFS in the Mali Midgard DDK.
+
+config MALI_MIDGARD_RT_PM
+ bool "Enable Runtime power management"
+ depends on MALI_MIDGARD
+ depends on PM_RUNTIME
+ default n
+ help
+ Choose this option to enable runtime power management in the Mali Midgard DDK.
+
+config MALI_MIDGARD_ENABLE_TRACE
+ bool "Enable kbase tracing"
+ depends on MALI_MIDGARD
+ default n
+ help
+ Enables tracing in kbase. Trace log available through
+ the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
+
+config MALI_MIDGARD_DEBUG_SYS
+ bool "Enable sysfs for the Mali Midgard DDK "
+ depends on MALI_MIDGARD && SYSFS
+ default n
+ help
+ Enables sysfs for the Mali Midgard DDK. Set/Monitor the Mali Midgard DDK
+
+config MALI_DEVFREQ
+ bool "devfreq support for Mali"
+ depends on MALI_MIDGARD && PM_DEVFREQ
+ help
+ Support devfreq for Mali.
+
+ Using the devfreq framework and, by default, the simpleondemand
+ governor, the frequency of Mali will be dynamically selected from the
+ available OPPs.
+
+
+# MALI_EXPERT configuration options
+
+menuconfig MALI_EXPERT
+ depends on MALI_MIDGARD
+ bool "Enable Expert Settings"
+ default n
+ help
+ Enabling this option and modifying the default settings may produce a driver with performance or
+ other limitations.
+
+config MALI_DEBUG_SHADER_SPLIT_FS
+ bool "Allow mapping of shader cores via sysfs"
+ depends on MALI_MIDGARD && MALI_MIDGARD_DEBUG_SYS && MALI_EXPERT
+ default n
+ help
+ Select this option to provide a sysfs entry for runtime configuration of shader
+ core affinity masks.
+
+config MALI_PLATFORM_FAKE
+ bool "Enable fake platform device support"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ When you start to work with the Mali Midgard series device driver the platform-specific code of
+ the Linux kernel for your platform may not be complete. In this situation the kernel device driver
+ supports creating the platform device outside of the Linux platform-specific code.
+ Enable this option if would like to use a platform device configuration from within the device driver.
+
+choice
+ prompt "Platform configuration"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default MALI_PLATFORM_VEXPRESS
+ help
+ Select the SOC platform that contains a Mali Midgard GPU
+
+config MALI_PLATFORM_VEXPRESS
+ depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+ bool "Versatile Express"
+config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ
+ depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4)
+ bool "Versatile Express w/Virtex7 @ 40Mhz"
+config MALI_PLATFORM_GOLDFISH
+ depends on ARCH_GOLDFISH
+ bool "Android Goldfish virtual CPU"
+config MALI_PLATFORM_PBX
+ depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX
+ bool "Realview PBX-A9"
+config MALI_PLATFORM_THIRDPARTY
+ bool "Third Party Platform"
+endchoice
+
+config MALI_PLATFORM_THIRDPARTY_NAME
+ depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT
+ string "Third party platform name"
+ help
+ Enter the name of a third party platform that is supported. The third part configuration
+ file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name
+ specified here.
+
+config MALI_DEBUG
+ bool "Debug build"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Select this option for increased checking and reporting of errors.
+
+config MALI_NO_MALI
+ bool "No Mali"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ This can be used to test the driver in a simulated environment
+ whereby the hardware is not physically present. If the hardware is physically
+ present it will not be used. This can be used to test the majority of the
+ driver without needing actual hardware or for software benchmarking.
+ All calls to the simulated hardware will complete immediately as if the hardware
+ completed the task.
+
+config MALI_ERROR_INJECT
+ bool "Error injection"
+ depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
+ default n
+ help
+ Enables insertion of errors to test module failure and recovery mechanisms.
+
+config MALI_TRACE_TIMELINE
+ bool "Timeline tracing"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Enables timeline tracing through the kernel tracepoint system.
+
+config MALI_SYSTEM_TRACE
+ bool "Enable system event tracing support"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ Choose this option to enable system trace events for each
+ kbase event. This is typically used for debugging but has
+ minimal overhead when not in use. Enable only if you know what
+ you are doing.
+
+config MALI_GPU_TRACEPOINTS
+ bool "Enable GPU tracepoints"
+ depends on MALI_MIDGARD && ANDROID
+ select GPU_TRACEPOINTS
+ help
+ Enables GPU tracepoints using Android trace event definitions.
+
+source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile
new file mode 100644
index 00000000000..ded089243e6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Makefile
@@ -0,0 +1,37 @@
+#
+# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump
+KBASE_PATH_RELATIVE = $(CURDIR)
+KDS_PATH_RELATIVE = $(CURDIR)/../../../..
+EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers
+
+ifeq ($(MALI_UNIT_TEST), 1)
+ EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers
+ EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/../../../../tests/kutf/Module.symvers
+endif
+
+# GPL driver supports KDS
+EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers
+
+# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
+all:
+ $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
+
+clean:
+ $(MAKE) -C $(KDIR) M=$(CURDIR) clean
diff --git a/drivers/gpu/arm/midgard/Makefile.kbase b/drivers/gpu/arm/midgard/Makefile.kbase
new file mode 100644
index 00000000000..2bef9c25eae
--- /dev/null
+++ b/drivers/gpu/arm/midgard/Makefile.kbase
@@ -0,0 +1,17 @@
+#
+# (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM)
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
new file mode 100644
index 00000000000..05a7a826c5d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild
@@ -0,0 +1,64 @@
+#
+# (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+BACKEND += \
+ backend/gpu/mali_kbase_cache_policy_backend.c \
+ backend/gpu/mali_kbase_device_hw.c \
+ backend/gpu/mali_kbase_gpu.c \
+ backend/gpu/mali_kbase_gpuprops_backend.c \
+ backend/gpu/mali_kbase_irq_linux.c \
+ backend/gpu/mali_kbase_instr_backend.c \
+ backend/gpu/mali_kbase_jm_as.c \
+ backend/gpu/mali_kbase_jm_hw.c \
+ backend/gpu/mali_kbase_jm_rb.c \
+ backend/gpu/mali_kbase_js_affinity.c \
+ backend/gpu/mali_kbase_js_backend.c \
+ backend/gpu/mali_kbase_mmu_hw_direct.c \
+ backend/gpu/mali_kbase_pm_backend.c \
+ backend/gpu/mali_kbase_pm_driver.c \
+ backend/gpu/mali_kbase_pm_metrics.c \
+ backend/gpu/mali_kbase_pm_ca.c \
+ backend/gpu/mali_kbase_pm_ca_fixed.c \
+ backend/gpu/mali_kbase_pm_always_on.c \
+ backend/gpu/mali_kbase_pm_coarse_demand.c \
+ backend/gpu/mali_kbase_pm_demand.c \
+ backend/gpu/mali_kbase_pm_policy.c \
+ backend/gpu/mali_kbase_time.c
+
+ifeq ($(MALI_CUSTOMER_RELEASE),0)
+BACKEND += \
+ backend/gpu/mali_kbase_pm_ca_random.c \
+ backend/gpu/mali_kbase_pm_demand_always_powered.c \
+ backend/gpu/mali_kbase_pm_fast_start.c
+endif
+
+ifeq ($(CONFIG_MALI_DEVFREQ),y)
+BACKEND += backend/gpu/mali_kbase_devfreq.c
+endif
+
+ifeq ($(CONFIG_MALI_NO_MALI),y)
+ # Dummy model
+ BACKEND += backend/gpu/mali_kbase_model_dummy.c
+ BACKEND += backend/gpu/mali_kbase_model_linux.c
+ # HW error simulation
+ BACKEND += backend/gpu/mali_kbase_model_error_generator.c
+endif
+
+ifeq ($(wildcard $(MALI_METRICS_PATH)/backend/gpu/mali_kbase_pm_metrics_linux.c),)
+ BACKEND += backend/gpu/mali_kbase_pm_metrics_dummy.c
+else
+ BACKEND += backend/gpu/mali_kbase_pm_metrics_linux.c
+endif
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
new file mode 100644
index 00000000000..c8ae87eb84a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific configuration
+ */
+
+#ifndef _KBASE_BACKEND_CONFIG_H_
+#define _KBASE_BACKEND_CONFIG_H_
+
+/* Enable GPU reset API */
+#define KBASE_GPU_RESET_EN 1
+
+#endif /* _KBASE_BACKEND_CONFIG_H_ */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
new file mode 100644
index 00000000000..92a14fa1bae
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -0,0 +1,22 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "backend/gpu/mali_kbase_cache_policy_backend.h"
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
new file mode 100644
index 00000000000..42069fc88a1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _KBASE_CACHE_POLICY_BACKEND_H_
+#define _KBASE_CACHE_POLICY_BACKEND_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+
+
+#endif /* _KBASE_CACHE_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
new file mode 100644
index 00000000000..3791457f605
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c
@@ -0,0 +1,284 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp opp
+#define dev_pm_opp_get_voltage opp_get_voltage
+#define dev_pm_opp_get_opp_count opp_get_opp_count
+#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil
+#endif /* Linux >= 3.13 */
+
+
+static int
+kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+ struct dev_pm_opp *opp;
+ unsigned long freq = 0;
+ unsigned long voltage;
+ int err;
+
+ freq = *target_freq;
+
+ rcu_read_lock();
+ opp = devfreq_recommended_opp(dev, &freq, flags);
+ voltage = dev_pm_opp_get_voltage(opp);
+ rcu_read_unlock();
+ if (IS_ERR_OR_NULL(opp)) {
+ dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp));
+ return PTR_ERR(opp);
+ }
+
+ /*
+ * Only update if there is a change of frequency
+ */
+ if (kbdev->current_freq == freq) {
+ *target_freq = freq;
+ return 0;
+ }
+
+#ifdef CONFIG_REGULATOR
+ if (kbdev->regulator && kbdev->current_voltage != voltage
+ && kbdev->current_freq < freq) {
+ err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+ if (err) {
+ dev_err(dev, "Failed to increase voltage (%d)\n", err);
+ return err;
+ }
+ }
+#endif
+
+ err = clk_set_rate(kbdev->clock, freq);
+ if (err) {
+ dev_err(dev, "Failed to set clock %lu (target %lu)\n",
+ freq, *target_freq);
+ return err;
+ }
+
+#ifdef CONFIG_REGULATOR
+ if (kbdev->regulator && kbdev->current_voltage != voltage
+ && kbdev->current_freq > freq) {
+ err = regulator_set_voltage(kbdev->regulator, voltage, voltage);
+ if (err) {
+ dev_err(dev, "Failed to decrease voltage (%d)\n", err);
+ return err;
+ }
+ }
+#endif
+
+ *target_freq = freq;
+ kbdev->current_voltage = voltage;
+ kbdev->current_freq = freq;
+
+ kbase_pm_reset_dvfs_utilisation(kbdev);
+
+ return err;
+}
+
+static int
+kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+ *freq = kbdev->current_freq;
+
+ return 0;
+}
+
+static int
+kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+ stat->current_frequency = kbdev->current_freq;
+
+ kbase_pm_get_dvfs_utilisation(kbdev,
+ &stat->total_time, &stat->busy_time);
+
+ stat->private_data = NULL;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+ memcpy(&kbdev->devfreq_cooling->last_status, stat, sizeof(*stat));
+#endif
+
+ return 0;
+}
+
+static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev,
+ struct devfreq_dev_profile *dp)
+{
+ int count;
+ int i = 0;
+ unsigned long freq = 0;
+ struct dev_pm_opp *opp;
+
+ rcu_read_lock();
+ count = dev_pm_opp_get_opp_count(kbdev->dev);
+ if (count < 0) {
+ rcu_read_unlock();
+ return count;
+ }
+ rcu_read_unlock();
+
+ dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]),
+ GFP_KERNEL);
+ if (!dp->freq_table)
+ return -ENOMEM;
+
+ rcu_read_lock();
+ for (i = 0; i < count; i++, freq++) {
+ opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq);
+ if (IS_ERR(opp))
+ break;
+
+ dp->freq_table[i] = freq;
+ }
+ rcu_read_unlock();
+
+ if (count != i)
+ dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d!=%d\n",
+ count, i);
+
+ dp->max_state = i;
+
+ return 0;
+}
+
+static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev)
+{
+ struct devfreq_dev_profile *dp = kbdev->devfreq->profile;
+
+ kfree(dp->freq_table);
+}
+
+static void kbase_devfreq_exit(struct device *dev)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+
+ kbase_devfreq_term_freq_table(kbdev);
+}
+
+int kbase_devfreq_init(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_DEVFREQ_THERMAL
+ struct devfreq_cooling_ops *callbacks = POWER_MODEL_CALLBACKS;
+#endif
+ struct devfreq_dev_profile *dp;
+ int err;
+
+ dev_dbg(kbdev->dev, "Init Mali devfreq\n");
+
+ if (!kbdev->clock)
+ return -ENODEV;
+
+ kbdev->current_freq = clk_get_rate(kbdev->clock);
+
+ dp = &kbdev->devfreq_profile;
+
+ dp->initial_freq = kbdev->current_freq;
+ dp->polling_ms = 100;
+ dp->target = kbase_devfreq_target;
+ dp->get_dev_status = kbase_devfreq_status;
+ dp->get_cur_freq = kbase_devfreq_cur_freq;
+ dp->exit = kbase_devfreq_exit;
+
+ if (kbase_devfreq_init_freq_table(kbdev, dp))
+ return -EFAULT;
+
+ kbdev->devfreq = devfreq_add_device(kbdev->dev, dp,
+ "simple_ondemand", NULL);
+ if (IS_ERR(kbdev->devfreq)) {
+ kbase_devfreq_term_freq_table(kbdev);
+ return PTR_ERR(kbdev->devfreq);
+ }
+
+ err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq);
+ if (err) {
+ dev_err(kbdev->dev,
+ "Failed to register OPP notifier (%d)\n", err);
+ goto opp_notifier_failed;
+ }
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+ if (callbacks) {
+
+ kbdev->devfreq_cooling = of_devfreq_cooling_register_power(
+ kbdev->dev->of_node,
+ kbdev->devfreq,
+ callbacks);
+ if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) {
+ err = PTR_ERR(kbdev->devfreq_cooling);
+ dev_err(kbdev->dev,
+ "Failed to register cooling device (%d)\n",
+ err);
+ goto cooling_failed;
+ }
+ }
+#endif
+
+ return 0;
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+cooling_failed:
+ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+#endif /* CONFIG_DEVFREQ_THERMAL */
+opp_notifier_failed:
+ err = devfreq_remove_device(kbdev->devfreq);
+ if (err)
+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+ else
+ kbdev->devfreq = NULL;
+
+ return err;
+}
+
+void kbase_devfreq_term(struct kbase_device *kbdev)
+{
+ int err;
+
+ dev_dbg(kbdev->dev, "Term Mali devfreq\n");
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+ devfreq_cooling_unregister(kbdev->devfreq_cooling);
+#endif
+
+ devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq);
+
+ err = devfreq_remove_device(kbdev->devfreq);
+ if (err)
+ dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err);
+ else
+ kbdev->devfreq = NULL;
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
new file mode 100644
index 00000000000..c0bf8b15b3b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _BASE_DEVFREQ_H_
+#define _BASE_DEVFREQ_H_
+
+int kbase_devfreq_init(struct kbase_device *kbdev);
+void kbase_devfreq_term(struct kbase_device *kbdev);
+
+#endif /* _BASE_DEVFREQ_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
new file mode 100644
index 00000000000..83d5ec9f7a9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ *
+ */
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+ struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+ KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+ dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value);
+ writel(value, kbdev->reg + offset);
+ if (kctx && kctx->jctx.tb)
+ kbase_device_trace_register_access(kctx, REG_WRITE, offset,
+ value);
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_write);
+
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+ struct kbase_context *kctx)
+{
+ u32 val;
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+ KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kbdev->dev != NULL);
+ val = readl(kbdev->reg + offset);
+ dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val);
+ if (kctx && kctx->jctx.tb)
+ kbase_device_trace_register_access(kctx, REG_READ, offset, val);
+ return val;
+}
+
+KBASE_EXPORT_TEST_API(kbase_reg_read);
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
+
+/**
+ * kbase_report_gpu_fault - Report a GPU fault.
+ * @kbdev: Kbase device pointer
+ * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS
+ * was also set
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using dev_warn().
+ */
+static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple)
+{
+ u32 status;
+ u64 address;
+
+ status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL);
+ address = (u64) kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32;
+ address |= kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL);
+
+ dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx",
+ status & 0xFF,
+ kbase_exception_name(kbdev, status),
+ address);
+ if (multiple)
+ dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n");
+}
+
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
+{
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val);
+ if (val & GPU_FAULT)
+ kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS);
+
+ if (val & RESET_COMPLETED)
+ kbase_pm_reset_done(kbdev);
+
+ if (val & PRFCNT_SAMPLE_COMPLETED)
+ kbase_instr_hwcnt_sample_done(kbdev);
+
+ if (val & CLEAN_CACHES_COMPLETED)
+ kbase_clean_caches_done(kbdev);
+
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL);
+
+ /* kbase_pm_check_transitions must be called after the IRQ has been
+ * cleared. This is because it might trigger further power transitions
+ * and we don't want to miss the interrupt raised to notify us that
+ * these further transitions have finished.
+ */
+ if (val & POWER_CHANGED_ALL)
+ kbase_pm_power_changed(kbdev);
+
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
new file mode 100644
index 00000000000..5b20445932f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h
@@ -0,0 +1,67 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access device APIs
+ */
+
+#ifndef _KBASE_DEVICE_INTERNAL_H_
+#define _KBASE_DEVICE_INTERNAL_H_
+
+/**
+ * kbase_reg_write - write to GPU register
+ * @kbdev: Kbase device pointer
+ * @offset: Offset of register
+ * @value: Value to write
+ * @kctx: Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ */
+void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_reg_read - read from GPU register
+ * @kbdev: Kbase device pointer
+ * @offset: Offset of register
+ * @kctx: Kbase context pointer. May be NULL
+ *
+ * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If
+ * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr
+ * != KBASEP_AS_NR_INVALID).
+ *
+ * Return: Value in desired register
+ */
+u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset,
+ struct kbase_context *kctx);
+
+
+/**
+ * kbase_gpu_interrupt - GPU interrupt handler
+ * @kbdev: Kbase device pointer
+ * @val: The value of the GPU IRQ status register which triggered the call
+ *
+ * This function is called from the interrupt handler when a GPU irq is to be
+ * handled.
+ */
+void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val);
+
+#endif /* _KBASE_DEVICE_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
new file mode 100644
index 00000000000..72a98d0f795
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c
@@ -0,0 +1,124 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend APIs
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_hwaccess_backend.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+int kbase_backend_early_init(struct kbase_device *kbdev)
+{
+ int err;
+
+ err = kbasep_platform_device_init(kbdev);
+ if (err)
+ return err;
+
+ /* Ensure we can access the GPU registers */
+ kbase_pm_register_access_enable(kbdev);
+
+ /* Find out GPU properties based on the GPU feature registers */
+ kbase_gpuprops_set(kbdev);
+
+ /* We're done accessing the GPU registers for now. */
+ kbase_pm_register_access_disable(kbdev);
+
+ err = kbase_hwaccess_pm_init(kbdev);
+ if (err)
+ goto fail_pm;
+
+ err = kbase_install_interrupts(kbdev);
+ if (err)
+ goto fail_interrupts;
+
+ return 0;
+
+fail_interrupts:
+ kbase_hwaccess_pm_term(kbdev);
+fail_pm:
+ kbasep_platform_device_term(kbdev);
+
+ return err;
+}
+
+void kbase_backend_early_term(struct kbase_device *kbdev)
+{
+ kbase_release_interrupts(kbdev);
+ kbase_hwaccess_pm_term(kbdev);
+ kbasep_platform_device_term(kbdev);
+}
+
+int kbase_backend_late_init(struct kbase_device *kbdev)
+{
+ int err;
+
+ err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT);
+ if (err)
+ return err;
+
+ err = kbase_backend_timer_init(kbdev);
+ if (err)
+ goto fail_timer;
+
+/* Currently disabled on the prototype */
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+ if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
+ dev_err(kbdev->dev, "Interrupt assigment check failed.\n");
+ err = -EINVAL;
+ goto fail_interrupt_test;
+ }
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+
+ err = kbase_job_slot_init(kbdev);
+ if (err)
+ goto fail_job_slot;
+
+ init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
+
+ return 0;
+
+fail_job_slot:
+/* Currently disabled on the prototype */
+#ifdef CONFIG_MALI_DEBUG
+#ifndef CONFIG_MALI_NO_MALI
+fail_interrupt_test:
+#endif /* !CONFIG_MALI_NO_MALI */
+#endif /* CONFIG_MALI_DEBUG */
+ kbase_backend_timer_term(kbdev);
+fail_timer:
+ kbase_hwaccess_pm_halt(kbdev);
+
+ return err;
+}
+
+void kbase_backend_late_term(struct kbase_device *kbdev)
+{
+ kbase_job_slot_halt(kbdev);
+ kbase_job_slot_term(kbdev);
+ kbase_backend_timer_term(kbdev);
+ kbase_hwaccess_pm_halt(kbdev);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
new file mode 100644
index 00000000000..bc7235f5833
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query backend APIs
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+ struct kbase_gpuprops_regdump *regdump)
+{
+ int i;
+
+ /* Fill regdump with the content of the relevant registers */
+ regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL);
+
+ regdump->l2_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_FEATURES), NULL);
+ regdump->suspend_size = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(SUSPEND_SIZE), NULL);
+ regdump->tiler_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TILER_FEATURES), NULL);
+ regdump->mem_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(MEM_FEATURES), NULL);
+ regdump->mmu_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(MMU_FEATURES), NULL);
+ regdump->as_present = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(AS_PRESENT), NULL);
+ regdump->js_present = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(JS_PRESENT), NULL);
+
+ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+ regdump->js_features[i] = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL);
+
+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+ regdump->texture_features[i] = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL);
+
+ regdump->thread_max_threads = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL);
+ regdump->thread_max_workgroup_size = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE),
+ NULL);
+ regdump->thread_max_barrier_size = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL);
+ regdump->thread_features = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(THREAD_FEATURES), NULL);
+
+ regdump->shader_present_lo = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL);
+ regdump->shader_present_hi = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL);
+
+ regdump->tiler_present_lo = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TILER_PRESENT_LO), NULL);
+ regdump->tiler_present_hi = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TILER_PRESENT_HI), NULL);
+
+ regdump->l2_present_lo = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_PRESENT_LO), NULL);
+ regdump->l2_present_hi = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_PRESENT_HI), NULL);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
new file mode 100644
index 00000000000..2c987071a77
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c
@@ -0,0 +1,536 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * GPU backend instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_instr_internal.h>
+
+/**
+ * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to
+ * hardware
+ *
+ * @kbdev: Kbase device
+ */
+static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ unsigned long pm_flags;
+ u32 irq_mask;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Wait for any reset to complete */
+ while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+ kbdev->hwcnt.backend.state !=
+ KBASE_INSTR_STATE_RESETTING);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_REQUEST_CLEAN);
+
+ /* Enable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask | CLEAN_CACHES_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* clean&invalidate the caches so we're sure the mmu tables for the dump
+ * buffer is valid */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING;
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup)
+{
+ unsigned long flags, pm_flags;
+ int err = -EINVAL;
+ struct kbasep_js_device_data *js_devdata;
+ u32 irq_mask;
+ int ret;
+ u64 shader_cores_needed;
+
+ KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx);
+
+ shader_cores_needed = kbase_pm_get_present_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+
+ js_devdata = &kbdev->js_data;
+
+ /* alignment failure */
+ if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1)))
+ goto out_err;
+
+ /* Override core availability policy to ensure all cores are available
+ */
+ kbase_pm_ca_instr_enable(kbdev);
+
+ /* Request the cores early on synchronously - we'll release them on any
+ * errors (e.g. instrumentation already active) */
+ kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+ /* Instrumentation is already enabled */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ goto out_unrequest_cores;
+ }
+
+ /* Enable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask |
+ PRFCNT_SAMPLE_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* In use, this context is the owner */
+ kbdev->hwcnt.kctx = kctx;
+ /* Remember the dump address so we can reprogram it later */
+ kbdev->hwcnt.addr = setup->dump_buffer;
+ /* Remember all the settings for suspend/resume */
+ if (&kbdev->hwcnt.suspended_state != setup)
+ memcpy(&kbdev->hwcnt.suspended_state, setup,
+ sizeof(kbdev->hwcnt.suspended_state));
+
+ /* Request the clean */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+ kbdev->hwcnt.backend.triggered = 0;
+ /* Clean&invalidate the caches so we're sure the mmu tables for the dump
+ * buffer is valid */
+ ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+ &kbdev->hwcnt.backend.cache_clean_work);
+ KBASE_DEBUG_ASSERT(ret);
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Wait for cacheclean to complete */
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_IDLE);
+
+ kbase_pm_request_l2_caches(kbdev);
+
+ /* Configure */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+ (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT)
+ | PRFCNT_CONFIG_MODE_OFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+ setup->dump_buffer & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+ setup->dump_buffer >> 32, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+ setup->jm_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+ setup->shader_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+ setup->mmu_l2_bm, kctx);
+ /* Due to PRLAM-8186 we need to disable the Tiler before we enable the
+ * HW counter dump. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0,
+ kctx);
+ else
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ setup->tiler_bm, kctx);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+ (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
+ PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+ /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump
+ */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ setup->tiler_bm, kctx);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ err = 0;
+
+ dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx);
+ return err;
+ out_unrequest_cores:
+ kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed);
+ out_err:
+ return err;
+}
+
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
+{
+ unsigned long flags, pm_flags;
+ int err = -EINVAL;
+ u32 irq_mask;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ while (1) {
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) {
+ /* Instrumentation is not enabled */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ goto out;
+ }
+
+ if (kbdev->hwcnt.kctx != kctx) {
+ /* Instrumentation has been setup for another context */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ goto out;
+ }
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE)
+ break;
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Ongoing dump/setup - wait for its completion */
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ }
+
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+ kbdev->hwcnt.backend.triggered = 0;
+
+ /* Disable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* Disable the counters */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx);
+
+ kbdev->hwcnt.kctx = NULL;
+ kbdev->hwcnt.addr = 0ULL;
+
+ kbase_pm_ca_instr_disable(kbdev);
+
+ kbase_pm_unrequest_cores(kbdev, true,
+ kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ kbase_pm_release_l2_caches(kbdev);
+
+ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+ kctx);
+
+ err = 0;
+
+ out:
+ return err;
+}
+
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ int err = -EINVAL;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.kctx != kctx) {
+ /* The instrumentation has been setup for another context */
+ goto unlock;
+ }
+
+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) {
+ /* HW counters are disabled or another dump is ongoing, or we're
+ * resetting */
+ goto unlock;
+ }
+
+ kbdev->hwcnt.backend.triggered = 0;
+
+ /* Mark that we're dumping - the PF handler can signal that we faulted
+ */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING;
+
+ /* Reconfigure the dump address */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+ kbdev->hwcnt.addr & 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+ kbdev->hwcnt.addr >> 32, NULL);
+
+ /* Start dumping */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL,
+ kbdev->hwcnt.addr, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_PRFCNT_SAMPLE, kctx);
+
+ dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx);
+
+ err = 0;
+
+ unlock:
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump);
+
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+ bool * const success)
+{
+ unsigned long flags;
+ bool complete = false;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) {
+ *success = true;
+ complete = true;
+ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+ *success = false;
+ complete = true;
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ return complete;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
+
+void kbasep_cache_clean_worker(struct work_struct *data)
+{
+ struct kbase_device *kbdev;
+ unsigned long flags;
+
+ kbdev = container_of(data, struct kbase_device,
+ hwcnt.backend.cache_clean_work);
+
+ mutex_lock(&kbdev->cacheclean_lock);
+ kbasep_instr_hwcnt_cacheclean(kbdev);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Wait for our condition, and any reset to complete */
+ while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING ||
+ kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_CLEANING) {
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.cache_clean_wait,
+ (kbdev->hwcnt.backend.state !=
+ KBASE_INSTR_STATE_RESETTING &&
+ kbdev->hwcnt.backend.state !=
+ KBASE_INSTR_STATE_CLEANING));
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_CLEANED);
+
+ /* All finished and idle */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ mutex_unlock(&kbdev->cacheclean_lock);
+}
+
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+ } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
+ int ret;
+ /* Always clean and invalidate the cache after a successful dump
+ */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
+ ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
+ &kbdev->hwcnt.backend.cache_clean_work);
+ KBASE_DEBUG_ASSERT(ret);
+ }
+ /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a reset,
+ * and the instrumentation state hasn't been restored yet -
+ * kbasep_reset_timeout_worker() will do the rest of the work */
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+}
+
+void kbase_clean_caches_done(struct kbase_device *kbdev)
+{
+ u32 irq_mask;
+
+ if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) {
+ unsigned long flags;
+ unsigned long pm_flags;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Disable interrupt */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags);
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask & ~CLEAN_CACHES_COMPLETED, NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags);
+
+ /* Wakeup... */
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) {
+ /* Only wake if we weren't resetting */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+ wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+ }
+ /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a
+ * reset, and the instrumentation state hasn't been restored yet
+ * - kbasep_reset_timeout_worker() will do the rest of the work
+ */
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ }
+}
+
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ unsigned long flags;
+ int err;
+
+ /* Wait for dump & cacheclean to complete */
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) {
+ err = -EINVAL;
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ } else {
+ /* Dump done */
+ KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_IDLE);
+ err = 0;
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ return err;
+}
+
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ int err = -EINVAL;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+
+ /* Check it's the context previously set up and we're not already
+ * dumping */
+ if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state !=
+ KBASE_INSTR_STATE_IDLE)
+ goto out;
+
+ /* Clear the counters */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_PRFCNT_CLEAR, kctx);
+
+ err = 0;
+
+out:
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear);
+
+int kbase_instr_backend_init(struct kbase_device *kbdev)
+{
+ int ret = 0;
+
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
+
+ init_waitqueue_head(&kbdev->hwcnt.backend.wait);
+ init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait);
+ INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
+ kbasep_cache_clean_worker);
+ kbdev->hwcnt.backend.triggered = 0;
+
+ kbdev->hwcnt.backend.cache_clean_wq =
+ alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
+ if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
+ ret = -EINVAL;
+
+ return ret;
+}
+
+void kbase_instr_backend_term(struct kbase_device *kbdev)
+{
+ destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
new file mode 100644
index 00000000000..23bd80a5a15
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h
@@ -0,0 +1,62 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend-specific instrumentation definitions
+ */
+
+#ifndef _KBASE_INSTR_DEFS_H_
+#define _KBASE_INSTR_DEFS_H_
+
+/*
+ * Instrumentation State Machine States
+ */
+enum kbase_instr_state {
+ /* State where instrumentation is not active */
+ KBASE_INSTR_STATE_DISABLED = 0,
+ /* State machine is active and ready for a command. */
+ KBASE_INSTR_STATE_IDLE,
+ /* Hardware is currently dumping a frame. */
+ KBASE_INSTR_STATE_DUMPING,
+ /* We've requested a clean to occur on a workqueue */
+ KBASE_INSTR_STATE_REQUEST_CLEAN,
+ /* Hardware is currently cleaning and invalidating caches. */
+ KBASE_INSTR_STATE_CLEANING,
+ /* Cache clean completed, and either a) a dump is complete, or
+ * b) instrumentation can now be setup. */
+ KBASE_INSTR_STATE_CLEANED,
+ /* kbasep_reset_timeout_worker() has started (but not compelted) a
+ * reset. This generally indicates the current action should be aborted,
+ * and kbasep_reset_timeout_worker() will handle the cleanup */
+ KBASE_INSTR_STATE_RESETTING,
+ /* An error has occured during DUMPING (page fault). */
+ KBASE_INSTR_STATE_FAULT
+};
+
+/* Structure used for instrumentation and HW counters dumping */
+struct kbase_instr_backend {
+ wait_queue_head_t wait;
+ int triggered;
+
+ enum kbase_instr_state state;
+ wait_queue_head_t cache_clean_wait;
+ struct workqueue_struct *cache_clean_wq;
+ struct work_struct cache_clean_work;
+};
+
+#endif /* _KBASE_INSTR_DEFS_H_ */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
new file mode 100644
index 00000000000..e96aeae786e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Backend-specific HW access instrumentation APIs
+ */
+
+#ifndef _KBASE_INSTR_INTERNAL_H_
+#define _KBASE_INSTR_INTERNAL_H_
+
+/**
+ * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning
+ * @data: a &struct work_struct
+ */
+void kbasep_cache_clean_worker(struct work_struct *data);
+
+/**
+ * kbase_clean_caches_done() - Cache clean interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_clean_caches_done(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
new file mode 100644
index 00000000000..8781561e73d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Backend specific IRQ APIs
+ */
+
+#ifndef _KBASE_IRQ_INTERNAL_H_
+#define _KBASE_IRQ_INTERNAL_H_
+
+int kbase_install_interrupts(struct kbase_device *kbdev);
+
+void kbase_release_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed
+ * execution
+ * @kbdev: The kbase device
+ */
+void kbase_synchronize_irqs(struct kbase_device *kbdev);
+
+int kbasep_common_test_interrupt_handlers(
+ struct kbase_device * const kbdev);
+
+#endif /* _KBASE_IRQ_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
new file mode 100644
index 00000000000..49c72f90aac
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c
@@ -0,0 +1,471 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+
+#include <linux/interrupt.h>
+
+#if !defined(CONFIG_MALI_NO_MALI)
+
+/* GPU IRQ Tags */
+#define JOB_IRQ_TAG 0
+#define MMU_IRQ_TAG 1
+#define GPU_IRQ_TAG 2
+
+
+static void *kbase_tag(void *ptr, u32 tag)
+{
+ return (void *)(((uintptr_t) ptr) | tag);
+}
+
+static void *kbase_untag(void *ptr)
+{
+ return (void *)(((uintptr_t) ptr) & ~3);
+}
+
+
+
+
+static irqreturn_t kbase_job_irq_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (!kbdev->pm.backend.driver_ready_for_irqs)
+ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+ __func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbase_job_done(kbdev, val);
+
+ return IRQ_HANDLED;
+}
+
+KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
+
+static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return IRQ_NONE;
+ }
+
+ atomic_inc(&kbdev->faults_pending);
+
+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (!kbdev->pm.backend.driver_ready_for_irqs)
+ dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+ __func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!val) {
+ atomic_dec(&kbdev->faults_pending);
+ return IRQ_NONE;
+ }
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbase_mmu_interrupt(kbdev, val);
+
+ atomic_dec(&kbdev->faults_pending);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_gpu_irq_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (!kbdev->pm.backend.driver_ready_for_irqs)
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n",
+ __func__, irq, val);
+#endif /* CONFIG_MALI_DEBUG */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbase_gpu_interrupt(kbdev, val);
+
+ return IRQ_HANDLED;
+}
+static irq_handler_t kbase_handler_table[] = {
+ [JOB_IRQ_TAG] = kbase_job_irq_handler,
+ [MMU_IRQ_TAG] = kbase_mmu_irq_handler,
+ [GPU_IRQ_TAG] = kbase_gpu_irq_handler,
+};
+
+
+#ifdef CONFIG_MALI_DEBUG
+#define JOB_IRQ_HANDLER JOB_IRQ_TAG
+#define MMU_IRQ_HANDLER MMU_IRQ_TAG
+#define GPU_IRQ_HANDLER GPU_IRQ_TAG
+
+/**
+ * kbase_set_custom_irq_handler - Set a custom IRQ handler
+ * @kbdev: Device for which the handler is to be registered
+ * @custom_handler: Handler to be registered
+ * @irq_type: Interrupt type
+ *
+ * Registers given interrupt handler for requested interrupt type
+ * In the case where irq handler is not specified, the default handler shall be
+ * registered
+ *
+ * Return: 0 case success, error code otherwise
+ */
+int kbase_set_custom_irq_handler(struct kbase_device *kbdev,
+ irq_handler_t custom_handler,
+ int irq_type)
+{
+ int result = 0;
+ irq_handler_t requested_irq_handler = NULL;
+
+ KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) &&
+ (GPU_IRQ_HANDLER >= irq_type));
+
+ /* Release previous handler */
+ if (kbdev->irqs[irq_type].irq)
+ free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type));
+
+ requested_irq_handler = (NULL != custom_handler) ? custom_handler :
+ kbase_handler_table[irq_type];
+
+ if (0 != request_irq(kbdev->irqs[irq_type].irq,
+ requested_irq_handler,
+ kbdev->irqs[irq_type].flags | IRQF_SHARED,
+ dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) {
+ result = -EINVAL;
+ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+ kbdev->irqs[irq_type].irq, irq_type);
+#ifdef CONFIG_SPARSE_IRQ
+ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+ }
+
+ return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler);
+
+/* test correct interrupt assigment and reception by cpu */
+struct kbasep_irq_test {
+ struct hrtimer timer;
+ wait_queue_head_t wait;
+ int triggered;
+ u32 timeout;
+};
+
+static struct kbasep_irq_test kbasep_irq_test_data;
+
+#define IRQ_TEST_TIMEOUT 500
+
+static irqreturn_t kbase_job_irq_test_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL);
+
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbasep_irq_test_data.triggered = 1;
+ wake_up(&kbasep_irq_test_data.wait);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev = kbase_untag(data);
+ u32 val;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* GPU is turned off - IRQ is not for us */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return IRQ_NONE;
+ }
+
+ val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL);
+
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (!val)
+ return IRQ_NONE;
+
+ dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val);
+
+ kbasep_irq_test_data.triggered = 1;
+ wake_up(&kbasep_irq_test_data.wait);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL);
+
+ return IRQ_HANDLED;
+}
+
+static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer)
+{
+ struct kbasep_irq_test *test_data = container_of(timer,
+ struct kbasep_irq_test, timer);
+
+ test_data->timeout = 1;
+ test_data->triggered = 1;
+ wake_up(&test_data->wait);
+ return HRTIMER_NORESTART;
+}
+
+static int kbasep_common_test_interrupt(
+ struct kbase_device * const kbdev, u32 tag)
+{
+ int err = 0;
+ irq_handler_t test_handler;
+
+ u32 old_mask_val;
+ u16 mask_offset;
+ u16 rawstat_offset;
+
+ switch (tag) {
+ case JOB_IRQ_TAG:
+ test_handler = kbase_job_irq_test_handler;
+ rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT);
+ mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK);
+ break;
+ case MMU_IRQ_TAG:
+ test_handler = kbase_mmu_irq_test_handler;
+ rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
+ mask_offset = MMU_REG(MMU_IRQ_MASK);
+ break;
+ case GPU_IRQ_TAG:
+ /* already tested by pm_driver - bail out */
+ default:
+ return 0;
+ }
+
+ /* store old mask */
+ old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL);
+ /* mask interrupts */
+ kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+ if (kbdev->irqs[tag].irq) {
+ /* release original handler and install test handler */
+ if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) {
+ err = -EINVAL;
+ } else {
+ kbasep_irq_test_data.timeout = 0;
+ hrtimer_init(&kbasep_irq_test_data.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ kbasep_irq_test_data.timer.function =
+ kbasep_test_interrupt_timeout;
+
+ /* trigger interrupt */
+ kbase_reg_write(kbdev, mask_offset, 0x1, NULL);
+ kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL);
+
+ hrtimer_start(&kbasep_irq_test_data.timer,
+ HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT),
+ HRTIMER_MODE_REL);
+
+ wait_event(kbasep_irq_test_data.wait,
+ kbasep_irq_test_data.triggered != 0);
+
+ if (kbasep_irq_test_data.timeout != 0) {
+ dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n",
+ kbdev->irqs[tag].irq, tag);
+ err = -EINVAL;
+ } else {
+ dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n",
+ kbdev->irqs[tag].irq, tag);
+ }
+
+ hrtimer_cancel(&kbasep_irq_test_data.timer);
+ kbasep_irq_test_data.triggered = 0;
+
+ /* mask interrupts */
+ kbase_reg_write(kbdev, mask_offset, 0x0, NULL);
+
+ /* release test handler */
+ free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag));
+ }
+
+ /* restore original interrupt */
+ if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag],
+ kbdev->irqs[tag].flags | IRQF_SHARED,
+ dev_name(kbdev->dev), kbase_tag(kbdev, tag))) {
+ dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n",
+ kbdev->irqs[tag].irq, tag);
+ err = -EINVAL;
+ }
+ }
+ /* restore old mask */
+ kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL);
+
+ return err;
+}
+
+int kbasep_common_test_interrupt_handlers(
+ struct kbase_device * const kbdev)
+{
+ int err;
+
+ init_waitqueue_head(&kbasep_irq_test_data.wait);
+ kbasep_irq_test_data.triggered = 0;
+
+ /* A suspend won't happen during startup/insmod */
+ kbase_pm_context_active(kbdev);
+
+ err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG);
+ if (err) {
+ dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n");
+ goto out;
+ }
+
+ err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG);
+ if (err) {
+ dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. Check interrupt assignments.\n");
+ goto out;
+ }
+
+ dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n");
+
+ out:
+ kbase_pm_context_idle(kbdev);
+
+ return err;
+}
+#endif /* CONFIG_MALI_DEBUG */
+
+int kbase_install_interrupts(struct kbase_device *kbdev)
+{
+ u32 nr = ARRAY_SIZE(kbase_handler_table);
+ int err;
+ u32 i;
+
+ for (i = 0; i < nr; i++) {
+ err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i],
+ kbdev->irqs[i].flags | IRQF_SHARED,
+ dev_name(kbdev->dev),
+ kbase_tag(kbdev, i));
+ if (err) {
+ dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n",
+ kbdev->irqs[i].irq, i);
+#ifdef CONFIG_SPARSE_IRQ
+ dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n");
+#endif /* CONFIG_SPARSE_IRQ */
+ goto release;
+ }
+ }
+
+ return 0;
+
+ release:
+ while (i-- > 0)
+ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+
+ return err;
+}
+
+void kbase_release_interrupts(struct kbase_device *kbdev)
+{
+ u32 nr = ARRAY_SIZE(kbase_handler_table);
+ u32 i;
+
+ for (i = 0; i < nr; i++) {
+ if (kbdev->irqs[i].irq)
+ free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i));
+ }
+}
+
+void kbase_synchronize_irqs(struct kbase_device *kbdev)
+{
+ u32 nr = ARRAY_SIZE(kbase_handler_table);
+ u32 i;
+
+ for (i = 0; i < nr; i++) {
+ if (kbdev->irqs[i].irq)
+ synchronize_irq(kbdev->irqs[i].irq);
+ }
+}
+
+#endif /* !defined(CONFIG_MALI_NO_MALI) */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
new file mode 100644
index 00000000000..f2167887229
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c
@@ -0,0 +1,385 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register backend context / address space management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+/**
+ * assign_and_activate_kctx_addr_space - Assign an AS to a context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @current_as: Address Space to assign
+ *
+ * Assign an Address Space (AS) to a context, and add the context to the Policy.
+ *
+ * This includes
+ * setting up the global runpool_irq structure and the context on the AS,
+ * Activating the MMU on the AS,
+ * Allowing jobs to be submitted on the AS.
+ *
+ * Context:
+ * kbasep_js_kctx_info.jsctx_mutex held,
+ * kbasep_js_device_data.runpool_mutex held,
+ * AS transaction mutex held,
+ * Runpool IRQ lock held
+ */
+static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_as *current_as)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ struct kbasep_js_per_as_data *js_per_as_data;
+ int as_nr = current_as->number;
+
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&current_as->transaction_mutex);
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ /* Attribute handling */
+ kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx);
+
+ /* Assign addr space */
+ kctx->as_nr = as_nr;
+
+ /* If the GPU is currently powered, activate this address space on the
+ * MMU */
+ if (kbdev->pm.backend.gpu_powered)
+ kbase_mmu_update(kctx);
+ /* If the GPU was not powered then the MMU will be reprogrammed on the
+ * next pm_context_active() */
+
+ /* Allow it to run jobs */
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+ /* Book-keeping */
+ js_per_as_data->kctx = kctx;
+ js_per_as_data->as_busy_refcount = 0;
+
+ kbase_js_runpool_inc_context_count(kbdev, kctx);
+}
+
+/**
+ * release_addr_space - Release an address space
+ * @kbdev: Kbase device
+ * @kctx_as_nr: Address space of context to release
+ * @kctx: Context being released
+ *
+ * Context: kbasep_js_device_data.runpool_mutex must be held
+ *
+ * Release an address space, making it available for being picked again.
+ */
+static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u16 as_bit = (1u << kctx_as_nr);
+
+ js_devdata = &kbdev->js_data;
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* The address space must not already be free */
+ KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit));
+
+ js_devdata->as_free |= as_bit;
+
+ kbase_js_runpool_dec_context_count(kbdev, kctx);
+}
+
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ int i;
+
+ if (kbdev->hwaccess.active_kctx == kctx) {
+ /* Context is already active */
+ return true;
+ }
+
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ struct kbasep_js_per_as_data *js_per_as_data =
+ &kbdev->js_data.runpool_irq.per_as_data[i];
+
+ if (js_per_as_data->kctx == kctx) {
+ /* Context already has ASID - mark as active */
+ return true;
+ }
+ }
+
+ /* Context does not have address space assigned */
+ return false;
+}
+
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_per_as_data *js_per_as_data;
+ int as_nr = kctx->as_nr;
+
+ if (as_nr == KBASEP_AS_NR_INVALID) {
+ WARN(1, "Attempting to release context without ASID\n");
+ return;
+ }
+
+ lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex);
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr];
+ if (js_per_as_data->as_busy_refcount != 0) {
+ WARN(1, "Attempting to release active ASID\n");
+ return;
+ }
+
+ /* Release context from address space */
+ js_per_as_data->kctx = NULL;
+
+ kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx);
+ /* If the GPU is currently powered, de-activate this address space on
+ * the MMU */
+ if (kbdev->pm.backend.gpu_powered)
+ kbase_mmu_disable(kctx);
+ /* If the GPU was not powered then the MMU will be reprogrammed on the
+ * next pm_context_active() */
+
+ release_addr_space(kbdev, as_nr, kctx);
+ kctx->as_nr = KBASEP_AS_NR_INVALID;
+}
+
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+}
+
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+ int as_nr)
+{
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ js_devdata->as_free |= (1 << as_nr);
+}
+
+/**
+ * check_is_runpool_full - check whether the runpool is full for a specified
+ * context
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ *
+ * If kctx == NULL, then this makes the least restrictive check on the
+ * runpool. A specific context that is supplied immediately after could fail
+ * the check, even under the same conditions.
+ *
+ * Therefore, once a context is obtained you \b must re-check it with this
+ * function, since the return value could change to false.
+ *
+ * Context:
+ * In all cases, the caller must hold kbasep_js_device_data.runpool_mutex.
+ * When kctx != NULL the caller must hold the
+ * kbasep_js_kctx_info.ctx.jsctx_mutex.
+ * When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but
+ * it doesn't do any harm to do so).
+ *
+ * Return: true if the runpool is full
+ */
+static bool check_is_runpool_full(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ bool is_runpool_full;
+
+ js_devdata = &kbdev->js_data;
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* Regardless of whether a context is submitting or not, can't have more
+ * than there are HW address spaces */
+ is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >=
+ kbdev->nr_hw_address_spaces);
+
+ if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ /* Contexts that submit might use less of the address spaces
+ * available, due to HW workarounds. In which case, the runpool
+ * is also full when the number of submitting contexts exceeds
+ * the number of submittable address spaces.
+ *
+ * Both checks must be made: can have nr_user_address_spaces ==
+ * nr_hw_address spaces, and at the same time can have
+ * nr_user_contexts_running < nr_all_contexts_running. */
+ is_runpool_full |= (bool)
+ (js_devdata->nr_user_contexts_running >=
+ kbdev->nr_user_address_spaces);
+ }
+
+ return is_runpool_full;
+}
+
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ unsigned long flags;
+ int i;
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* First try to find a free address space */
+ if (check_is_runpool_full(kbdev, kctx))
+ i = -1;
+ else
+ i = ffs(js_devdata->as_free) - 1;
+
+ if (i >= 0 && i < kbdev->nr_hw_address_spaces) {
+ js_devdata->as_free &= ~(1 << i);
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ return i;
+ }
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* No address space currently free, see if we can release one */
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+ struct kbasep_js_kctx_info *as_js_kctx_info;
+ struct kbase_context *as_kctx;
+
+ js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i];
+ as_kctx = js_per_as_data->kctx;
+ as_js_kctx_info = &as_kctx->jctx.sched_info;
+
+ /* Don't release privileged or active contexts, or contexts with
+ * jobs running */
+ if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_PRIVILEGED) &&
+ js_per_as_data->as_busy_refcount == 0) {
+ if (!kbasep_js_runpool_retain_ctx_nolock(kbdev,
+ as_kctx)) {
+ WARN(1, "Failed to retain active context\n");
+
+ spin_unlock_irqrestore(
+ &js_devdata->runpool_irq.lock,
+ flags);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ return KBASEP_AS_NR_INVALID;
+ }
+
+ kbasep_js_clear_submit_allowed(js_devdata, as_kctx);
+
+ /* Drop and retake locks to take the jsctx_mutex on the
+ * context we're about to release without violating lock
+ * ordering
+ */
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+ flags);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+
+ /* Release context from address space */
+ mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx);
+
+ if (!as_js_kctx_info->ctx.is_scheduled) {
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev,
+ as_kctx,
+ true);
+
+ js_devdata->as_free &= ~(1 << i);
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+ return i;
+ }
+
+ /* Context was retained while locks were dropped,
+ * continue looking for free AS */
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ }
+ }
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ return KBASEP_AS_NR_INVALID;
+}
+
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int as_nr)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbase_as *new_address_space = NULL;
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ if (kbdev->hwaccess.active_kctx == kctx ||
+ kctx->as_nr != KBASEP_AS_NR_INVALID ||
+ as_nr == KBASEP_AS_NR_INVALID) {
+ WARN(1, "Invalid parameters to use_ctx()\n");
+ return false;
+ }
+
+ new_address_space = &kbdev->as[as_nr];
+
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&new_address_space->transaction_mutex);
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space);
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) {
+ /* We need to retain it to keep the corresponding address space
+ */
+ kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+ }
+
+ return true;
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
new file mode 100644
index 00000000000..1598fce4c28
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific definitions
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_DEFS_H_
+#define _KBASE_HWACCESS_GPU_DEFS_H_
+
+/* SLOT_RB_SIZE must be < 256 */
+#define SLOT_RB_SIZE 2
+#define SLOT_RB_MASK (SLOT_RB_SIZE - 1)
+
+/**
+ * struct rb_entry - Ringbuffer entry
+ * @katom: Atom associated with this entry
+ */
+struct rb_entry {
+ struct kbase_jd_atom *katom;
+};
+
+/**
+ * struct slot_rb - Slot ringbuffer
+ * @entries: Ringbuffer entries
+ * @last_context: The last context to submit a job on this slot
+ * @read_idx: Current read index of buffer
+ * @write_idx: Current write index of buffer
+ * @job_chain_flag: Flag used to implement jobchain disambiguation
+ */
+struct slot_rb {
+ struct rb_entry entries[SLOT_RB_SIZE];
+
+ struct kbase_context *last_context;
+
+ u8 read_idx;
+ u8 write_idx;
+
+ u8 job_chain_flag;
+};
+
+/**
+ * struct kbase_backend_data - GPU backend specific data for HW access layer
+ * @slot_rb: Slot ringbuffers
+ * @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines
+ * whether slots 0/1 or slot 2 are currently being
+ * pulled from
+ * @scheduling_timer: The timer tick used for rescheduling jobs
+ * @timer_running: Is the timer running? The runpool_mutex must be
+ * held whilst modifying this.
+ *
+ * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure
+ */
+struct kbase_backend_data {
+ struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
+
+ bool rmu_workaround_flag;
+
+ struct hrtimer scheduling_timer;
+
+ bool timer_running;
+
+ /* Set when we're about to reset the GPU */
+ atomic_t reset_gpu;
+
+/* The GPU reset isn't pending */
+#define KBASE_RESET_GPU_NOT_PENDING 0
+/* kbase_prepare_to_reset_gpu has been called */
+#define KBASE_RESET_GPU_PREPARED 1
+/* kbase_reset_gpu has been called - the reset will now definitely happen
+ * within the timeout period */
+#define KBASE_RESET_GPU_COMMITTED 2
+/* The GPU reset process is currently occuring (timeout has expired or
+ * kbasep_try_reset_gpu_early was called) */
+#define KBASE_RESET_GPU_HAPPENING 3
+
+ /* Work queue and work item for performing the reset in */
+ struct workqueue_struct *reset_workq;
+ struct work_struct reset_work;
+ wait_queue_head_t reset_wait;
+ struct hrtimer reset_timer;
+};
+
+/**
+ * struct kbase_jd_atom_backend - GPU backend specific katom data
+ */
+struct kbase_jd_atom_backend {
+};
+
+/**
+ * struct kbase_context_backend - GPU backend specific context data
+ */
+struct kbase_context_backend {
+};
+
+#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
new file mode 100644
index 00000000000..508394ea025
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c
@@ -0,0 +1,1477 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel job manager APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+
+#define beenthere(kctx, f, a...) \
+ dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+u64 mali_js0_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
+u64 mali_js1_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
+u64 mali_js2_affinity_mask = 0xFFFFFFFFFFFFFFFFULL;
+#endif
+
+#if KBASE_GPU_RESET_EN
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev);
+static void kbasep_reset_timeout_worker(struct work_struct *data);
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer);
+#endif /* KBASE_GPU_RESET_EN */
+
+static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js,
+ struct kbase_context *kctx)
+{
+ return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx);
+}
+
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ int js)
+{
+ struct kbase_context *kctx;
+ u32 cfg;
+ u64 jc_head = katom->jc;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(katom);
+
+ kctx = katom->kctx;
+
+ /* Command register must be available */
+ KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx));
+ /* Affinity is not violating */
+ kbase_js_debug_log_current_affinities(kbdev);
+ KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js,
+ katom->affinity));
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO),
+ jc_head & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI),
+ jc_head >> 32, kctx);
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ {
+ u64 mask;
+ u32 value;
+
+ if (0 == js)
+ mask = mali_js0_affinity_mask;
+ else if (1 == js)
+ mask = mali_js1_affinity_mask;
+ else
+ mask = mali_js2_affinity_mask;
+
+ value = katom->affinity & (mask & 0xFFFFFFFF);
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+ value, kctx);
+
+ value = (katom->affinity >> 32) & ((mask>>32) & 0xFFFFFFFF);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+ value, kctx);
+ }
+#else
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO),
+ katom->affinity & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI),
+ katom->affinity >> 32, kctx);
+#endif
+
+ /* start MMU, medium priority, cache clean/flush on end, clean/flush on
+ * start */
+ cfg = kctx->as_nr;
+
+ cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
+ cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE;
+
+ cfg |= JS_CONFIG_START_MMU;
+ cfg |= JS_CONFIG_THREAD_PRI(8);
+
+ if (kbase_hw_has_feature(kbdev,
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+ if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) {
+ cfg |= JS_CONFIG_JOB_CHAIN_FLAG;
+ katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN;
+ kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+ true;
+ } else {
+ katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN;
+ kbdev->hwaccess.backend.slot_rb[js].job_chain_flag =
+ false;
+ }
+ }
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx);
+
+
+ /* Write an approximate start timestamp.
+ * It's approximate because there might be a job in the HEAD register.
+ * In such cases, we'll try to make a better approximation in the IRQ
+ * handler (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */
+ katom->start_timestamp = ktime_get();
+
+ /* GO ! */
+ dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx",
+ katom, kctx, js, jc_head, katom->affinity);
+
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js,
+ (u32) katom->affinity);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_job_slots_event(
+ GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js),
+ kctx, kbase_jd_atom_id(kctx, katom));
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_ret_atom_lpu(
+ katom,
+ &kbdev->gpu_props.props.raw_props.js_features[js]);
+#endif
+#ifdef CONFIG_GPU_TRACEPOINTS
+ if (kbase_backend_nr_atoms_submitted(kbdev, js) == 1) {
+ /* If this is the only job on the slot, trace it as starting */
+ char js_string[16];
+
+ trace_gpu_sched_switch(
+ kbasep_make_job_slot_string(js, js_string),
+ ktime_to_ns(katom->start_timestamp),
+ (u32)katom->kctx, 0, katom->work_id);
+ kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx;
+ }
+#endif
+ kbase_timeline_job_slot_submit(kbdev, kctx, katom, js);
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+ JS_COMMAND_START, katom->kctx);
+}
+
+/**
+ * kbasep_job_slot_update_head_start_timestamp - Update timestamp
+ * @kbdev: kbase device
+ * @js: job slot
+ * @end_timestamp: timestamp
+ *
+ * Update the start_timestamp of the job currently in the HEAD, based on the
+ * fact that we got an IRQ for the previous set of completed jobs.
+ *
+ * The estimate also takes into account the %KBASE_JS_IRQ_THROTTLE_TIME_US and
+ * the time the job was submitted, to work out the best estimate (which might
+ * still result in an over-estimate to the calculated time spent)
+ */
+static void kbasep_job_slot_update_head_start_timestamp(
+ struct kbase_device *kbdev,
+ int js,
+ ktime_t end_timestamp)
+{
+ if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) {
+ struct kbase_jd_atom *katom;
+ ktime_t new_timestamp;
+ ktime_t timestamp_diff;
+ /* The atom in the HEAD */
+ katom = kbase_gpu_inspect(kbdev, js, 0);
+
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ /* Account for any IRQ Throttle time - makes an overestimate of
+ * the time spent by the job */
+ new_timestamp = ktime_sub_ns(end_timestamp,
+ KBASE_JS_IRQ_THROTTLE_TIME_US * 1000);
+ timestamp_diff = ktime_sub(new_timestamp,
+ katom->start_timestamp);
+ if (ktime_to_ns(timestamp_diff) >= 0) {
+ /* Only update the timestamp if it's a better estimate
+ * than what's currently stored. This is because our
+ * estimate that accounts for the throttle time may be
+ * too much of an overestimate */
+ katom->start_timestamp = new_timestamp;
+ }
+ }
+}
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done)
+{
+ unsigned long flags;
+ int i;
+ u32 count = 0;
+ ktime_t end_timestamp = ktime_get();
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done);
+
+ memset(&kbdev->slot_submit_count_irq[0], 0,
+ sizeof(kbdev->slot_submit_count_irq));
+
+ /* write irq throttle register, this will prevent irqs from occurring
+ * until the given number of gpu clock cycles have passed */
+ {
+ int irq_throttle_cycles =
+ atomic_read(&kbdev->irq_throttle_cycles);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE),
+ irq_throttle_cycles, NULL);
+ }
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ while (done) {
+ u32 failed = done >> 16;
+
+ /* treat failed slots as finished slots */
+ u32 finished = (done & 0xFFFF) | failed;
+
+ /* Note: This is inherently unfair, as we always check
+ * for lower numbered interrupts before the higher
+ * numbered ones.*/
+ i = ffs(finished) - 1;
+ KBASE_DEBUG_ASSERT(i >= 0);
+
+ do {
+ int nr_done;
+ u32 active;
+ u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */
+ u64 job_tail = 0;
+
+ if (failed & (1u << i)) {
+ /* read out the job slot status code if the job
+ * slot reported failure */
+ completion_code = kbase_reg_read(kbdev,
+ JOB_SLOT_REG(i, JS_STATUS), NULL);
+
+ switch (completion_code) {
+ case BASE_JD_EVENT_STOPPED:
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_job_slots_event(
+ GATOR_MAKE_EVENT(
+ GATOR_JOB_SLOT_SOFT_STOPPED, i),
+ NULL, 0);
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_aux_job_softstop(i);
+#endif
+ /* Soft-stopped job - read the value of
+ * JS<n>_TAIL so that the job chain can
+ * be resumed */
+ job_tail = (u64)kbase_reg_read(kbdev,
+ JOB_SLOT_REG(i, JS_TAIL_LO),
+ NULL) |
+ ((u64)kbase_reg_read(kbdev,
+ JOB_SLOT_REG(i, JS_TAIL_HI),
+ NULL) << 32);
+ break;
+ case BASE_JD_EVENT_NOT_STARTED:
+ /* PRLAM-10673 can cause a TERMINATED
+ * job to come back as NOT_STARTED, but
+ * the error interrupt helps us detect
+ * it */
+ completion_code =
+ BASE_JD_EVENT_TERMINATED;
+ /* fall through */
+ default:
+ dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)",
+ i, completion_code,
+ kbase_exception_name
+ (kbdev,
+ completion_code));
+ }
+
+ kbase_gpu_irq_evict(kbdev, i);
+ }
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+ done & ((1 << i) | (1 << (i + 16))),
+ NULL);
+ active = kbase_reg_read(kbdev,
+ JOB_CONTROL_REG(JOB_IRQ_JS_STATE),
+ NULL);
+
+ if (((active >> i) & 1) == 0 &&
+ (((done >> (i + 16)) & 1) == 0)) {
+ /* There is a potential race we must work
+ * around:
+ *
+ * 1. A job slot has a job in both current and
+ * next registers
+ * 2. The job in current completes
+ * successfully, the IRQ handler reads
+ * RAWSTAT and calls this function with the
+ * relevant bit set in "done"
+ * 3. The job in the next registers becomes the
+ * current job on the GPU
+ * 4. Sometime before the JOB_IRQ_CLEAR line
+ * above the job on the GPU _fails_
+ * 5. The IRQ_CLEAR clears the done bit but not
+ * the failed bit. This atomically sets
+ * JOB_IRQ_JS_STATE. However since both jobs
+ * have now completed the relevant bits for
+ * the slot are set to 0.
+ *
+ * If we now did nothing then we'd incorrectly
+ * assume that _both_ jobs had completed
+ * successfully (since we haven't yet observed
+ * the fail bit being set in RAWSTAT).
+ *
+ * So at this point if there are no active jobs
+ * left we check to see if RAWSTAT has a failure
+ * bit set for the job slot. If it does we know
+ * that there has been a new failure that we
+ * didn't previously know about, so we make sure
+ * that we record this in active (but we wait
+ * for the next loop to deal with it).
+ *
+ * If we were handling a job failure (i.e. done
+ * has the relevant high bit set) then we know
+ * that the value read back from
+ * JOB_IRQ_JS_STATE is the correct number of
+ * remaining jobs because the failed job will
+ * have prevented any futher jobs from starting
+ * execution.
+ */
+ u32 rawstat = kbase_reg_read(kbdev,
+ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+ if ((rawstat >> (i + 16)) & 1) {
+ /* There is a failed job that we've
+ * missed - add it back to active */
+ active |= (1u << i);
+ }
+ }
+
+ dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n",
+ completion_code);
+
+ nr_done = kbase_backend_nr_atoms_submitted(kbdev, i);
+ nr_done -= (active >> i) & 1;
+ nr_done -= (active >> (i + 16)) & 1;
+
+ if (nr_done <= 0) {
+ dev_warn(kbdev->dev, "Spurious interrupt on slot %d",
+ i);
+
+ goto spurious;
+ }
+
+ count += nr_done;
+
+ while (nr_done) {
+ if (nr_done == 1) {
+ kbase_gpu_complete_hw(kbdev, i,
+ completion_code,
+ job_tail,
+ &end_timestamp);
+ kbase_jm_try_kick_all(kbdev);
+ } else {
+ /* More than one job has completed.
+ * Since this is not the last job being
+ * reported this time it must have
+ * passed. This is because the hardware
+ * will not allow further jobs in a job
+ * slot to complete until the failed job
+ * is cleared from the IRQ status.
+ */
+ kbase_gpu_complete_hw(kbdev, i,
+ BASE_JD_EVENT_DONE,
+ 0,
+ &end_timestamp);
+ }
+ nr_done--;
+ }
+ spurious:
+ done = kbase_reg_read(kbdev,
+ JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) {
+ /* Workaround for missing interrupt caused by
+ * PRLAM-10883 */
+ if (((active >> i) & 1) && (0 ==
+ kbase_reg_read(kbdev,
+ JOB_SLOT_REG(i,
+ JS_STATUS), NULL))) {
+ /* Force job slot to be processed again
+ */
+ done |= (1u << i);
+ }
+ }
+
+ failed = done >> 16;
+ finished = (done & 0xFFFF) | failed;
+ } while (finished & (1 << i));
+
+ kbasep_job_slot_update_head_start_timestamp(kbdev, i,
+ end_timestamp);
+ }
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+#if KBASE_GPU_RESET_EN
+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+ KBASE_RESET_GPU_COMMITTED) {
+ /* If we're trying to reset the GPU then we might be able to do
+ * it early (without waiting for a timeout) because some jobs
+ * have completed
+ */
+ kbasep_try_reset_gpu_early(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count);
+}
+KBASE_EXPORT_TEST_API(kbase_job_done);
+
+static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev,
+ u16 core_reqs)
+{
+ bool soft_stops_allowed = true;
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) {
+ if ((core_reqs & BASE_JD_REQ_T) != 0)
+ soft_stops_allowed = false;
+ }
+ return soft_stops_allowed;
+}
+
+static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev,
+ u16 core_reqs)
+{
+ bool hard_stops_allowed = true;
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) {
+ if ((core_reqs & BASE_JD_REQ_T) != 0)
+ hard_stops_allowed = false;
+ }
+ return hard_stops_allowed;
+}
+
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+ int js,
+ u32 action,
+ u16 core_reqs,
+ struct kbase_jd_atom *target_katom)
+{
+ struct kbase_context *kctx = target_katom->kctx;
+#if KBASE_TRACE_ENABLE
+ u32 status_reg_before;
+ u64 job_in_head_before;
+ u32 status_reg_after;
+
+ KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK)));
+
+ /* Check the head pointer */
+ job_in_head_before = ((u64) kbase_reg_read(kbdev,
+ JOB_SLOT_REG(js, JS_HEAD_LO), NULL))
+ | (((u64) kbase_reg_read(kbdev,
+ JOB_SLOT_REG(js, JS_HEAD_HI), NULL))
+ << 32);
+ status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+ NULL);
+#endif
+
+ if (action == JS_COMMAND_SOFT_STOP) {
+ bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev,
+ core_reqs);
+
+ if (!soft_stop_allowed) {
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X",
+ (unsigned int)core_reqs);
+#endif /* CONFIG_MALI_DEBUG */
+ return;
+ }
+
+ /* We are about to issue a soft stop, so mark the atom as having
+ * been soft stopped */
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED;
+ }
+
+ if (action == JS_COMMAND_HARD_STOP) {
+ bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev,
+ core_reqs);
+
+ if (!hard_stop_allowed) {
+ /* Jobs can be hard-stopped for the following reasons:
+ * * CFS decides the job has been running too long (and
+ * soft-stop has not occurred). In this case the GPU
+ * will be reset by CFS if the job remains on the
+ * GPU.
+ *
+ * * The context is destroyed, kbase_jd_zap_context
+ * will attempt to hard-stop the job. However it also
+ * has a watchdog which will cause the GPU to be
+ * reset if the job remains on the GPU.
+ *
+ * * An (unhandled) MMU fault occurred. As long as
+ * BASE_HW_ISSUE_8245 is defined then the GPU will be
+ * reset.
+ *
+ * All three cases result in the GPU being reset if the
+ * hard-stop fails, so it is safe to just return and
+ * ignore the hard-stop request.
+ */
+ dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X",
+ (unsigned int)core_reqs);
+ return;
+ }
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED;
+ }
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) &&
+ action == JS_COMMAND_SOFT_STOP) {
+ int i;
+
+ for (i = 0; i < kbase_backend_nr_atoms_submitted(kbdev, js);
+ i++) {
+ struct kbase_jd_atom *katom;
+
+ katom = kbase_gpu_inspect(kbdev, js, i);
+
+ KBASE_DEBUG_ASSERT(katom);
+
+ /* For HW_ISSUE_8316, only 'bad' jobs attacking the
+ * system can cause this issue: normally, all memory
+ * should be allocated in multiples of 4 pages, and
+ * growable memory should be changed size in multiples
+ * of 4 pages.
+ *
+ * Whilst such 'bad' jobs can be cleared by a GPU reset,
+ * the locking up of a uTLB entry caused by the bad job
+ * could also stall other ASs, meaning that other ASs'
+ * jobs don't complete in the 'grace' period before the
+ * reset. We don't want to lose other ASs' jobs when
+ * they would normally complete fine, so we must 'poke'
+ * the MMU regularly to help other ASs complete */
+ kbase_as_poking_timer_retain_atom(kbdev, katom->kctx,
+ katom);
+ }
+ }
+
+ if (kbase_hw_has_feature(kbdev,
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) {
+ if (action == JS_COMMAND_SOFT_STOP)
+ action = (target_katom->atom_flags &
+ KBASE_KATOM_FLAGS_JOBCHAIN) ?
+ JS_COMMAND_SOFT_STOP_1 :
+ JS_COMMAND_SOFT_STOP_0;
+ else
+ action = (target_katom->atom_flags &
+ KBASE_KATOM_FLAGS_JOBCHAIN) ?
+ JS_COMMAND_HARD_STOP_1 :
+ JS_COMMAND_HARD_STOP_0;
+ }
+
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx);
+
+#if KBASE_TRACE_ENABLE
+ status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS),
+ NULL);
+ if (status_reg_after == BASE_JD_EVENT_ACTIVE) {
+ struct kbase_jd_atom *head;
+ struct kbase_context *head_kctx;
+
+ head = kbase_gpu_inspect(kbdev, js, 0);
+ head_kctx = head->kctx;
+
+ if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx,
+ head, job_in_head_before, js);
+ else
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+ 0, js);
+
+ switch (action) {
+ case JS_COMMAND_SOFT_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx,
+ head, head->jc, js);
+ break;
+ case JS_COMMAND_SOFT_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx,
+ head, head->jc, js);
+ break;
+ case JS_COMMAND_SOFT_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx,
+ head, head->jc, js);
+ break;
+ case JS_COMMAND_HARD_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx,
+ head, head->jc, js);
+ break;
+ case JS_COMMAND_HARD_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx,
+ head, head->jc, js);
+ break;
+ case JS_COMMAND_HARD_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx,
+ head, head->jc, js);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ } else {
+ if (status_reg_before == BASE_JD_EVENT_ACTIVE)
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+ job_in_head_before, js);
+ else
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL,
+ 0, js);
+
+ switch (action) {
+ case JS_COMMAND_SOFT_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0,
+ js);
+ break;
+ case JS_COMMAND_SOFT_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL,
+ 0, js);
+ break;
+ case JS_COMMAND_SOFT_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL,
+ 0, js);
+ break;
+ case JS_COMMAND_HARD_STOP:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0,
+ js);
+ break;
+ case JS_COMMAND_HARD_STOP_0:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL,
+ 0, js);
+ break;
+ case JS_COMMAND_HARD_STOP_1:
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL,
+ 0, js);
+ break;
+ default:
+ BUG();
+ break;
+ }
+ }
+#endif
+}
+
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ /* Cancel any remaining running jobs for this kctx */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* Invalidate all jobs in context, to prevent re-submitting */
+ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+ if (!work_pending(&kctx->jctx.atoms[i].work))
+ kctx->jctx.atoms[i].event_code =
+ BASE_JD_EVENT_JOB_CANCELLED;
+ }
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+ kbase_job_slot_hardstop(kctx, i, NULL);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+ struct kbase_jd_atom *target_katom)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ int js = target_katom->slot_nr;
+ int priority = target_katom->sched_priority;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) {
+ struct kbase_jd_atom *katom;
+
+ katom = kbase_gpu_inspect(kbdev, js, i);
+ if (!katom)
+ continue;
+
+ if (katom->kctx != kctx)
+ continue;
+
+ if (katom->sched_priority > priority)
+ kbase_job_slot_softstop(kbdev, js, katom);
+ }
+}
+
+struct zap_reset_data {
+ /* The stages are:
+ * 1. The timer has never been called
+ * 2. The zap has timed out, all slots are soft-stopped - the GPU reset
+ * will happen. The GPU has been reset when
+ * kbdev->hwaccess.backend.reset_waitq is signalled
+ *
+ * (-1 - The timer has been cancelled)
+ */
+ int stage;
+ struct kbase_device *kbdev;
+ struct hrtimer timer;
+ spinlock_t lock; /* protects updates to stage member */
+};
+
+static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer)
+{
+ struct zap_reset_data *reset_data = container_of(timer,
+ struct zap_reset_data, timer);
+ struct kbase_device *kbdev = reset_data->kbdev;
+ unsigned long flags;
+
+ spin_lock_irqsave(&reset_data->lock, flags);
+
+ if (reset_data->stage == -1)
+ goto out;
+
+#if KBASE_GPU_RESET_EN
+ if (kbase_prepare_to_reset_gpu(kbdev)) {
+ dev_err(kbdev->dev, "Issueing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g. process exit)\n",
+ ZAP_TIMEOUT);
+ kbase_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ reset_data->stage = 2;
+
+ out:
+ spin_unlock_irqrestore(&reset_data->lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct zap_reset_data reset_data;
+ unsigned long flags;
+
+ hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ reset_data.timer.function = zap_timeout_callback;
+
+ spin_lock_init(&reset_data.lock);
+
+ reset_data.kbdev = kbdev;
+ reset_data.stage = 1;
+
+ hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+ HRTIMER_MODE_REL);
+
+ /* Wait for all jobs to finish, and for the context to be not-scheduled
+ * (due to kbase_job_zap_context(), we also guarentee it's not in the JS
+ * policy queue either */
+ wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+ kctx->jctx.sched_info.ctx.is_scheduled == false);
+
+ spin_lock_irqsave(&reset_data.lock, flags);
+ if (reset_data.stage == 1) {
+ /* The timer hasn't run yet - so cancel it */
+ reset_data.stage = -1;
+ }
+ spin_unlock_irqrestore(&reset_data.lock, flags);
+
+ hrtimer_cancel(&reset_data.timer);
+
+ if (reset_data.stage == 2) {
+ /* The reset has already started.
+ * Wait for the reset to complete
+ */
+ wait_event(kbdev->hwaccess.backend.reset_wait,
+ atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+ == KBASE_RESET_GPU_NOT_PENDING);
+ }
+ destroy_hrtimer_on_stack(&reset_data.timer);
+
+ dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+ /* Ensure that the signallers of the waitqs have finished */
+ mutex_lock(&kctx->jctx.lock);
+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_unlock(&kctx->jctx.lock);
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+ kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+ "Mali reset workqueue", 0, 1);
+ if (NULL == kbdev->hwaccess.backend.reset_workq)
+ return -EINVAL;
+
+ KBASE_DEBUG_ASSERT(0 ==
+ object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+ INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+ kbasep_reset_timeout_worker);
+
+ hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ kbdev->hwaccess.backend.reset_timer.function =
+ kbasep_reset_timer_callback;
+#endif
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+ destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev: The kbase device
+ * @js: The job slot to soft-stop
+ * @target_katom: The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags: Flags to pass in about the soft-stop
+ *
+ * Context:
+ * The job slot lock must be held when calling this function.
+ * The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+ KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+ kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+ JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev: The kbase device
+ * @js: The job slot to soft-stop
+ * @target_katom: The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ * The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom)
+{
+ kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx: The kbase context that contains the job(s) that should
+ * be hard-stopped
+ * @js: The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ * jobs from the context)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+ struct kbase_jd_atom *target_katom)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ bool stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+ target_katom,
+ JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+ if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+ kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+ (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_T76X_3542) &&
+ (target_katom == NULL || target_katom->core_req &
+ BASE_JD_REQ_FS_AFBC)))) {
+ /* MIDBASE-2916 if a fragment job with AFBC encoding is
+ * hardstopped, ensure to do a soft reset also in order to
+ * clear the GPU status.
+ * Workaround for HW issue 8401 has an issue,so after
+ * hard-stopping just reset the GPU. This will ensure that the
+ * jobs leave the GPU.*/
+ if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+ dev_err(kbdev->dev, "Issueing GPU soft-reset after hard stopping due to hardware issue");
+ kbase_reset_gpu_locked(kbdev);
+ }
+ }
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+ u16 core_reqs, struct kbase_jd_atom *target_katom)
+{
+ u32 hw_action = action & JS_COMMAND_MASK;
+
+ /* For hard-stop, don't enter if hard-stop not allowed */
+ if (hw_action == JS_COMMAND_HARD_STOP &&
+ !kbasep_hard_stop_allowed(kbdev, core_reqs))
+ return;
+
+ /* For soft-stop, don't enter if soft-stop not allowed, or isn't
+ * causing disjoint */
+ if (hw_action == JS_COMMAND_SOFT_STOP &&
+ !(kbasep_soft_stop_allowed(kbdev, core_reqs) &&
+ (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+ return;
+
+ /* Nothing to do if already logged disjoint state on this atom */
+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+ return;
+
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+ kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+ struct kbase_jd_atom *target_katom)
+{
+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+ target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+ kbase_disjoint_state_down(kbdev);
+ }
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+ int i;
+
+ dev_err(kbdev->dev, "Register state:");
+ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x",
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
+ for (i = 0; i < 3; i++) {
+ dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x",
+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+ NULL),
+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+ NULL));
+ }
+ dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+}
+
+static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *hwcnt_setup)
+{
+ hwcnt_setup->dump_buffer =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) &
+ 0xffffffff;
+ hwcnt_setup->dump_buffer |= (u64)
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) <<
+ 32;
+ hwcnt_setup->jm_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx);
+ hwcnt_setup->shader_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx);
+ hwcnt_setup->tiler_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx);
+ hwcnt_setup->mmu_l2_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx);
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ int i;
+ ktime_t end_timestamp = ktime_get();
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_uk_hwcnt_setup hwcnt_setup = { {0} };
+ enum kbase_instr_state bckp_state;
+ bool try_schedule = false;
+ bool restore_hwc = false;
+
+ u32 mmu_irq_mask;
+
+ KBASE_DEBUG_ASSERT(data);
+
+ kbdev = container_of(data, struct kbase_device,
+ hwaccess.backend.reset_work);
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+ /* Make sure the timer has completed - this cannot be done from
+ * interrupt context, so this cannot be done within
+ * kbasep_try_reset_gpu_early. */
+ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+ if (kbase_pm_context_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ /* This would re-activate the GPU. Since it's already idle,
+ * there's no need to reset it */
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_NOT_PENDING);
+ kbase_disjoint_state_down(kbdev);
+ wake_up(&kbdev->hwaccess.backend.reset_wait);
+ return;
+ }
+
+ mutex_lock(&kbdev->pm.lock);
+ /* We hold the pm lock, so there ought to be a current policy */
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+ /* All slot have been soft-stopped and we've waited
+ * SOFT_STOP_RESET_TIMEOUT for the slots to clear, at this point we
+ * assume that anything that is still left on the GPU is stuck there and
+ * we'll kill it when we reset the GPU */
+
+ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+ RESET_TIMEOUT);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* the same interrupt handler preempted itself */
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+ /* Save the HW counters setup */
+ if (kbdev->hwcnt.kctx != NULL) {
+ struct kbase_context *kctx = kbdev->hwcnt.kctx;
+
+ if (kctx->jctx.sched_info.ctx.is_scheduled) {
+ kbasep_save_hwcnt_setup(kbdev, kctx, &hwcnt_setup);
+
+ restore_hwc = true;
+ }
+ }
+
+ /* Output the state of some interesting registers to help in the
+ * debugging of GPU resets */
+ kbase_debug_dump_registers(kbdev);
+
+ bckp_state = kbdev->hwcnt.backend.state;
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING;
+ kbdev->hwcnt.backend.triggered = 0;
+
+ mmu_irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ /* Disable IRQ to avoid IRQ handlers to kick in after releasing the
+ * spinlock; this also clears any outstanding interrupts */
+ kbase_pm_disable_interrupts(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Ensure that any IRQ handlers have finished
+ * Must be done without any locks IRQ handlers will take */
+ kbase_synchronize_irqs(kbdev);
+
+ /* Reset the GPU */
+ kbase_pm_init_hw(kbdev, 0);
+ kbase_pm_enable_interrupts_mmu_mask(kbdev, mmu_irq_mask);
+ /* IRQs were re-enabled by kbase_pm_init_hw, and GPU is still powered */
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Restore the HW counters setup */
+ if (restore_hwc) {
+ struct kbase_context *kctx = kbdev->hwcnt.kctx;
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+ (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
+ PRFCNT_CONFIG_MODE_OFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+ hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+ hwcnt_setup.dump_buffer >> 32, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+ hwcnt_setup.jm_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+ hwcnt_setup.shader_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+ hwcnt_setup.mmu_l2_bm, kctx);
+
+ /* Due to PRLAM-8186 we need to disable the Tiler before we
+ * enable the HW counter dump. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ 0, kctx);
+ else
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ hwcnt_setup.tiler_bm, kctx);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+ (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
+ PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+ /* If HW has PRLAM-8186 we can now re-enable the tiler HW
+ * counters dump */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ hwcnt_setup.tiler_bm, kctx);
+ }
+ kbdev->hwcnt.backend.state = bckp_state;
+ switch (kbdev->hwcnt.backend.state) {
+ /* Cases for waking kbasep_cache_clean_worker worker */
+ case KBASE_INSTR_STATE_CLEANED:
+ /* Cache-clean IRQ occurred, but we reset:
+ * Wakeup incase the waiter saw RESETTING */
+ case KBASE_INSTR_STATE_REQUEST_CLEAN:
+ /* After a clean was requested, but before the regs were
+ * written:
+ * Wakeup incase the waiter saw RESETTING */
+ wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+ break;
+ case KBASE_INSTR_STATE_CLEANING:
+ /* Either:
+ * 1) We've not got the Cache-clean IRQ yet: it was lost, or:
+ * 2) We got it whilst resetting: it was voluntarily lost
+ *
+ * So, move to the next state and wakeup: */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+ wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+ break;
+
+ /* Cases for waking anyone else */
+ case KBASE_INSTR_STATE_DUMPING:
+ /* If dumping, abort the dump, because we may've lost the IRQ */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+ break;
+ case KBASE_INSTR_STATE_DISABLED:
+ case KBASE_INSTR_STATE_IDLE:
+ case KBASE_INSTR_STATE_FAULT:
+ /* Every other reason: wakeup in that state */
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+ break;
+
+ /* Unhandled cases */
+ case KBASE_INSTR_STATE_RESETTING:
+ default:
+ BUG();
+ break;
+ }
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Complete any jobs that were still on the GPU */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbase_backend_reset(kbdev, &end_timestamp);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ mutex_unlock(&kbdev->pm.lock);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* Reprogram the GPU's MMU */
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ struct kbase_as *as = &kbdev->as[i];
+
+ mutex_lock(&as->transaction_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (js_devdata->runpool_irq.per_as_data[i].kctx)
+ kbase_mmu_update(
+ js_devdata->runpool_irq.per_as_data[i].kctx);
+ else
+ kbase_mmu_disable_as(kbdev, i);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&as->transaction_mutex);
+ }
+
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_NOT_PENDING);
+
+ kbase_disjoint_state_down(kbdev);
+
+ wake_up(&kbdev->hwaccess.backend.reset_wait);
+ dev_err(kbdev->dev, "Reset complete");
+
+ if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending)
+ try_schedule = true;
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ /* Find out what cores are required now */
+ kbase_pm_update_cores_state(kbdev);
+
+ /* Synchronously request and wait for those cores, because if
+ * instrumentation is enabled it would need them immediately. */
+ kbase_pm_check_transitions_sync(kbdev);
+
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Try submitting some jobs to restart processing */
+ if (try_schedule) {
+ KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u,
+ 0);
+ kbase_js_sched_all(kbdev);
+ }
+
+ kbase_pm_context_idle(kbdev);
+ KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0);
+}
+
+static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer)
+{
+ struct kbase_device *kbdev = container_of(timer, struct kbase_device,
+ hwaccess.backend.reset_timer);
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Reset still pending? */
+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) ==
+ KBASE_RESET_GPU_COMMITTED)
+ queue_work(kbdev->hwaccess.backend.reset_workq,
+ &kbdev->hwaccess.backend.reset_work);
+
+ return HRTIMER_NORESTART;
+}
+
+/*
+ * If all jobs are evicted from the GPU then we can reset the GPU
+ * immediately instead of waiting for the timeout to elapse
+ */
+
+static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev)
+{
+ int i;
+ int pending_jobs = 0;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Count the number of jobs */
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+ pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i);
+
+ if (pending_jobs > 0) {
+ /* There are still jobs on the GPU - wait */
+ return;
+ }
+
+ /* Check that the reset has been committed to (i.e. kbase_reset_gpu has
+ * been called), and that no other thread beat this thread to starting
+ * the reset */
+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) !=
+ KBASE_RESET_GPU_COMMITTED) {
+ /* Reset has already occurred */
+ return;
+ }
+
+ queue_work(kbdev->hwaccess.backend.reset_workq,
+ &kbdev->hwaccess.backend.reset_work);
+}
+
+static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_try_reset_gpu_early_locked(kbdev);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU
+ * @kbdev: kbase device
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return:
+ * The function returns a boolean which should be interpreted as follows:
+ * true - Prepared for reset, kbase_reset_gpu should be called.
+ * false - Another thread is performing a reset, kbase_reset_gpu should
+ * not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev)
+{
+ int i;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_NOT_PENDING,
+ KBASE_RESET_GPU_PREPARED) !=
+ KBASE_RESET_GPU_NOT_PENDING) {
+ /* Some other thread is already resetting the GPU */
+ return false;
+ }
+
+ kbase_disjoint_state_up(kbdev);
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
+ kbase_job_slot_softstop(kbdev, i, NULL);
+
+ return true;
+}
+
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ bool ret;
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ ret = kbase_prepare_to_reset_gpu_locked(kbdev);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);
+
+/*
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for
+ * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset
+ * has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Note this is an assert/atomic_set because it is a software issue for
+ * a race to be occuring here */
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+ KBASE_RESET_GPU_PREPARED);
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_COMMITTED);
+
+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+ kbdev->reset_timeout_ms);
+
+ hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+ HRTIMER_MODE_REL);
+
+ /* Try resetting early */
+ kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Note this is an assert/atomic_set because it is a software issue for
+ * a race to be occuring here */
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+ KBASE_RESET_GPU_PREPARED);
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_COMMITTED);
+
+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (upto %d ms) for all jobs to complete soft-stop\n",
+ kbdev->reset_timeout_ms);
+ hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+ HRTIMER_MODE_REL);
+
+ /* Try resetting early */
+ kbasep_try_reset_gpu_early_locked(kbdev);
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 00000000000..3b959d5b447
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,185 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev: Device pointer
+ * @katom: Atom to submit
+ * @js: Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpoool_irq::lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev: Device pointer
+ * @s: Job slot
+ * @completion_code: Completion code of job reported by GPU
+ * @job_tail: Job tail address reported by GPU
+ * @end_timestamp: Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+ u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string)
+{
+ sprintf(js_string, "job_slot_%i", js);
+ return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev: Device pointer
+ * @katom: Atom to submit
+ * @js: Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpoool_irq::lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ * on the specified atom
+ * @kbdev: Device pointer
+ * @js: Job slot to stop on
+ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or
+ * JSn_COMMAND_SOFT_STOP
+ * @core_reqs: Core requirements of atom to stop
+ * @target_katom: Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+ int js,
+ u32 action,
+ u16 core_reqs,
+ struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ * slot belonging to a given context.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer. May be NULL
+ * @katom: Specific atom to stop. May be NULL
+ * @js: Job slot to hard stop
+ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or
+ * JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
+ *
+ * Return: true if an atom was stopped, false otherwise
+ */
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js,
+ struct kbase_jd_atom *katom,
+ u32 action);
+
+/**
+ * kbase_job_slot_init - Initialise job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_job_slot_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_halt - Halt the job slot framework
+ * @kbdev: Device pointer
+ *
+ * Should prevent any further job slot processing
+ */
+void kbase_job_slot_halt(struct kbase_device *kbdev);
+
+/**
+ * kbase_job_slot_term - Terminate job slot framework
+ * @kbdev: Device pointer
+ *
+ * Called on driver termination
+ */
+void kbase_job_slot_term(struct kbase_device *kbdev);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ * not be called.
+ */
+bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu_locked - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it
+ * returns true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu_locked(struct kbase_device *kbdev);
+#endif
+
+#endif /* _KBASE_JM_HWACCESS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
new file mode 100644
index 00000000000..1bf9c709752
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c
@@ -0,0 +1,1407 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_10969_workaround.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* Return whether the specified ringbuffer is empty. HW access lock must be
+ * held */
+#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx)
+/* Return number of atoms currently in the specified ringbuffer. HW access lock
+ * must be held */
+#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer
+ * @kbdev: Device pointer
+ * @katom: Atom to enqueue
+ *
+ * Context: Caller must hold the HW access lock
+ */
+static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr];
+
+ WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE);
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom;
+ rb->write_idx++;
+
+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+/**
+ * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once
+ * it has been completed
+ * @kbdev: Device pointer
+ * @js: Job slot to remove atom from
+ *
+ * Context: Caller must hold the HW access lock
+ *
+ * Return: Atom removed from ringbuffer
+ */
+static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev,
+ int js)
+{
+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+ struct kbase_jd_atom *katom;
+
+ if (SLOT_RB_EMPTY(rb)) {
+ WARN(1, "GPU ringbuffer unexpectedly empty\n");
+ return NULL;
+ }
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom;
+
+ kbase_gpu_release_atom(kbdev, katom);
+
+ rb->read_idx++;
+
+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB;
+
+ kbase_js_debug_log_current_affinities(kbdev);
+
+ return katom;
+}
+
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+ int idx)
+{
+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if ((SLOT_RB_ENTRIES(rb) - 1) < idx)
+ return NULL; /* idx out of range */
+
+ return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom;
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+ int js)
+{
+ return kbase_gpu_inspect(kbdev, js, 0);
+}
+
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+ int js)
+{
+ struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js];
+
+ if (SLOT_RB_EMPTY(rb))
+ return NULL;
+
+ return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom;
+}
+
+/**
+ * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently
+ * on the GPU
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: true if there are atoms on the GPU for slot js,
+ * false otherwise
+ */
+static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+ int i;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (i = 0; i < SLOT_RB_SIZE; i++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+ if (!katom)
+ return false;
+ if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED ||
+ katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms
+ * currently on the GPU
+ * @kbdev: Device pointer
+ *
+ * Return: true if there are any atoms on the GPU, false otherwise
+ */
+static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev)
+{
+ int js;
+ int i;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ for (i = 0; i < SLOT_RB_SIZE; i++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+ if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+ return true;
+ }
+ }
+ return false;
+}
+
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js)
+{
+ int nr = 0;
+ int i;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (i = 0; i < SLOT_RB_SIZE; i++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+ if (katom && (katom->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_SUBMITTED))
+ nr++;
+ }
+
+ return nr;
+}
+
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js)
+{
+ int nr = 0;
+ int i;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (i = 0; i < SLOT_RB_SIZE; i++) {
+ if (kbase_gpu_inspect(kbdev, js, i))
+ nr++;
+ }
+
+ return nr;
+}
+
+static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js,
+ enum kbase_atom_gpu_rb_state min_rb_state)
+{
+ int nr = 0;
+ int i;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (i = 0; i < SLOT_RB_SIZE; i++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i);
+
+ if (katom && (katom->gpu_rb_state >= min_rb_state))
+ nr++;
+ }
+
+ return nr;
+}
+
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js)
+{
+ if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) !=
+ KBASE_RESET_GPU_NOT_PENDING) {
+ /* The GPU is being reset - so prevent submission */
+ return 0;
+ }
+
+ return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
+}
+
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev,
+ int js,
+ struct kbase_jd_atom *katom)
+{
+ /* The most recently checked affinity. Having this at this scope allows
+ * us to guarantee that we've checked the affinity in this function
+ * call.
+ */
+ u64 recently_chosen_affinity = 0;
+ bool chosen_affinity = false;
+ bool retry;
+
+ do {
+ retry = false;
+
+ /* NOTE: The following uses a number of FALLTHROUGHs to optimize
+ * the calls to this function. Ending of the function is
+ * indicated by BREAK OUT */
+ switch (katom->coreref_state) {
+ /* State when job is first attempted to be run */
+ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+ KBASE_DEBUG_ASSERT(katom->affinity == 0);
+
+ /* Compute affinity */
+ if (false == kbase_js_choose_affinity(
+ &recently_chosen_affinity, kbdev, katom,
+ js)) {
+ /* No cores are currently available */
+ /* *** BREAK OUT: No state transition *** */
+ break;
+ }
+
+ chosen_affinity = true;
+
+ /* Request the cores */
+ kbase_pm_request_cores(kbdev,
+ katom->core_req & BASE_JD_REQ_T,
+ recently_chosen_affinity);
+
+ katom->affinity = recently_chosen_affinity;
+
+ /* Proceed to next state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+ {
+ enum kbase_pm_cores_ready cores_ready;
+
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+
+ cores_ready = kbase_pm_register_inuse_cores(
+ kbdev,
+ katom->core_req & BASE_JD_REQ_T,
+ katom->affinity);
+ if (cores_ready == KBASE_NEW_AFFINITY) {
+ /* Affinity no longer valid - return to
+ * previous state */
+ kbasep_js_job_check_deref_cores(kbdev,
+ katom);
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REGISTER_INUSE_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) katom->affinity);
+ /* *** BREAK OUT: Return to previous
+ * state, retry *** */
+ retry = true;
+ break;
+ }
+ if (cores_ready == KBASE_CORES_NOT_READY) {
+ /* Stay in this state and return, to
+ * retry at this state later */
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REGISTER_INUSE_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) katom->affinity);
+ /* *** BREAK OUT: No state transition
+ * *** */
+ break;
+ }
+ /* Proceed to next state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+ }
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+
+ /* Optimize out choosing the affinity twice in the same
+ * function call */
+ if (chosen_affinity == false) {
+ /* See if the affinity changed since a previous
+ * call. */
+ if (false == kbase_js_choose_affinity(
+ &recently_chosen_affinity,
+ kbdev, katom, js)) {
+ /* No cores are currently available */
+ kbasep_js_job_check_deref_cores(kbdev,
+ katom);
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REQUEST_ON_RECHECK_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) recently_chosen_affinity);
+ /* *** BREAK OUT: Transition to lower
+ * state *** */
+ break;
+ }
+ chosen_affinity = true;
+ }
+
+ /* Now see if this requires a different set of cores */
+ if (recently_chosen_affinity != katom->affinity) {
+ enum kbase_pm_cores_ready cores_ready;
+
+ kbase_pm_request_cores(kbdev,
+ katom->core_req & BASE_JD_REQ_T,
+ recently_chosen_affinity);
+
+ /* Register new cores whilst we still hold the
+ * old ones, to minimize power transitions */
+ cores_ready =
+ kbase_pm_register_inuse_cores(kbdev,
+ katom->core_req & BASE_JD_REQ_T,
+ recently_chosen_affinity);
+ kbasep_js_job_check_deref_cores(kbdev, katom);
+
+ /* Fixup the state that was reduced by
+ * deref_cores: */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+ katom->affinity = recently_chosen_affinity;
+ if (cores_ready == KBASE_NEW_AFFINITY) {
+ /* Affinity no longer valid - return to
+ * previous state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+
+ kbasep_js_job_check_deref_cores(kbdev,
+ katom);
+
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REGISTER_INUSE_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) katom->affinity);
+ /* *** BREAK OUT: Return to previous
+ * state, retry *** */
+ retry = true;
+ break;
+ }
+ /* Now might be waiting for powerup again, with
+ * a new affinity */
+ if (cores_ready == KBASE_CORES_NOT_READY) {
+ /* Return to previous state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES;
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_REGISTER_ON_RECHECK_FAILED,
+ katom->kctx, katom,
+ katom->jc, js,
+ (u32) katom->affinity);
+ /* *** BREAK OUT: Transition to lower
+ * state *** */
+ break;
+ }
+ }
+ /* Proceed to next state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+ case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS:
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+ KBASE_DEBUG_ASSERT(katom->affinity ==
+ recently_chosen_affinity);
+
+ /* Note: this is where the caller must've taken the
+ * runpool_irq.lock */
+
+ /* Check for affinity violations - if there are any,
+ * then we just ask the caller to requeue and try again
+ * later */
+ if (kbase_js_affinity_would_violate(kbdev, js,
+ katom->affinity) != false) {
+ /* Return to previous state */
+ katom->coreref_state =
+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY;
+ /* *** BREAK OUT: Transition to lower state ***
+ */
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev,
+ JS_CORE_REF_AFFINITY_WOULD_VIOLATE,
+ katom->kctx, katom, katom->jc, js,
+ (u32) katom->affinity);
+ break;
+ }
+
+ /* No affinity violations would result, so the cores are
+ * ready */
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY;
+ /* *** BREAK OUT: Cores Ready *** */
+ break;
+
+ default:
+ KBASE_DEBUG_ASSERT_MSG(false,
+ "Unhandled kbase_atom_coreref_state %d",
+ katom->coreref_state);
+ break;
+ }
+ } while (retry != false);
+
+ return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY);
+}
+
+static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ switch (katom->coreref_state) {
+ case KBASE_ATOM_COREREF_STATE_READY:
+ /* State where atom was submitted to the HW - just proceed to
+ * power-down */
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+
+ /* *** FALLTHROUGH *** */
+
+ case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY:
+ /* State where cores were registered */
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+ kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+ katom->affinity);
+
+ break;
+
+ case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES:
+ /* State where cores were requested, but not registered */
+ KBASE_DEBUG_ASSERT(katom->affinity != 0 ||
+ (katom->core_req & BASE_JD_REQ_T));
+ kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T,
+ katom->affinity);
+ break;
+
+ case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED:
+ /* Initial state - nothing required */
+ KBASE_DEBUG_ASSERT(katom->affinity == 0);
+ break;
+
+ default:
+ KBASE_DEBUG_ASSERT_MSG(false,
+ "Unhandled coreref_state: %d",
+ katom->coreref_state);
+ break;
+ }
+
+ katom->affinity = 0;
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+}
+
+
+static void kbase_gpu_release_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ switch (katom->gpu_rb_state) {
+ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+ /* Should be impossible */
+ WARN(1, "Attempting to release atom not in ringbuffer\n");
+ break;
+
+ case KBASE_ATOM_GPU_RB_SUBMITTED:
+ /* Inform power management at start/finish of atom
+ * so it can update its GPU utilisation metrics. */
+ kbase_pm_metrics_release_atom(kbdev, katom);
+
+ if (katom->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_release_gpu_cycle_counter(kbdev);
+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_READY:
+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+ kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr,
+ katom->affinity);
+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+ kbasep_js_job_check_deref_cores(kbdev, katom);
+ break;
+
+ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+ /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+ break;
+ }
+
+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED;
+}
+
+static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ kbase_gpu_release_atom(kbdev, katom);
+ katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+}
+
+static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js)
+{
+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+ bool slot_busy[3];
+
+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+ return true;
+ slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0,
+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+ slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1,
+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+ slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2,
+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY);
+
+ if ((js == 2 && !(slot_busy[0] || slot_busy[1])) ||
+ (js != 2 && !slot_busy[2]))
+ return true;
+
+ /* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */
+ if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) ||
+ kbase_gpu_atoms_submitted(kbdev, 1) ||
+ backend->rmu_workaround_flag))
+ return false;
+
+ /* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */
+ if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) ||
+ !backend->rmu_workaround_flag))
+ return false;
+
+ backend->rmu_workaround_flag = !backend->rmu_workaround_flag;
+
+ return true;
+}
+
+static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev)
+{
+ return kbdev->js_data.runpool_irq.secure_mode;
+}
+
+static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev)
+{
+ int err = -EINVAL;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ WARN_ONCE(!kbdev->secure_ops,
+ "Cannot enable secure mode: secure callbacks not specified.\n");
+
+ if (kbdev->secure_ops) {
+ /* Switch GPU to secure mode */
+ err = kbdev->secure_ops->secure_mode_enable(kbdev);
+
+ if (err)
+ dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err);
+ else
+ kbdev->js_data.runpool_irq.secure_mode = true;
+ }
+
+ return err;
+}
+
+static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev)
+{
+ int err = -EINVAL;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ WARN_ONCE(!kbdev->secure_ops,
+ "Cannot disable secure mode: secure callbacks not specified.\n");
+
+ if (kbdev->secure_ops) {
+ /* Switch GPU to non-secure mode */
+ err = kbdev->secure_ops->secure_mode_disable(kbdev);
+
+ if (err)
+ dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err);
+ else
+ kbdev->js_data.runpool_irq.secure_mode = false;
+ }
+
+ return err;
+}
+
+void kbase_gpu_slot_update(struct kbase_device *kbdev)
+{
+ int js;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ struct kbase_jd_atom *katom[2];
+ int idx;
+
+ katom[0] = kbase_gpu_inspect(kbdev, js, 0);
+ katom[1] = kbase_gpu_inspect(kbdev, js, 1);
+ WARN_ON(katom[1] && !katom[0]);
+
+ for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+ bool cores_ready;
+
+ if (!katom[idx])
+ continue;
+
+ switch (katom[idx]->gpu_rb_state) {
+ case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB:
+ /* Should be impossible */
+ WARN(1, "Attempting to update atom not in ringbuffer\n");
+ break;
+
+ case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
+ if (katom[idx]->atom_flags &
+ KBASE_KATOM_FLAG_X_DEP_BLOCKED)
+ break;
+
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+ case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
+ cores_ready =
+ kbasep_js_job_check_ref_cores(kbdev, js,
+ katom[idx]);
+
+ if (katom[idx]->event_code ==
+ BASE_JD_EVENT_PM_EVENT) {
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_RETURN_TO_JS;
+ break;
+ }
+
+ if (!cores_ready)
+ break;
+
+ kbase_js_affinity_retain_slot_cores(kbdev, js,
+ katom[idx]->affinity);
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_WAITING_AFFINITY:
+ if (!kbase_gpu_rmu_workaround(kbdev, js))
+ break;
+
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE:
+ if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) {
+ int err = 0;
+
+ /* Not in correct mode, take action */
+ if (kbase_gpu_atoms_submitted_any(kbdev)) {
+ /*
+ * We are not in the correct
+ * GPU mode for this job, and
+ * we can't switch now because
+ * there are jobs already
+ * running.
+ */
+ break;
+ }
+
+ /* No jobs running, so we can switch GPU mode right now */
+ if (kbase_jd_katom_is_secure(katom[idx])) {
+ err = kbase_gpu_secure_mode_enable(kbdev);
+ } else {
+ err = kbase_gpu_secure_mode_disable(kbdev);
+ }
+
+ if (err) {
+ /* Failed to switch secure mode, fail atom */
+ katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID;
+ kbase_gpu_mark_atom_for_return(kbdev, katom[idx]);
+ break;
+ }
+ }
+
+ /* Secure mode sanity checks */
+ KBASE_DEBUG_ASSERT_MSG(
+ kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev),
+ "Secure mode of atom (%d) doesn't match secure mode of GPU (%d)",
+ kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev));
+ KBASE_DEBUG_ASSERT_MSG(
+ (kbase_jd_katom_is_secure(katom[idx]) && js == 0) ||
+ !kbase_jd_katom_is_secure(katom[idx]),
+ "Secure atom on JS%d not supported", js);
+
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_READY;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_READY:
+ /* Only submit if head atom or previous atom
+ * already submitted */
+ if (idx == 1 &&
+ (katom[0]->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED &&
+ katom[0]->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
+ break;
+
+ /* Check if this job needs the cycle counter
+ * enabled before submission */
+ if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_request_gpu_cycle_counter_l2_is_on(
+ kbdev);
+
+ /* Inform power management at start/finish of
+ * atom so it can update its GPU utilisation
+ * metrics. */
+ kbase_pm_metrics_run_atom(kbdev, katom[idx]);
+
+ kbase_job_hw_submit(kbdev, katom[idx], js);
+ katom[idx]->gpu_rb_state =
+ KBASE_ATOM_GPU_RB_SUBMITTED;
+
+ /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */
+
+ case KBASE_ATOM_GPU_RB_SUBMITTED:
+ /* Atom submitted to HW, nothing else to do */
+ break;
+
+ case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
+ /* Only return if head atom or previous atom
+ * already removed - as atoms must be returned
+ * in order */
+ if (idx == 0 || katom[0]->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+ kbase_gpu_dequeue_atom(kbdev, js);
+ kbase_jm_return_atom_to_js(kbdev,
+ katom[idx]);
+ }
+ break;
+ }
+ }
+ }
+
+ /* Warn if PRLAM-8987 affinity restrictions are violated */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+ WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) ||
+ kbase_gpu_atoms_submitted(kbdev, 1)) &&
+ kbase_gpu_atoms_submitted(kbdev, 2));
+}
+
+
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ kbase_gpu_enqueue_atom(kbdev, katom);
+ kbase_gpu_slot_update(kbdev);
+}
+
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_jd_atom *next_katom;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ katom = kbase_gpu_inspect(kbdev, js, 0);
+ next_katom = kbase_gpu_inspect(kbdev, js, 1);
+
+ if (next_katom && katom->kctx == next_katom->kctx &&
+ next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED &&
+ (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL)
+ != 0 ||
+ kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL)
+ != 0)) {
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
+ JS_COMMAND_NOP, NULL);
+ next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
+ return true;
+ }
+
+ return false;
+}
+
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+ u32 completion_code,
+ u64 job_tail,
+ ktime_t *end_timestamp)
+{
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
+ struct kbase_context *kctx = katom->kctx;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) &&
+ completion_code != BASE_JD_EVENT_DONE &&
+ !(completion_code & BASE_JD_SW_EVENT)) {
+ katom->need_cache_flush_cores_retained = katom->affinity;
+ kbase_pm_request_cores(kbdev, false, katom->affinity);
+ } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) {
+ if (kbdev->gpu_props.num_core_groups > 1 &&
+ !(katom->affinity &
+ kbdev->gpu_props.props.coherency_info.group[0].core_mask
+ ) &&
+ (katom->affinity &
+ kbdev->gpu_props.props.coherency_info.group[1].core_mask
+ )) {
+ dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n");
+ katom->need_cache_flush_cores_retained =
+ katom->affinity;
+ kbase_pm_request_cores(kbdev, false,
+ katom->affinity);
+ }
+ }
+
+ katom = kbase_gpu_dequeue_atom(kbdev, js);
+
+ kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0);
+
+ if (completion_code == BASE_JD_EVENT_STOPPED) {
+ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+ 0);
+
+ /*
+ * Dequeue next atom from ringbuffers on same slot if required.
+ * This atom will already have been removed from the NEXT
+ * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that
+ * the atoms on this slot are returned in the correct order.
+ */
+ if (next_katom && katom->kctx == next_katom->kctx) {
+ kbase_gpu_dequeue_atom(kbdev, js);
+ kbase_jm_return_atom_to_js(kbdev, next_katom);
+ }
+ } else if (completion_code != BASE_JD_EVENT_DONE) {
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ int i;
+
+#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0
+ KBASE_TRACE_DUMP(kbdev);
+#endif
+ kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+ /*
+ * Remove all atoms on the same context from ringbuffers. This
+ * will not remove atoms that are already on the GPU, as these
+ * are guaranteed not to have fail dependencies on the failed
+ * atom.
+ */
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+ struct kbase_jd_atom *katom_idx0 =
+ kbase_gpu_inspect(kbdev, i, 0);
+ struct kbase_jd_atom *katom_idx1 =
+ kbase_gpu_inspect(kbdev, i, 1);
+
+ if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+ katom_idx0->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Dequeue katom_idx0 from ringbuffer */
+ kbase_gpu_dequeue_atom(kbdev, i);
+
+ if (katom_idx1 &&
+ katom_idx1->kctx == katom->kctx &&
+ katom_idx0->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Dequeue katom_idx1 from ringbuffer */
+ kbase_gpu_dequeue_atom(kbdev, i);
+
+ katom_idx1->event_code =
+ BASE_JD_EVENT_STOPPED;
+ kbase_jm_return_atom_to_js(kbdev,
+ katom_idx1);
+ }
+ katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+ kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+ } else if (katom_idx1 &&
+ katom_idx1->kctx == katom->kctx &&
+ katom_idx1->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Can not dequeue this atom yet - will be
+ * dequeued when atom at idx0 completes */
+ katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+ kbase_gpu_mark_atom_for_return(kbdev,
+ katom_idx1);
+ }
+ }
+ }
+
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+ js, completion_code);
+
+ if (job_tail != 0 && job_tail != katom->jc) {
+ bool was_updated = (job_tail != katom->jc);
+
+ /* Some of the job has been executed, so we update the job chain
+ * address to where we should resume from */
+ katom->jc = job_tail;
+ if (was_updated)
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+ katom, job_tail, js);
+ }
+
+ /* Only update the event code for jobs that weren't cancelled */
+ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+ katom->event_code = (base_jd_event_code)completion_code;
+
+ kbase_device_trace_register_access(kctx, REG_WRITE,
+ JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+ 1 << js);
+
+ /* Complete the job, and start new ones
+ *
+ * Also defer remaining work onto the workqueue:
+ * - Re-queue Soft-stopped jobs
+ * - For any other jobs, queue the job back into the dependency system
+ * - Schedule out the parent context if necessary, and schedule a new
+ * one in.
+ */
+#ifdef CONFIG_GPU_TRACEPOINTS
+ {
+ /* The atom in the HEAD */
+ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+ 0);
+
+ if (next_katom && next_katom->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ char js_string[16];
+
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+ js_string),
+ ktime_to_ns(*end_timestamp),
+ (u32)next_katom->kctx, 0,
+ next_katom->work_id);
+ kbdev->hwaccess.backend.slot_rb[js].last_context =
+ next_katom->kctx;
+ } else {
+ char js_string[16];
+
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+ js_string),
+ ktime_to_ns(ktime_get()), 0, 0,
+ 0);
+ kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+ }
+ }
+#endif
+
+ if (completion_code == BASE_JD_EVENT_STOPPED)
+ kbase_jm_return_atom_to_js(kbdev, katom);
+ else
+ kbase_jm_complete(kbdev, katom, end_timestamp);
+
+ /* Job completion may have unblocked other atoms. Try to update all job
+ * slots */
+ kbase_gpu_slot_update(kbdev);
+}
+
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
+{
+ int js;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ int idx;
+
+ for (idx = 0; idx < 2; idx++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+ js, 0);
+
+ if (katom) {
+ enum kbase_atom_gpu_rb_state gpu_rb_state =
+ katom->gpu_rb_state;
+
+ kbase_gpu_release_atom(kbdev, katom);
+ kbase_gpu_dequeue_atom(kbdev, js);
+
+ if (gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ katom->event_code =
+ BASE_JD_EVENT_JOB_CANCELLED;
+ kbase_jm_complete(kbdev, katom,
+ end_timestamp);
+ } else {
+ katom->event_code =
+ BASE_JD_EVENT_STOPPED;
+ kbase_jm_return_atom_to_js(kbdev,
+ katom);
+ }
+ }
+ }
+ }
+}
+
+static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev,
+ int js,
+ struct kbase_jd_atom *katom,
+ u32 action)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ u32 hw_action = action & JS_COMMAND_MASK;
+
+ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom);
+ kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action,
+ katom->core_req, katom);
+ kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+}
+
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ u32 action,
+ bool disjoint)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+ kbase_gpu_mark_atom_for_return(kbdev, katom);
+ kbasep_js_clear_submit_allowed(js_devdata, katom->kctx);
+
+ if (disjoint)
+ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+ katom);
+}
+
+static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
+{
+ if (katom->x_post_dep) {
+ struct kbase_jd_atom *dep_atom = katom->x_post_dep;
+
+ if (dep_atom->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB &&
+ dep_atom->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_RETURN_TO_JS)
+ return dep_atom->slot_nr;
+ }
+ return -1;
+}
+
+bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js,
+ struct kbase_jd_atom *katom,
+ u32 action)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ struct kbase_jd_atom *katom_idx0;
+ struct kbase_jd_atom *katom_idx1;
+
+ bool katom_idx0_valid, katom_idx1_valid;
+
+ bool ret = false;
+
+ int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ katom_idx0 = kbase_gpu_inspect(kbdev, js, 0);
+ katom_idx1 = kbase_gpu_inspect(kbdev, js, 1);
+
+ if (katom) {
+ katom_idx0_valid = (katom_idx0 == katom);
+ /* If idx0 is to be removed and idx1 is on the same context,
+ * then idx1 must also be removed otherwise the atoms might be
+ * returned out of order */
+ if (katom_idx1)
+ katom_idx1_valid = (katom_idx1 == katom) ||
+ (katom_idx0_valid &&
+ (katom_idx0->kctx ==
+ katom_idx1->kctx));
+ else
+ katom_idx1_valid = false;
+ } else {
+ katom_idx0_valid = (katom_idx0 &&
+ (!kctx || katom_idx0->kctx == kctx));
+ katom_idx1_valid = (katom_idx1 &&
+ (!kctx || katom_idx1->kctx == kctx));
+ }
+
+ if (katom_idx0_valid)
+ stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0);
+ if (katom_idx1_valid)
+ stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1);
+
+ if (katom_idx0_valid) {
+ if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Simple case - just dequeue and return */
+ kbase_gpu_dequeue_atom(kbdev, js);
+ if (katom_idx1_valid) {
+ kbase_gpu_dequeue_atom(kbdev, js);
+ katom_idx1->event_code =
+ BASE_JD_EVENT_REMOVED_FROM_NEXT;
+ kbase_jm_return_atom_to_js(kbdev, katom_idx1);
+ kbasep_js_clear_submit_allowed(js_devdata,
+ katom_idx1->kctx);
+ }
+
+ katom_idx0->event_code =
+ BASE_JD_EVENT_REMOVED_FROM_NEXT;
+ kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+ kbasep_js_clear_submit_allowed(js_devdata,
+ katom_idx0->kctx);
+ } else {
+ /* katom_idx0 is on GPU */
+ if (katom_idx1 && katom_idx1->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* katom_idx0 and katom_idx1 are on GPU */
+
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+ JS_COMMAND_NEXT), NULL) == 0) {
+ /* idx0 has already completed - stop
+ * idx1 if needed*/
+ if (katom_idx1_valid) {
+ kbase_gpu_stop_atom(kbdev, js,
+ katom_idx1,
+ action);
+ ret = true;
+ }
+ } else {
+ /* idx1 is in NEXT registers - attempt
+ * to remove */
+ kbase_reg_write(kbdev,
+ JOB_SLOT_REG(js,
+ JS_COMMAND_NEXT),
+ JS_COMMAND_NOP, NULL);
+
+ if (kbase_reg_read(kbdev,
+ JOB_SLOT_REG(js,
+ JS_HEAD_NEXT_LO), NULL)
+ != 0 ||
+ kbase_reg_read(kbdev,
+ JOB_SLOT_REG(js,
+ JS_HEAD_NEXT_HI), NULL)
+ != 0) {
+ /* idx1 removed successfully,
+ * will be handled in IRQ */
+ kbase_gpu_remove_atom(kbdev,
+ katom_idx1,
+ action, true);
+ stop_x_dep_idx1 =
+ should_stop_x_dep_slot(katom_idx1);
+
+ /* stop idx0 if still on GPU */
+ kbase_gpu_stop_atom(kbdev, js,
+ katom_idx0,
+ action);
+ ret = true;
+ } else if (katom_idx1_valid) {
+ /* idx0 has already completed,
+ * stop idx1 if needed */
+ kbase_gpu_stop_atom(kbdev, js,
+ katom_idx1,
+ action);
+ ret = true;
+ }
+ }
+ } else if (katom_idx1_valid) {
+ /* idx1 not on GPU but must be dequeued*/
+
+ /* idx1 will be handled in IRQ */
+ kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+ false);
+ /* stop idx0 */
+ /* This will be repeated for anything removed
+ * from the next registers, since their normal
+ * flow was also interrupted, and this function
+ * might not enter disjoint state e.g. if we
+ * don't actually do a hard stop on the head
+ * atom */
+ kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+ action);
+ ret = true;
+ } else {
+ /* no atom in idx1 */
+ /* just stop idx0 */
+ kbase_gpu_stop_atom(kbdev, js, katom_idx0,
+ action);
+ ret = true;
+ }
+ }
+ } else if (katom_idx1_valid) {
+ if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Mark for return */
+ /* idx1 will be returned once idx0 completes */
+ kbase_gpu_remove_atom(kbdev, katom_idx1, action,
+ false);
+ } else {
+ /* idx1 is on GPU */
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+ JS_COMMAND_NEXT), NULL) == 0) {
+ /* idx0 has already completed - stop idx1 */
+ kbase_gpu_stop_atom(kbdev, js, katom_idx1,
+ action);
+ ret = true;
+ } else {
+ /* idx1 is in NEXT registers - attempt to
+ * remove */
+ kbase_reg_write(kbdev, JOB_SLOT_REG(js,
+ JS_COMMAND_NEXT),
+ JS_COMMAND_NOP, NULL);
+
+ if (kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+ JS_HEAD_NEXT_LO), NULL) != 0 ||
+ kbase_reg_read(kbdev, JOB_SLOT_REG(js,
+ JS_HEAD_NEXT_HI), NULL) != 0) {
+ /* idx1 removed successfully, will be
+ * handled in IRQ once idx0 completes */
+ kbase_gpu_remove_atom(kbdev, katom_idx1,
+ action,
+ false);
+ } else {
+ /* idx0 has already completed - stop
+ * idx1 */
+ kbase_gpu_stop_atom(kbdev, js,
+ katom_idx1,
+ action);
+ ret = true;
+ }
+ }
+ }
+ }
+
+
+ if (stop_x_dep_idx0 != -1)
+ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0,
+ NULL, action);
+
+ if (stop_x_dep_idx1 != -1)
+ kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1,
+ NULL, action);
+
+ return ret;
+}
+
+static void kbasep_gpu_cacheclean(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ /* Limit the number of loops to avoid a hang if the interrupt is missed
+ */
+ u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS;
+
+ mutex_lock(&kbdev->cacheclean_lock);
+
+ /* use GPU_COMMAND completion solution */
+ /* clean & invalidate the caches */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CLEAN_INV_CACHES, NULL);
+
+ /* wait for cache flush to complete before continuing */
+ while (--max_loops &&
+ (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+ CLEAN_CACHES_COMPLETED) == 0)
+ ;
+
+ /* clear the CLEAN_CACHES_COMPLETED irq */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u,
+ CLEAN_CACHES_COMPLETED);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
+ CLEAN_CACHES_COMPLETED, NULL);
+ KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state !=
+ KBASE_INSTR_STATE_CLEANING,
+ "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang.");
+
+ mutex_unlock(&kbdev->cacheclean_lock);
+
+ kbase_pm_unrequest_cores(kbdev, false,
+ katom->need_cache_flush_cores_retained);
+}
+
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ /*
+ * If cache flush required due to HW workaround then perform the flush
+ * now
+ */
+ if (katom->need_cache_flush_cores_retained) {
+ kbasep_gpu_cacheclean(kbdev, katom);
+ katom->need_cache_flush_cores_retained = 0;
+ }
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) &&
+ (katom->core_req & BASE_JD_REQ_FS) &&
+ katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT &&
+ (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) &&
+ !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) {
+ dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. Possible HW issue, trying SW workaround\n");
+ if (kbasep_10969_workaround_clamp_coordinates(katom)) {
+ /* The job had a TILE_RANGE_FAULT after was soft-stopped
+ * Due to an HW issue we try to execute the job again.
+ */
+ dev_dbg(kbdev->dev,
+ "Clamping has been executed, try to rerun the job\n"
+ );
+ katom->event_code = BASE_JD_EVENT_STOPPED;
+ katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN;
+ }
+ }
+}
+
+void kbase_gpu_dump_slots(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ unsigned long flags;
+ int js;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ int idx;
+
+ for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
+ struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev,
+ js,
+ idx);
+
+ if (katom)
+ dev_info(kbdev->dev,
+ " js%d idx%d : katom=%p gpu_rb_state=%d\n",
+ js, idx, katom, katom->gpu_rb_state);
+ else
+ dev_info(kbdev->dev, " js%d idx%d : empty\n",
+ js, idx);
+ }
+ }
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
+
+
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
new file mode 100644
index 00000000000..13d600c47b1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu.h
+ * Register-based HW access backend specific APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPU_H_
+#define _KBASE_HWACCESS_GPU_H_
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/**
+ * Evict the atom in the NEXT slot for the specified job slot. This function is
+ * called from the job complete IRQ handler when the previous job has failed.
+ *
+ * @param[in] kbdev Device pointer
+ * @param[in] js Job slot to evict from
+ * @return true if job evicted from NEXT registers
+ * false otherwise
+ */
+bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js);
+
+/**
+ * Complete an atom on job slot js
+ *
+ * @param[in] kbdev Device pointer
+ * @param[in] js Job slot that has completed
+ * @param[in] event_code Event code from job that has completed
+ * @param[in] end_timestamp Time of completion
+ */
+void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js,
+ u32 completion_code,
+ u64 job_tail,
+ ktime_t *end_timestamp);
+
+/**
+ * Inspect the contents of the HW access ringbuffer
+ *
+ * @param[in] kbdev Device pointer
+ * @param[in] js Job slot to inspect
+ * @param[in] idx Index into ringbuffer. 0 is the job currently running on
+ * the slot, 1 is the job waiting, all other values are
+ * invalid.
+ * @return The atom at that position in the ringbuffer
+ * NULL if no atom present
+ */
+struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js,
+ int idx);
+
+/**
+ * Inspect the jobs in the slot ringbuffers and update state.
+ *
+ * This will cause jobs to be submitted to hardware if they are unblocked
+ *
+ * @param[in] kbdev Device pointer
+ */
+void kbase_gpu_slot_update(struct kbase_device *kbdev);
+
+/**
+ * Print the contents of the slot ringbuffers
+ *
+ * @param[in] kbdev Device pointer
+ */
+void kbase_gpu_dump_slots(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_GPU_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
new file mode 100644
index 00000000000..1e29d5fb223
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_affinity.c
+ * Base kernel affinity manager APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_js_affinity.h"
+
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+ int js)
+{
+ /*
+ * Here are the reasons for using job slot 2:
+ * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
+ * - In absence of the above, then:
+ * - Atoms with BASE_JD_REQ_COHERENT_GROUP
+ * - But, only when there aren't contexts with
+ * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
+ * all cores on slot 1 could be blocked by those using a coherent group
+ * on slot 2
+ * - And, only when you actually have 2 or more coregroups - if you
+ * only have 1 coregroup, then having jobs for slot 2 implies they'd
+ * also be for slot 1, meaning you'll get interference from them. Jobs
+ * able to run on slot 2 could also block jobs that can only run on
+ * slot 1 (tiler jobs)
+ */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+ return true;
+
+ if (js != 2)
+ return true;
+
+ /* Only deal with js==2 now: */
+ if (kbdev->gpu_props.num_core_groups > 1) {
+ /* Only use slot 2 in the 2+ coregroup case */
+ if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
+ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
+ false) {
+ /* ...But only when we *don't* have atoms that run on
+ * all cores */
+
+ /* No specific check for BASE_JD_REQ_COHERENT_GROUP
+ * atoms - the policy will sort that out */
+ return true;
+ }
+ }
+
+ /* Above checks failed mean we shouldn't use slot 2 */
+ return false;
+}
+
+/*
+ * As long as it has been decided to have a deeper modification of
+ * what job scheduler, power manager and affinity manager will
+ * implement, this function is just an intermediate step that
+ * assumes:
+ * - all working cores will be powered on when this is called.
+ * - largest current configuration is 2 core groups.
+ * - It has been decided not to have hardcoded values so the low
+ * and high cores in a core split will be evently distributed.
+ * - Odd combinations of core requirements have been filtered out
+ * and do not get to this function (e.g. CS+T+NSS is not
+ * supported here).
+ * - This function is frequently called and can be optimized,
+ * (see notes in loops), but as the functionallity will likely
+ * be modified, optimization has not been addressed.
+*/
+bool kbase_js_choose_affinity(u64 * const affinity,
+ struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom, int js)
+{
+ base_jd_core_req core_req = katom->core_req;
+ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+ u64 core_availability_mask;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
+
+ /*
+ * If no cores are currently available (core availability policy is
+ * transitioning) then fail.
+ */
+ if (0 == core_availability_mask) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ *affinity = 0;
+ return false;
+ }
+
+ KBASE_DEBUG_ASSERT(js >= 0);
+
+ if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
+ BASE_JD_REQ_T) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ /* Tiler only job, bit 0 needed to enable tiler but no shader
+ * cores required */
+ *affinity = 1;
+ return true;
+ }
+
+ if (1 == kbdev->gpu_props.num_cores) {
+ /* trivial case only one core, nothing to do */
+ *affinity = core_availability_mask;
+ } else {
+ if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
+ if (js == 0 || num_core_groups == 1) {
+ /* js[0] and single-core-group systems just get
+ * the first core group */
+ *affinity =
+ kbdev->gpu_props.props.coherency_info.group[0].core_mask
+ & core_availability_mask;
+ } else {
+ /* js[1], js[2] use core groups 0, 1 for
+ * dual-core-group systems */
+ u32 core_group_idx = ((u32) js) - 1;
+
+ KBASE_DEBUG_ASSERT(core_group_idx <
+ num_core_groups);
+ *affinity =
+ kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
+ & core_availability_mask;
+
+ /* If the job is specifically targeting core
+ * group 1 and the core availability policy is
+ * keeping that core group off, then fail */
+ if (*affinity == 0 && core_group_idx == 1 &&
+ kbdev->pm.backend.cg1_disabled
+ == true)
+ katom->event_code =
+ BASE_JD_EVENT_PM_EVENT;
+ }
+ } else {
+ /* All cores are available when no core split is
+ * required */
+ *affinity = core_availability_mask;
+ }
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /*
+ * If no cores are currently available in the desired core group(s)
+ * (core availability policy is transitioning) then fail.
+ */
+ if (*affinity == 0)
+ return false;
+
+ /* Enable core 0 if tiler required */
+ if (core_req & BASE_JD_REQ_T)
+ *affinity = *affinity | 1;
+
+ return true;
+}
+
+static inline bool kbase_js_affinity_is_violating(
+ struct kbase_device *kbdev,
+ u64 *affinities)
+{
+ /* This implementation checks whether the two slots involved in Generic
+ * thread creation have intersecting affinity. This is due to micro-
+ * architectural issues where a job in slot A targetting cores used by
+ * slot B could prevent the job in slot B from making progress until the
+ * job in slot A has completed.
+ */
+ u64 affinity_set_left;
+ u64 affinity_set_right;
+ u64 intersection;
+
+ KBASE_DEBUG_ASSERT(affinities != NULL);
+
+ affinity_set_left = affinities[1];
+
+ affinity_set_right = affinities[2];
+
+ /* A violation occurs when any bit in the left_set is also in the
+ * right_set */
+ intersection = affinity_set_left & affinity_set_right;
+
+ return (bool) (intersection != (u64) 0u);
+}
+
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+ u64 affinity)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+ js_devdata = &kbdev->js_data;
+
+ memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
+ sizeof(js_devdata->runpool_irq.slot_affinities));
+
+ new_affinities[js] |= affinity;
+
+ return kbase_js_affinity_is_violating(kbdev, new_affinities);
+}
+
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+ u64 affinity)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u64 cores;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
+ == false);
+
+ cores = affinity;
+ while (cores) {
+ int bitnum = fls64(cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ s8 cnt;
+
+ cnt =
+ ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+ if (cnt == 1)
+ js_devdata->runpool_irq.slot_affinities[js] |= bit;
+
+ cores &= ~bit;
+ }
+}
+
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+ u64 affinity)
+{
+ struct kbasep_js_device_data *js_devdata;
+ u64 cores;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
+ js_devdata = &kbdev->js_data;
+
+ cores = affinity;
+ while (cores) {
+ int bitnum = fls64(cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ s8 cnt;
+
+ KBASE_DEBUG_ASSERT(
+ js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);
+
+ cnt =
+ --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]);
+
+ if (0 == cnt)
+ js_devdata->runpool_irq.slot_affinities[js] &= ~bit;
+
+ cores &= ~bit;
+ }
+}
+
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int slot_nr;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ for (slot_nr = 0; slot_nr < 3; ++slot_nr)
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
+ NULL, 0u, slot_nr,
+ (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
+}
+#endif /* KBASE_TRACE_ENABLE */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
new file mode 100644
index 00000000000..541e554983e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h
@@ -0,0 +1,140 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_affinity.h
+ * Affinity Manager internal APIs.
+ */
+
+#ifndef _KBASE_JS_AFFINITY_H_
+#define _KBASE_JS_AFFINITY_H_
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+/* Import the external affinity mask variables */
+extern u64 mali_js0_affinity_mask;
+extern u64 mali_js1_affinity_mask;
+extern u64 mali_js2_affinity_mask;
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_affinity Affinity Manager internal APIs.
+ * @{
+ *
+ */
+
+/**
+ * @brief Decide whether it is possible to submit a job to a particular job slot
+ * in the current status
+ *
+ * Will check if submitting to the given job slot is allowed in the current
+ * status. For example using job slot 2 while in soft-stoppable state and only
+ * having 1 coregroup is not allowed by the policy. This function should be
+ * called prior to submitting a job to a slot to make sure policy rules are not
+ * violated.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * @param kbdev The kbase device structure of the device
+ * @param js Job slot number to check for allowance
+ */
+bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
+ int js);
+
+/**
+ * @brief Compute affinity for a given job.
+ *
+ * Currently assumes an all-on/all-off power management policy.
+ * Also assumes there is at least one core with tiler available.
+ *
+ * Returns true if a valid affinity was chosen, false if
+ * no cores were available.
+ *
+ * @param[out] affinity Affinity bitmap computed
+ * @param kbdev The kbase device structure of the device
+ * @param katom Job chain of which affinity is going to be found
+ * @param js Slot the job chain is being submitted
+ */
+bool kbase_js_choose_affinity(u64 * const affinity,
+ struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ int js);
+
+/**
+ * @brief Determine whether a proposed \a affinity on job slot \a js would
+ * cause a violation of affinity restrictions.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
+ u64 affinity);
+
+/**
+ * @brief Affinity tracking: retain cores used by a slot
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
+ u64 affinity);
+
+/**
+ * @brief Affinity tracking: release cores used by a slot
+ *
+ * Cores \b must be released as soon as a job is dequeued from a slot's 'submit
+ * slots', and before another job is submitted to those slots. Otherwise, the
+ * refcount could exceed the maximum number submittable to a slot,
+ * BASE_JM_SUBMIT_SLOTS.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
+ u64 affinity);
+
+/**
+ * @brief Output to the Trace log the current tracked affinities on all slots
+ */
+#if KBASE_TRACE_ENABLE
+void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev);
+#else /* KBASE_TRACE_ENABLE */
+static inline void
+kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
+{
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+ /** @} *//* end group kbase_js_affinity */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+
+#endif /* _KBASE_JS_AFFINITY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
new file mode 100644
index 00000000000..04bfa519039
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c
@@ -0,0 +1,316 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_jm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
+
+/*
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is
+ * enabled for a particular level is down to the integrator. However this is
+ * being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/*
+ * Hold the runpool_mutex for this
+ */
+static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+{
+ s8 nr_running_ctxs;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_mutex);
+
+ /* nr_contexts_pullable is updated with the runpool_mutex. However, the
+ * locking in the caller gives us a barrier that ensures
+ * nr_contexts_pullable is up-to-date for reading */
+ nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
+
+#ifdef CONFIG_MALI_DEBUG
+ if (kbdev->js_data.softstop_always) {
+ /* Debug support for allowing soft-stop on a single context */
+ return true;
+ }
+#endif /* CONFIG_MALI_DEBUG */
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) {
+ /* Timeouts would have to be 4x longer (due to micro-
+ * architectural design) to support OpenCL conformance tests, so
+ * only run the timer when there's:
+ * - 2 or more CL contexts
+ * - 1 or more GLES contexts
+ *
+ * NOTE: We will treat a context that has both Compute and Non-
+ * Compute jobs will be treated as an OpenCL context (hence, we
+ * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
+ */
+ {
+ s8 nr_compute_ctxs =
+ kbasep_js_ctx_attr_count_on_runpool(kbdev,
+ KBASEP_JS_CTX_ATTR_COMPUTE);
+ s8 nr_noncompute_ctxs = nr_running_ctxs -
+ nr_compute_ctxs;
+
+ return (bool) (nr_compute_ctxs >= 2 ||
+ nr_noncompute_ctxs > 0);
+ }
+ } else {
+ /* Run the timer callback whenever you have at least 1 context
+ */
+ return (bool) (nr_running_ctxs > 0);
+ }
+}
+
+static enum hrtimer_restart timer_callback(struct hrtimer *timer)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_backend_data *backend;
+ int s;
+ bool reset_needed = false;
+
+ KBASE_DEBUG_ASSERT(timer != NULL);
+
+ backend = container_of(timer, struct kbase_backend_data,
+ scheduling_timer);
+ kbdev = container_of(backend, struct kbase_device, hwaccess.backend);
+ js_devdata = &kbdev->js_data;
+
+ /* Loop through the slots */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) {
+ struct kbase_jd_atom *atom = NULL;
+
+ if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) {
+ atom = kbase_gpu_inspect(kbdev, s, 0);
+ KBASE_DEBUG_ASSERT(atom != NULL);
+ }
+
+ if (atom != NULL) {
+ /* The current version of the model doesn't support
+ * Soft-Stop */
+ if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) {
+ u32 ticks = atom->sched_info.cfs.ticks++;
+
+#if !CINSTR_DUMPING_ENABLED
+ u32 soft_stop_ticks, hard_stop_ticks,
+ gpu_reset_ticks;
+ if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+ soft_stop_ticks =
+ js_devdata->soft_stop_ticks_cl;
+ hard_stop_ticks =
+ js_devdata->hard_stop_ticks_cl;
+ gpu_reset_ticks =
+ js_devdata->gpu_reset_ticks_cl;
+ } else {
+ soft_stop_ticks =
+ js_devdata->soft_stop_ticks;
+ hard_stop_ticks =
+ js_devdata->hard_stop_ticks_ss;
+ gpu_reset_ticks =
+ js_devdata->gpu_reset_ticks_ss;
+ }
+
+ /* Job is Soft-Stoppable */
+ if (ticks == soft_stop_ticks) {
+ int disjoint_threshold =
+ KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD;
+ u32 softstop_flags = 0u;
+ /* Job has been scheduled for at least
+ * js_devdata->soft_stop_ticks ticks.
+ * Soft stop the slot so we can run
+ * other jobs.
+ */
+ dev_dbg(kbdev->dev, "Soft-stop");
+#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+ /* nr_user_contexts_running is updated
+ * with the runpool_mutex, but we can't
+ * take that here.
+ *
+ * However, if it's about to be
+ * increased then the new context can't
+ * run any jobs until they take the
+ * runpool_irq lock, so it's OK to
+ * observe the older value.
+ *
+ * Similarly, if it's about to be
+ * decreased, the last job from another
+ * context has already finished, so it's
+ * not too bad that we observe the older
+ * value and register a disjoint event
+ * when we try soft-stopping */
+ if (js_devdata->nr_user_contexts_running
+ >= disjoint_threshold)
+ softstop_flags |=
+ JS_COMMAND_SW_CAUSES_DISJOINT;
+
+ kbase_job_slot_softstop_swflags(kbdev,
+ s, atom, softstop_flags);
+#endif
+ } else if (ticks == hard_stop_ticks) {
+ /* Job has been scheduled for at least
+ * js_devdata->hard_stop_ticks_ss ticks.
+ * It should have been soft-stopped by
+ * now. Hard stop the slot.
+ */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ int ms =
+ js_devdata->scheduling_period_ns
+ / 1000000u;
+ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+ (unsigned long)ticks,
+ (unsigned long)ms);
+ kbase_job_slot_hardstop(atom->kctx, s,
+ atom);
+#endif
+ } else if (ticks == gpu_reset_ticks) {
+ /* Job has been scheduled for at least
+ * js_devdata->gpu_reset_ticks_ss ticks.
+ * It should have left the GPU by now.
+ * Signal that the GPU needs to be
+ * reset.
+ */
+ reset_needed = true;
+ }
+#else /* !CINSTR_DUMPING_ENABLED */
+ /* NOTE: During CINSTR_DUMPING_ENABLED, we use
+ * the alternate timeouts, which makes the hard-
+ * stop and GPU reset timeout much longer. We
+ * also ensure that we don't soft-stop at all.
+ */
+ if (ticks == js_devdata->soft_stop_ticks) {
+ /* Job has been scheduled for at least
+ * js_devdata->soft_stop_ticks. We do
+ * not soft-stop during
+ * CINSTR_DUMPING_ENABLED, however.
+ */
+ dev_dbg(kbdev->dev, "Soft-stop");
+ } else if (ticks ==
+ js_devdata->hard_stop_ticks_dumping) {
+ /* Job has been scheduled for at least
+ * js_devdata->hard_stop_ticks_dumping
+ * ticks. Hard stop the slot.
+ */
+#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ int ms =
+ js_devdata->scheduling_period_ns
+ / 1000000u;
+ dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)",
+ (unsigned long)ticks,
+ (unsigned long)ms);
+ kbase_job_slot_hardstop(atom->kctx, s,
+ atom);
+#endif
+ } else if (ticks ==
+ js_devdata->gpu_reset_ticks_dumping) {
+ /* Job has been scheduled for at least
+ * js_devdata->gpu_reset_ticks_dumping
+ * ticks. It should have left the GPU by
+ * now. Signal that the GPU needs to be
+ * reset.
+ */
+ reset_needed = true;
+ }
+#endif /* !CINSTR_DUMPING_ENABLED */
+ }
+ }
+ }
+#if KBASE_GPU_RESET_EN
+ if (reset_needed) {
+ dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issueing GPU soft-reset to resolve.");
+
+ if (kbase_prepare_to_reset_gpu_locked(kbdev))
+ kbase_reset_gpu_locked(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ /* the timer is re-issued if there is contexts in the run-pool */
+
+ if (backend->timer_running)
+ hrtimer_start(&backend->scheduling_timer,
+ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+ HRTIMER_MODE_REL);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+ unsigned long flags;
+
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ if (!timer_callback_should_run(kbdev)) {
+ /* Take spinlock to force synchronisation with timer */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ backend->timer_running = false;
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ /* From now on, return value of timer_callback_should_run() will
+ * also cause the timer to not requeue itself. Its return value
+ * cannot change, because it depends on variables updated with
+ * the runpool_mutex held, which the caller of this must also
+ * hold */
+ hrtimer_cancel(&backend->scheduling_timer);
+ }
+
+ if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+ /* Take spinlock to force synchronisation with timer */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ backend->timer_running = true;
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ hrtimer_start(&backend->scheduling_timer,
+ HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns),
+ HRTIMER_MODE_REL);
+
+ KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u,
+ 0u);
+ }
+}
+
+int kbase_backend_timer_init(struct kbase_device *kbdev)
+{
+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+ hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ backend->scheduling_timer.function = timer_callback;
+
+ backend->timer_running = false;
+
+ return 0;
+}
+
+void kbase_backend_timer_term(struct kbase_device *kbdev)
+{
+ struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
+
+ hrtimer_cancel(&backend->scheduling_timer);
+}
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
new file mode 100644
index 00000000000..3c101e4320d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Register-based HW access backend specific job scheduler APIs
+ */
+
+#ifndef _KBASE_JS_BACKEND_H_
+#define _KBASE_JS_BACKEND_H_
+
+/**
+ * kbase_backend_timer_init() - Initialise the JS scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called at driver initialisation
+ *
+ * Return: 0 on success
+ */
+int kbase_backend_timer_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_timer_term() - Terminate the JS scheduling timer
+ * @kbdev: Device pointer
+ *
+ * This function should be called at driver termination
+ */
+void kbase_backend_timer_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_JS_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
new file mode 100644
index 00000000000..e06dc58621f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c
@@ -0,0 +1,295 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/bitops.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mmu_hw.h>
+#include <backend/gpu/mali_kbase_mmu_hw_direct.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+
+static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn,
+ u32 num_pages)
+{
+ u64 region;
+
+ /* can't lock a zero sized range */
+ KBASE_DEBUG_ASSERT(num_pages);
+
+ region = pfn << PAGE_SHIFT;
+ /*
+ * fls returns (given the ASSERT above):
+ * 1 .. 32
+ *
+ * 10 + fls(num_pages)
+ * results in the range (11 .. 42)
+ */
+
+ /* gracefully handle num_pages being zero */
+ if (0 == num_pages) {
+ region |= 11;
+ } else {
+ u8 region_width;
+
+ region_width = 10 + fls(num_pages);
+ if (num_pages != (1ul << (region_width - 11))) {
+ /* not pow2, so must go up to the next pow2 */
+ region_width += 1;
+ }
+ KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE);
+ KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE);
+ region |= region_width;
+ }
+
+ return region;
+}
+
+static int wait_ready(struct kbase_device *kbdev,
+ unsigned int as_nr, struct kbase_context *kctx)
+{
+ unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+
+ /* Wait for the MMU status to indicate there is no active command. */
+ while (--max_loops && kbase_reg_read(kbdev,
+ MMU_AS_REG(as_nr, AS_STATUS),
+ kctx) & AS_STATUS_AS_ACTIVE) {
+ ;
+ }
+
+ if (max_loops == 0) {
+ dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd,
+ struct kbase_context *kctx)
+{
+ int status;
+
+ /* write AS_COMMAND when MMU is ready to accept another command */
+ status = wait_ready(kbdev, as_nr, kctx);
+ if (status == 0)
+ kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd,
+ kctx);
+
+ return status;
+}
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
+{
+ const int num_as = 16;
+ const int busfault_shift = MMU_PAGE_FAULT_FLAGS;
+ const int pf_shift = 0;
+ const unsigned long as_bit_mask = (1UL << num_as) - 1;
+ unsigned long flags;
+ u32 new_mask;
+ u32 tmp;
+
+ /* bus faults */
+ u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask;
+ /* page faults (note: Ignore ASes with both pf and bf) */
+ u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ /* remember current mask */
+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+ new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ /* mask interrupts for now */
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+
+ while (bf_bits | pf_bits) {
+ struct kbase_as *as;
+ int as_no;
+ struct kbase_context *kctx;
+
+ /*
+ * the while logic ensures we have a bit set, no need to check
+ * for not-found here
+ */
+ as_no = ffs(bf_bits | pf_bits) - 1;
+ as = &kbdev->as[as_no];
+
+ /*
+ * Refcount the kctx ASAP - it shouldn't disappear anyway, since
+ * Bus/Page faults _should_ only occur whilst jobs are running,
+ * and a job causing the Bus/Page fault shouldn't complete until
+ * the MMU is updated
+ */
+ kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no);
+
+ /* find faulting address */
+ as->fault_addr = kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no,
+ AS_FAULTADDRESS_HI),
+ kctx);
+ as->fault_addr <<= 32;
+ as->fault_addr |= kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no,
+ AS_FAULTADDRESS_LO),
+ kctx);
+
+ /* record the fault status */
+ as->fault_status = kbase_reg_read(kbdev,
+ MMU_AS_REG(as_no,
+ AS_FAULTSTATUS),
+ kctx);
+
+ /* find the fault type */
+ as->fault_type = (bf_bits & (1 << as_no)) ?
+ KBASE_MMU_FAULT_TYPE_BUS :
+ KBASE_MMU_FAULT_TYPE_PAGE;
+
+
+ if (kbase_as_has_bus_fault(as)) {
+ /* Mark bus fault as handled.
+ * Note that a bus fault is processed first in case
+ * where both a bus fault and page fault occur.
+ */
+ bf_bits &= ~(1UL << as_no);
+
+ /* remove the queued BF (and PF) from the mask */
+ new_mask &= ~(MMU_BUS_ERROR(as_no) |
+ MMU_PAGE_FAULT(as_no));
+ } else {
+ /* Mark page fault as handled */
+ pf_bits &= ~(1UL << as_no);
+
+ /* remove the queued PF from the mask */
+ new_mask &= ~MMU_PAGE_FAULT(as_no);
+ }
+
+ /* Process the interrupt for this address space */
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ kbase_mmu_interrupt_process(kbdev, kctx, as);
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock,
+ flags);
+ }
+
+ /* reenable interrupts */
+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+ tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ new_mask |= tmp;
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL);
+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
+
+void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx)
+{
+ struct kbase_mmu_setup *current_setup = &as->current_setup;
+
+
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+ current_setup->transtab & 0xFFFFFFFFUL, kctx);
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+ (current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx);
+
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+ current_setup->memattr & 0xFFFFFFFFUL, kctx);
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+ (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx);
+
+ write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx);
+}
+
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op,
+ unsigned int handling_irq)
+{
+ int ret;
+
+ if (op == AS_COMMAND_UNLOCK) {
+ /* Unlock doesn't require a lock first */
+ ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ } else {
+ u64 lock_addr = lock_region(kbdev, vpfn, nr);
+
+ /* Lock the region that needs to be updated */
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO),
+ lock_addr & 0xFFFFFFFFUL, kctx);
+ kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI),
+ (lock_addr >> 32) & 0xFFFFFFFFUL, kctx);
+ write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx);
+
+ /* Run the MMU operation */
+ write_cmd(kbdev, as->number, op, kctx);
+
+ /* Wait for the flush to complete */
+ ret = wait_ready(kbdev, as->number, kctx);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) {
+ /* Issue an UNLOCK command to ensure that valid page
+ tables are re-read by the GPU after an update.
+ Note that, the FLUSH command should perform all the
+ actions necessary, however the bus logs show that if
+ multiple page faults occur within an 8 page region
+ the MMU does not always re-read the updated page
+ table entries for later faults or is only partially
+ read, it subsequently raises the page fault IRQ for
+ the same addresses, the unlock ensures that the MMU
+ cache is flushed, so updates can be re-read. As the
+ region is now unlocked we need to issue 2 UNLOCK
+ commands in order to flush the MMU/uTLB,
+ see PRLAM-8812.
+ */
+ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx);
+ }
+ }
+
+ return ret;
+}
+
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+ u32 pf_bf_mask;
+
+ /* Clear the page (and bus fault IRQ as well in case one occurred) */
+ pf_bf_mask = MMU_PAGE_FAULT(as->number);
+ if (type == KBASE_MMU_FAULT_TYPE_BUS)
+ pf_bf_mask |= MMU_BUS_ERROR(as->number);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx);
+}
+
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type)
+{
+ unsigned long flags;
+ u32 irq_mask;
+
+ /* Enable the page fault IRQ (and bus fault IRQ as well in case one
+ * occurred) */
+ spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
+
+ irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) |
+ MMU_PAGE_FAULT(as->number);
+
+ if (type == KBASE_MMU_FAULT_TYPE_BUS)
+ irq_mask |= MMU_BUS_ERROR(as->number);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx);
+
+ spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
new file mode 100644
index 00000000000..a8b3b76f47e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h
@@ -0,0 +1,59 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for the direct implementation for MMU hardware access
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_direct_page Direct MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_direct_intro_sec Introduction
+ * This module provides the interface(s) that are required by the direct
+ * register access implementation of the MMU hardware interface
+ * @ref mali_kbase_mmu_hw_page .
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_
+#define _MALI_KBASE_MMU_HW_DIRECT_H_
+
+#include <mali_kbase_defs.h>
+
+/**
+ * @addtogroup mali_kbase_mmu_hw
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw_direct Direct register access to MMU
+ * @{
+ */
+
+/** @brief Process an MMU interrupt.
+ *
+ * Process the MMU interrupt that was reported by the @ref kbase_device.
+ *
+ * @param[in] kbdev kbase context to clear the fault from.
+ * @param[in] irq_stat Value of the MMU_IRQ_STATUS register
+ */
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** @} *//* end group mali_kbase_mmu_hw_direct */
+/** @} *//* end group mali_kbase_mmu_hw */
+
+#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
new file mode 100644
index 00000000000..d18ae86f8dd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 always_on_get_core_mask(struct kbase_device *kbdev)
+{
+ return kbdev->shader_present_bitmap;
+}
+
+static bool always_on_get_core_active(struct kbase_device *kbdev)
+{
+ return true;
+}
+
+static void always_on_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void always_on_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+/*
+ * The struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_always_on_policy_ops = {
+ "always_on", /* name */
+ always_on_init, /* init */
+ always_on_term, /* term */
+ always_on_get_core_mask, /* get_core_mask */
+ always_on_get_core_active, /* get_core_active */
+ 0u, /* flags */
+ KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
new file mode 100644
index 00000000000..f9d244b01bc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h
@@ -0,0 +1,77 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * "Always on" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_ALWAYS_ON_H
+#define MALI_KBASE_PM_ALWAYS_ON_H
+
+/**
+ * DOC:
+ * The "Always on" power management policy has the following
+ * characteristics:
+ *
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ * know which Job Chains are to be run:
+ * All Shader Cores are powered up, regardless of whether or not they will
+ * be needed later.
+ *
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ * currently queued Job Chains:
+ * All Shader Cores are kept powered, regardless of whether or not they will
+ * be needed
+ *
+ * - When KBase indicates that the GPU need not be powered:
+ * The Shader Cores are kept powered, regardless of whether or not they will
+ * be needed. The GPU itself is also kept powered, even though it is not
+ * needed.
+ *
+ * This policy is automatically overridden during system suspend: the desired
+ * core state is ignored, and the cores are forced off regardless of what the
+ * policy requests. After resuming from suspend, new changes to the desired
+ * core state made by the policy are honored.
+ *
+ * Note:
+ *
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ * has just started to submit Job Chains.
+ *
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ * User Processes have finished, and it is waiting for a User Process to
+ * submit some more Job Chains.
+ */
+
+/**
+ * struct kbasep_pm_policy_always_on - Private struct for policy instance data
+ * @dummy: unused dummy variable
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_always_on {
+ int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops;
+
+#endif /* MALI_KBASE_PM_ALWAYS_ON_H */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
new file mode 100644
index 00000000000..bbf39b0ce03
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c
@@ -0,0 +1,375 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_pm_hwaccess.c
+ * GPU backend implementation of base kernel power management APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_jm_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_pm_register_access_enable(struct kbase_device *kbdev)
+{
+ struct kbase_pm_callback_conf *callbacks;
+
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+ pm_runtime_enable(kbdev->dev);
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+ if (callbacks)
+ callbacks->power_on_callback(kbdev);
+
+ kbdev->pm.backend.gpu_powered = true;
+}
+
+void kbase_pm_register_access_disable(struct kbase_device *kbdev)
+{
+ struct kbase_pm_callback_conf *callbacks;
+
+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+
+ if (callbacks)
+ callbacks->power_off_callback(kbdev);
+
+ kbdev->pm.backend.gpu_powered = false;
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+ pm_runtime_disable(kbdev->dev);
+#endif
+}
+
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
+{
+ int ret = 0;
+ struct kbase_pm_callback_conf *callbacks;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ mutex_init(&kbdev->pm.lock);
+
+ kbdev->pm.backend.gpu_powered = false;
+ kbdev->pm.suspending = false;
+#ifdef CONFIG_MALI_DEBUG
+ kbdev->pm.backend.driver_ready_for_irqs = false;
+#endif /* CONFIG_MALI_DEBUG */
+ kbdev->pm.backend.gpu_in_desired_state = true;
+ init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+
+ callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS;
+ if (callbacks) {
+ kbdev->pm.backend.callback_power_on =
+ callbacks->power_on_callback;
+ kbdev->pm.backend.callback_power_off =
+ callbacks->power_off_callback;
+ kbdev->pm.backend.callback_power_suspend =
+ callbacks->power_suspend_callback;
+ kbdev->pm.backend.callback_power_resume =
+ callbacks->power_resume_callback;
+ kbdev->pm.callback_power_runtime_init =
+ callbacks->power_runtime_init_callback;
+ kbdev->pm.callback_power_runtime_term =
+ callbacks->power_runtime_term_callback;
+ kbdev->pm.backend.callback_power_runtime_on =
+ callbacks->power_runtime_on_callback;
+ kbdev->pm.backend.callback_power_runtime_off =
+ callbacks->power_runtime_off_callback;
+ } else {
+ kbdev->pm.backend.callback_power_on = NULL;
+ kbdev->pm.backend.callback_power_off = NULL;
+ kbdev->pm.backend.callback_power_suspend = NULL;
+ kbdev->pm.backend.callback_power_resume = NULL;
+ kbdev->pm.callback_power_runtime_init = NULL;
+ kbdev->pm.callback_power_runtime_term = NULL;
+ kbdev->pm.backend.callback_power_runtime_on = NULL;
+ kbdev->pm.backend.callback_power_runtime_off = NULL;
+ }
+
+ /* Initialise the metrics subsystem */
+ ret = kbasep_pm_metrics_init(kbdev);
+ if (ret)
+ return ret;
+
+ init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait);
+ kbdev->pm.backend.l2_powered = 0;
+
+ init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
+ kbdev->pm.backend.reset_done = false;
+
+ init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+ kbdev->pm.active_count = 0;
+
+ spin_lock_init(&kbdev->pm.power_change_lock);
+ spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
+ spin_lock_init(&kbdev->pm.backend.gpu_powered_lock);
+
+ if (kbase_pm_ca_init(kbdev) != 0)
+ goto workq_fail;
+
+ if (kbase_pm_policy_init(kbdev) != 0)
+ goto pm_policy_fail;
+
+ return 0;
+
+pm_policy_fail:
+ kbase_pm_ca_term(kbdev);
+workq_fail:
+ kbasep_pm_metrics_term(kbdev);
+ return -EINVAL;
+}
+
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume)
+{
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* Turn clocks and interrupts on - no-op if we haven't done a previous
+ * kbase_pm_clock_off() */
+ kbase_pm_clock_on(kbdev, is_resume);
+
+ /* Update core status as required by the policy */
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START);
+ kbase_pm_update_cores_state(kbdev);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END);
+
+ /* NOTE: We don't wait to reach the desired state, since running atoms
+ * will wait for that state to be reached anyway */
+}
+
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend)
+{
+ unsigned long flags;
+ bool cores_are_available;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ /* Force all cores off */
+ kbdev->pm.backend.desired_shader_state = 0;
+
+ /* Force all cores to be unavailable, in the situation where
+ * transitions are in progress for some cores but not others,
+ * and kbase_pm_check_transitions_nolock can not immediately
+ * power off the cores */
+ kbdev->shader_available_bitmap = 0;
+ kbdev->tiler_available_bitmap = 0;
+ kbdev->l2_available_bitmap = 0;
+
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START);
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END);
+ /* Don't need 'cores_are_available', because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /* NOTE: We won't wait to reach the core's desired state, even if we're
+ * powering off the GPU itself too. It's safe to cut the power whilst
+ * they're transitioning to off, because the cores should be idle and
+ * all cache flushes should already have occurred */
+
+ /* Consume any change-state events */
+ kbase_timeline_pm_check_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ /* Disable interrupts and turn the clock off */
+ return kbase_pm_clock_off(kbdev, is_suspend);
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+ unsigned int flags)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ unsigned long irq_flags;
+ int ret;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+
+ /* A suspend won't happen during startup/insmod */
+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+ /* Power up the GPU, don't enable IRQs as we are not ready to receive
+ * them. */
+ ret = kbase_pm_init_hw(kbdev, flags);
+ if (ret) {
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ return ret;
+ }
+
+ kbasep_pm_read_present_cores(kbdev);
+
+ kbdev->pm.debug_core_mask = kbdev->shader_present_bitmap;
+
+ /* Pretend the GPU is active to prevent a power policy turning the GPU
+ * cores off */
+ kbdev->pm.active_count = 1;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ irq_flags);
+ /* Ensure cycle counter is off */
+ kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ irq_flags);
+
+ /* We are ready to receive IRQ's now as power policy is set up, so
+ * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+ kbdev->pm.backend.driver_ready_for_irqs = true;
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+ kbase_pm_enable_interrupts(kbdev);
+
+ /* Turn on the GPU and any cores needed by the policy */
+ kbase_pm_do_poweron(kbdev, false);
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ /* Idle the GPU and/or cores, if the policy wants it to */
+ kbase_pm_context_idle(kbdev);
+
+ return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ mutex_lock(&kbdev->pm.lock);
+ kbase_pm_cancel_deferred_poweroff(kbdev);
+ if (!kbase_pm_do_poweroff(kbdev, false)) {
+ /* Page/bus faults are pending, must drop pm.lock to process.
+ * Interrupts are disabled so no more faults should be
+ * generated at this point */
+ mutex_unlock(&kbdev->pm.lock);
+ kbase_flush_mmu_wqs(kbdev);
+ mutex_lock(&kbdev->pm.lock);
+ WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+ }
+ mutex_unlock(&kbdev->pm.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt);
+
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0);
+
+ /* Free any resources the policy allocated */
+ kbase_pm_policy_term(kbdev);
+ kbase_pm_ca_term(kbdev);
+
+ /* Shut down the metrics subsystem */
+ kbasep_pm_metrics_term(kbdev);
+}
+
+void kbase_pm_power_changed(struct kbase_device *kbdev)
+{
+ bool cores_are_available;
+ unsigned long flags;
+
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START);
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev,
+ SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END);
+
+ if (cores_are_available) {
+ /* Log timelining information that a change in state has
+ * completed */
+ kbase_timeline_pm_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ kbase_gpu_slot_update(kbdev);
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+ }
+}
+
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask)
+{
+ kbdev->pm.debug_core_mask = new_core_mask;
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
+{
+ kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev)
+{
+ kbase_pm_update_active(kbdev);
+}
+
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ /* Force power off the GPU and all cores (regardless of policy), only
+ * after the PM active count reaches zero (otherwise, we risk turning it
+ * off prematurely) */
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+ kbase_pm_cancel_deferred_poweroff(kbdev);
+ if (!kbase_pm_do_poweroff(kbdev, true)) {
+ /* Page/bus faults are pending, must drop pm.lock to process.
+ * Interrupts are disabled so no more faults should be
+ * generated at this point */
+ mutex_unlock(&kbdev->pm.lock);
+ kbase_flush_mmu_wqs(kbdev);
+ mutex_lock(&kbdev->pm.lock);
+ WARN_ON(!kbase_pm_do_poweroff(kbdev, false));
+ }
+
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+ kbdev->pm.suspending = false;
+ kbase_pm_do_poweron(kbdev, true);
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
new file mode 100644
index 00000000000..521958aa174
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c
@@ -0,0 +1,179 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_ca.c
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+ &kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_ca_random_policy_ops
+#endif
+};
+
+/** The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+ kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+ return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+ kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+ if (!list)
+ return POLICY_COUNT;
+
+ *list = policy_list;
+
+ return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_ca_policy *new_policy)
+{
+ const struct kbase_pm_ca_policy *old_policy;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+ KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+ new_policy->id);
+
+ /* During a policy change we pretend the GPU is active */
+ /* A suspend won't happen here, because we're in a syscall from a
+ * userspace thread */
+ kbase_pm_context_active(kbdev);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ /* Remove the policy to prevent IRQ handlers from working on it */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ old_policy = kbdev->pm.backend.ca_current_policy;
+ kbdev->pm.backend.ca_current_policy = NULL;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ if (old_policy->term)
+ old_policy->term(kbdev);
+
+ if (new_policy->init)
+ new_policy->init(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.backend.ca_current_policy = new_policy;
+
+ /* If any core power state changes were previously attempted, but
+ * couldn't be made because the policy was changing (current_policy was
+ * NULL), then re-try them here. */
+ kbase_pm_update_cores_state_nolock(kbdev);
+
+ kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+ kbdev->shader_ready_bitmap,
+ kbdev->shader_transitioning_bitmap);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Now the policy change is finished, we release our fake context active
+ * reference */
+ kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy);
+
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ /* All cores must be enabled when instrumentation is in use */
+ if (kbdev->pm.backend.instr_enabled)
+ return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask;
+
+ if (kbdev->pm.backend.ca_current_policy == NULL)
+ return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask;
+
+ return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) &
+ kbdev->pm.debug_core_mask;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask);
+
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+ u64 cores_transitioning)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ if (kbdev->pm.backend.ca_current_policy != NULL)
+ kbdev->pm.backend.ca_current_policy->update_core_status(kbdev,
+ cores_ready,
+ cores_transitioning);
+}
+
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.backend.instr_enabled = true;
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.backend.instr_enabled = false;
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
new file mode 100644
index 00000000000..ee9e751f2d7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel core availability APIs
+ */
+
+#ifndef _KBASE_PM_CA_H_
+#define _KBASE_PM_CA_H_
+
+/**
+ * kbase_pm_ca_init - Initialize core availability framework
+ *
+ * Must be called before calling any other core availability function
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: 0 if the core availability framework was successfully initialized,
+ * -errno otherwise
+ */
+int kbase_pm_ca_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_term - Terminate core availability framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_ca_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_get_core_mask - Get currently available shaders core mask
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Returns a mask of the currently available shader cores.
+ * Calls into the core availability policy
+ *
+ * Return: The bit mask of available cores
+ */
+u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_update_core_status - Update core status
+ *
+ * @kbdev: The kbase device structure for the device (must be
+ * a valid pointer)
+ * @cores_ready: The bit mask of cores ready for job submission
+ * @cores_transitioning: The bit mask of cores that are transitioning power
+ * state
+ *
+ * Update core availability policy with current core power status
+ *
+ * Calls into the core availability policy
+ */
+void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready,
+ u64 cores_transitioning);
+
+/**
+ * kbase_pm_ca_instr_enable - Enable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This overrides the output of the core availability policy, ensuring that all
+ * cores are available
+ */
+void kbase_pm_ca_instr_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_ca_instr_disable - Disable override for instrumentation
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This disables any previously enabled override, and resumes normal policy
+ * functionality
+ */
+void kbase_pm_ca_instr_disable(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_CA_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
new file mode 100644
index 00000000000..f890a64e8bb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_ca_fixed.c
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+ kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+ return kbdev->shader_present_bitmap;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+ u64 cores_ready,
+ u64 cores_transitioning)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(cores_ready);
+ CSTD_UNUSED(cores_transitioning);
+}
+
+/** The @ref struct kbase_pm_policy structure for the fixed power policy.
+ *
+ * This is the static structure that defines the fixed power policy's callback
+ * and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+ "fixed", /* name */
+ fixed_init, /* init */
+ fixed_term, /* term */
+ fixed_get_core_mask, /* get_core_mask */
+ fixed_update_core_status, /* update_core_status */
+ 0u, /* flags */
+ KBASE_PM_CA_POLICY_ID_FIXED, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100644
index 00000000000..a763155cb70
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+ int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100644
index 00000000000..3decd0312b6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_coarse_demand.c
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+ if (kbdev->pm.active_count == 0)
+ return 0;
+
+ return kbdev->shader_present_bitmap;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+ if (kbdev->pm.active_count == 0)
+ return false;
+
+ return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+/** The @ref struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+ "coarse_demand", /* name */
+ coarse_demand_init, /* init */
+ coarse_demand_term, /* term */
+ coarse_demand_get_core_mask, /* get_core_mask */
+ coarse_demand_get_core_active, /* get_core_active */
+ 0u, /* flags */
+ KBASE_PM_POLICY_ID_COARSE_DEMAND, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100644
index 00000000000..dd1e8f710b3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_coarse_demand.h
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ * know which Job Chains are to be run:
+ * - All Shader Cores are powered up, regardless of whether or not they will
+ * be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ * currently queued Job Chains:
+ * - All Shader Cores are kept powered, regardless of whether or not they will
+ * be needed
+ * - When KBase indicates that the GPU need not be powered:
+ * - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ * has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ * User Processes have finished, and it is waiting for a User Process to
+ * submit some more Job Chains.
+ */
+
+/**
+ * Private structure for policy instance data.
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_coarse_demand {
+ /** No state needed - just have a dummy variable here */
+ int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100644
index 00000000000..aad6c49b0c3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,555 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_hwaccess_defs.h
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/** The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to:
+ * - @ref kbase_pm_get_present_cores
+ * - @ref kbase_pm_get_active_cores
+ * - @ref kbase_pm_get_trans_cores
+ * - @ref kbase_pm_get_ready_cores.
+ *
+ * They specify which type of core should be acted on. These values are set in
+ * a manner that allows @ref core_type_to_reg function to be simpler and more
+ * efficient.
+ */
+enum kbase_pm_core_type {
+ KBASE_PM_CORE_L2 = L2_PRESENT_LO, /**< The L2 cache */
+ KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO, /**< Shader cores */
+ KBASE_PM_CORE_TILER = TILER_PRESENT_LO /**< Tiler cores */
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power management framework.
+ *
+ * @vsync_hit: indicates if a framebuffer update occured since the last vsync.
+ * A framebuffer driver is expected to provide this information by
+ * checking at each vsync if the framebuffer was updated and calling
+ * kbase_pm_vsync_callback() if there was a change of status.
+ * @utilisation: percentage indicating GPU load (0-100).
+ * The utilisation is the fraction of time the GPU was powered up
+ * and busy. This is based on the time_busy and time_idle metrics.
+ * @util_gl_share: percentage of GPU load related to OpenGL jobs (0-100).
+ * This is based on the busy_gl and time_busy metrics.
+ * @util_cl_share: percentage of GPU load related to OpenCL jobs (0-100).
+ * This is based on the busy_cl and time_busy metrics.
+ * @time_period_start: time at which busy/idle measurements started
+ * @time_busy: number of ns the GPU was busy executing jobs since the
+ * @time_period_start timestamp.
+ * @time_idle: number of ns since time_period_start the GPU was not executing
+ * jobs since the @time_period_start timestamp.
+ * @prev_busy: busy time in ns of previous time period.
+ * Updated when metrics are reset.
+ * @prev_idle: idle time in ns of previous time period
+ * Updated when metrics are reset.
+ * @gpu_active: true when the GPU is executing jobs. false when
+ * not. Updated when the job scheduler informs us a job in submitted
+ * or removed from a GPU slot.
+ * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ * if two CL jobs were active for 400ns, this value would be updated
+ * with 800.
+ * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ * if two GL jobs were active for 400ns, this value would be updated
+ * with 800.
+ * @active_cl_ctx: number of CL jobs active on the GPU. This is a portion of
+ * the @nr_in_slots value.
+ * @active_gl_ctx: number of GL jobs active on the GPU. This is a portion of
+ * the @nr_in_slots value.
+ * @nr_in_slots: Total number of jobs currently submitted to the GPU across
+ * all job slots. Maximum value would be 2*BASE_JM_MAX_NR_SLOTS
+ * (one in flight and one in the JSn_HEAD_NEXT register for each
+ * job slot).
+ * @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ * @timer: timer to regularly make DVFS decisions based on the power
+ * management metrics.
+ * @timer_active: boolean indicating @timer is running
+ * @platform_data: pointer to data controlled by platform specific code
+ * @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+ int vsync_hit;
+ int utilisation;
+ int util_gl_share;
+ int util_cl_share[2]; /* 2 is a max number of core groups we can have */
+ ktime_t time_period_start;
+ u32 time_busy;
+ u32 time_idle;
+ u32 prev_busy;
+ u32 prev_idle;
+ bool gpu_active;
+ u32 busy_cl[2];
+ u32 busy_gl;
+ u32 active_cl_ctx[2];
+ u32 active_gl_ctx;
+ u8 nr_in_slots;
+ spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ struct hrtimer timer;
+ bool timer_active;
+#endif
+
+ void *platform_data;
+ struct kbase_device *kbdev;
+};
+
+/** Actions for DVFS.
+ *
+ * kbase_pm_get_dvfs_action will return one of these enumerated values to
+ * describe the action that the DVFS system should take.
+ */
+enum kbase_pm_dvfs_action {
+ KBASE_PM_DVFS_NOP, /* < No change in clock frequency is
+ * requested */
+ KBASE_PM_DVFS_CLOCK_UP, /* < The clock frequency should be increased
+ * if possible */
+ KBASE_PM_DVFS_CLOCK_DOWN /* < The clock frequency should be decreased
+ * if possible */
+};
+
+union kbase_pm_policy_data {
+ struct kbasep_pm_policy_always_on always_on;
+ struct kbasep_pm_policy_coarse_demand coarse_demand;
+ struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+ struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+ struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+ struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+ struct kbasep_pm_ca_policy_random random;
+#endif
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_backend_data {
+ /**
+ * The policy that is currently actively controlling core availability.
+ *
+ * @note: During an IRQ, this can be NULL when the policy is being
+ * changed with kbase_pm_ca_set_policy(). The change is protected under
+ * kbase_device::pm::power_change_lock. Direct access to this from IRQ
+ * context must therefore check for NULL. If NULL, then
+ * kbase_pm_ca_set_policy() will re-issue the policy functions that
+ * would've been done under IRQ.
+ */
+ const struct kbase_pm_ca_policy *ca_current_policy;
+
+ /**
+ * The policy that is currently actively controlling the power state.
+ *
+ * @note: During an IRQ, this can be NULL when the policy is being
+ * changed with kbase_pm_set_policy(). The change is protected under
+ * kbase_device::pm::power_change_lock. Direct access to this from IRQ
+ * context must therefore check for NULL. If NULL, then
+ * kbase_pm_set_policy() will re-issue the policy functions that
+ * would've been done under IRQ.
+ */
+ const struct kbase_pm_policy *pm_current_policy;
+
+ /** Private data for current CA policy */
+ union kbase_pm_ca_policy_data ca_policy_data;
+
+ /** Private data for current PM policy */
+ union kbase_pm_policy_data pm_policy_data;
+
+ /**
+ * Flag indicating when core availability policy is transitioning cores.
+ * The core availability policy must set this when a change in core
+ * availability is occuring.
+ *
+ * power_change_lock must be held when accessing this. */
+ bool ca_in_transition;
+
+ /** Waiting for reset and a queue to wait for changes */
+ bool reset_done;
+ wait_queue_head_t reset_done_wait;
+
+ /** Wait queue for whether the l2 cache has been powered as requested */
+ wait_queue_head_t l2_powered_wait;
+ /** State indicating whether all the l2 caches are powered.
+ * Non-zero indicates they're *all* powered
+ * Zero indicates that some (or all) are not powered */
+ int l2_powered;
+
+ /** The reference count of active gpu cycle counter users */
+ int gpu_cycle_counter_requests;
+ /** Lock to protect gpu_cycle_counter_requests */
+ spinlock_t gpu_cycle_counter_requests_lock;
+
+ /**
+ * A bit mask identifying the shader cores that the power policy would
+ * like to be on. The current state of the cores may be different, but
+ * there should be transitions in progress that will eventually achieve
+ * this state (assuming that the policy doesn't change its mind in the
+ * mean time).
+ */
+ u64 desired_shader_state;
+ /**
+ * A bit mask indicating which shader cores are currently in a power-on
+ * transition
+ */
+ u64 powering_on_shader_state;
+ /**
+ * A bit mask identifying the tiler cores that the power policy would
+ * like to be on. @see kbase_pm_device_data:desired_shader_state
+ */
+ u64 desired_tiler_state;
+ /**
+ * A bit mask indicating which tiler core are currently in a power-on
+ * transition
+ */
+ u64 powering_on_tiler_state;
+
+ /**
+ * A bit mask indicating which l2-caches are currently in a power-on
+ * transition
+ */
+ u64 powering_on_l2_state;
+
+ /**
+ * This flag is set if the GPU is powered as requested by the
+ * desired_xxx_state variables
+ */
+ bool gpu_in_desired_state;
+ /* Wait queue set when gpu_in_desired_state != 0 */
+ wait_queue_head_t gpu_in_desired_state_wait;
+
+ /**
+ * Set to true when the GPU is powered and register accesses are
+ * possible, false otherwise
+ */
+ bool gpu_powered;
+
+ /** Set to true when instrumentation is enabled, false otherwise */
+ bool instr_enabled;
+
+ bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+ /**
+ * Debug state indicating whether sufficient initialization of the
+ * driver has occurred to handle IRQs
+ */
+ bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+ /**
+ * Spinlock that must be held when:
+ * - writing gpu_powered
+ * - accessing driver_ready_for_irqs (in CONFIG_MALI_DEBUG builds)
+ */
+ spinlock_t gpu_powered_lock;
+
+ /** Structure to hold metrics for the GPU */
+
+ struct kbasep_pm_metrics_data metrics;
+
+ /**
+ * Set to the number of poweroff timer ticks until the GPU is powered
+ * off
+ */
+ int gpu_poweroff_pending;
+
+ /**
+ * Set to the number of poweroff timer ticks until shaders are powered
+ * off
+ */
+ int shader_poweroff_pending_time;
+
+ /** Timer for powering off GPU */
+ struct hrtimer gpu_poweroff_timer;
+
+ struct workqueue_struct *gpu_poweroff_wq;
+
+ struct work_struct gpu_poweroff_work;
+
+ /** Bit mask of shaders to be powered off on next timer callback */
+ u64 shader_poweroff_pending;
+
+ /**
+ * Set to true if the poweroff timer is currently running,
+ * false otherwise
+ */
+ bool poweroff_timer_needed;
+
+ /**
+ * Callback when the GPU needs to be turned on. See
+ * @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ *
+ * @return 1 if GPU state was lost, 0 otherwise
+ */
+ int (*callback_power_on)(struct kbase_device *kbdev);
+
+ /**
+ * Callback when the GPU may be turned off. See
+ * @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_off)(struct kbase_device *kbdev);
+
+ /**
+ * Callback when a suspend occurs and the GPU needs to be turned off.
+ * See @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_suspend)(struct kbase_device *kbdev);
+
+ /**
+ * Callback when a resume occurs and the GPU needs to be turned on.
+ * See @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_resume)(struct kbase_device *kbdev);
+
+ /**
+ * Callback when the GPU needs to be turned on. See
+ * @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ *
+ * @return 1 if GPU state was lost, 0 otherwise
+ */
+ int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+
+ /**
+ * Callback when the GPU may be turned off. See
+ * @ref kbase_pm_callback_conf
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_runtime_off)(struct kbase_device *kbdev);
+
+};
+
+
+/** List of policy IDs */
+enum kbase_pm_policy_id {
+ KBASE_PM_POLICY_ID_DEMAND = 1,
+ KBASE_PM_POLICY_ID_ALWAYS_ON,
+ KBASE_PM_POLICY_ID_COARSE_DEMAND,
+#if !MALI_CUSTOMER_RELEASE
+ KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED,
+ KBASE_PM_POLICY_ID_FAST_START
+#endif
+};
+
+typedef u32 kbase_pm_policy_flags;
+
+/**
+ * Power policy structure.
+ *
+ * Each power policy exposes a (static) instance of this structure which
+ * contains function pointers to the policy's methods.
+ */
+struct kbase_pm_policy {
+ /** The name of this policy */
+ char *name;
+
+ /**
+ * Function called when the policy is selected
+ *
+ * This should initialize the kbdev->pm.pm_policy_data structure. It
+ * should not attempt to make any changes to hardware state.
+ *
+ * It is undefined what state the cores are in when the function is
+ * called.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ */
+ void (*init)(struct kbase_device *kbdev);
+
+ /**
+ * Function called when the policy is unselected.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ */
+ void (*term)(struct kbase_device *kbdev);
+
+ /**
+ * Function called to get the current shader core mask
+ *
+ * The returned mask should meet or exceed (kbdev->shader_needed_bitmap
+ * | kbdev->shader_inuse_bitmap).
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ *
+ * @return The mask of shader cores to be powered
+ */
+ u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+ /**
+ * Function called to get the current overall GPU power state
+ *
+ * This function should consider the state of kbdev->pm.active_count. If
+ * this count is greater than 0 then there is at least one active
+ * context on the device and the GPU should be powered. If it is equal
+ * to 0 then there are no active contexts and the GPU could be powered
+ * off if desired.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ *
+ * @return true if the GPU should be powered, false otherwise
+ */
+ bool (*get_core_active)(struct kbase_device *kbdev);
+
+ /** Field indicating flags for this policy */
+ kbase_pm_policy_flags flags;
+
+ /**
+ * Field indicating an ID for this policy. This is not necessarily the
+ * same as its index in the list returned by kbase_pm_list_policies().
+ * It is used purely for debugging.
+ */
+ enum kbase_pm_policy_id id;
+};
+
+
+enum kbase_pm_ca_policy_id {
+ KBASE_PM_CA_POLICY_ID_FIXED = 1,
+ KBASE_PM_CA_POLICY_ID_RANDOM
+};
+
+typedef u32 kbase_pm_ca_policy_flags;
+
+/**
+ * Core availability policy structure.
+ *
+ * Each core availability policy exposes a (static) instance of this structure
+ * which contains function pointers to the policy's methods.
+ */
+struct kbase_pm_ca_policy {
+ /** The name of this policy */
+ char *name;
+
+ /**
+ * Function called when the policy is selected
+ *
+ * This should initialize the kbdev->pm.ca_policy_data structure. It
+ * should not attempt to make any changes to hardware state.
+ *
+ * It is undefined what state the cores are in when the function is
+ * called.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ */
+ void (*init)(struct kbase_device *kbdev);
+
+ /**
+ * Function called when the policy is unselected.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ */
+ void (*term)(struct kbase_device *kbdev);
+
+ /**
+ * Function called to get the current shader core availability mask
+ *
+ * When a change in core availability is occuring, the policy must set
+ * kbdev->pm.ca_in_transition to true. This is to indicate that
+ * reporting changes in power state cannot be optimized out, even if
+ * kbdev->pm.desired_shader_state remains unchanged. This must be done
+ * by any functions internal to the Core Availability Policy that change
+ * the return value of kbase_pm_ca_policy::get_core_mask.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ *
+ * @return The current core availability mask
+ */
+ u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+ /**
+ * Function called to update the current core status
+ *
+ * If none of the cores in core group 0 are ready or transitioning, then
+ * the policy must ensure that the next call to get_core_mask does not
+ * return 0 for all cores in core group 0. It is an error to disable
+ * core group 0 through the core availability policy.
+ *
+ * When a change in core availability has finished, the policy must set
+ * kbdev->pm.ca_in_transition to false. This is to indicate that
+ * changes in power state can once again be optimized out when
+ * kbdev->pm.desired_shader_state is unchanged.
+ *
+ * @param kbdev The kbase device structure for the device
+ * (must be a valid pointer)
+ * @param cores_ready The mask of cores currently powered and
+ * ready to run jobs
+ * @param cores_transitioning The mask of cores currently transitioning
+ * power state
+ */
+ void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+ u64 cores_transitioning);
+
+ /** Field indicating flags for this policy */
+ kbase_pm_ca_policy_flags flags;
+
+ /**
+ * Field indicating an ID for this policy. This is not necessarily the
+ * same as its index in the list returned by kbase_pm_list_policies().
+ * It is used purely for debugging.
+ */
+ enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100644
index 00000000000..4e5b79c9c44
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_demand.c
+ * A simple demand based power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 demand_get_core_mask(struct kbase_device *kbdev)
+{
+ u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap;
+
+ if (0 == kbdev->pm.active_count)
+ return 0;
+
+ return desired;
+}
+
+static bool demand_get_core_active(struct kbase_device *kbdev)
+{
+ if (0 == kbdev->pm.active_count)
+ return false;
+
+ return true;
+}
+
+static void demand_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void demand_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+/**
+ * The @ref struct kbase_pm_policy structure for the demand power policy.
+ *
+ * This is the static structure that defines the demand power policy's callback
+ * and name.
+ */
+const struct kbase_pm_policy kbase_pm_demand_policy_ops = {
+ "demand", /* name */
+ demand_init, /* init */
+ demand_term, /* term */
+ demand_get_core_mask, /* get_core_mask */
+ demand_get_core_active, /* get_core_active */
+ 0u, /* flags */
+ KBASE_PM_POLICY_ID_DEMAND, /* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
new file mode 100644
index 00000000000..8b7a17ab984
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h
@@ -0,0 +1,60 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_demand.h
+ * A simple demand based power management policy
+ */
+
+#ifndef MALI_KBASE_PM_DEMAND_H
+#define MALI_KBASE_PM_DEMAND_H
+
+/**
+ * The demand power management policy has the following characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ * know which Job Chains are to be run:
+ * - The Shader Cores are not powered up
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ * currently queued Job Chains:
+ * - Only those Shader Cores are powered up
+ * - When KBase indicates that the GPU need not be powered:
+ * - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ * has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ * User Processes have finished, and it is waiting for a User Process to
+ * submit some more Job Chains.
+ */
+
+/**
+ * Private structure for policy instance data.
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_demand {
+ /** No state needed - just have a dummy variable here */
+ int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
new file mode 100644
index 00000000000..1d72fd60143
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c
@@ -0,0 +1,1264 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_driver.c
+ * Base kernel Power Management hardware control
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+#include <mali_kbase_pm.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_smc.h>
+#include <backend/gpu/mali_kbase_cache_policy_backend.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_irq_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+#if MALI_MOCK_TEST
+#define MOCKABLE(function) function##_original
+#else
+#define MOCKABLE(function) function
+#endif /* MALI_MOCK_TEST */
+
+/**
+ * Actions that can be performed on a core.
+ *
+ * This enumeration is private to the file. Its values are set to allow
+ * @ref core_type_to_reg function, which decodes this enumeration, to be simpler
+ * and more efficient.
+ */
+enum kbasep_pm_action {
+ ACTION_PRESENT = 0,
+ ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO),
+ ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO),
+ ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO),
+ ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO),
+ ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO)
+};
+
+static u64 kbase_pm_get_state(
+ struct kbase_device *kbdev,
+ enum kbase_pm_core_type core_type,
+ enum kbasep_pm_action action);
+
+/**
+ * Decode a core type and action to a register.
+ *
+ * Given a core type (defined by @ref kbase_pm_core_type) and an action (defined
+ * by @ref kbasep_pm_action) this function will return the register offset that
+ * will perform the action on the core type. The register returned is the \c _LO
+ * register and an offset must be applied to use the \c _HI register.
+ *
+ * @param core_type The type of core
+ * @param action The type of action
+ *
+ * @return The register offset of the \c _LO register that performs an action of
+ * type \c action on a core of type \c core_type.
+ */
+static u32 core_type_to_reg(enum kbase_pm_core_type core_type,
+ enum kbasep_pm_action action)
+{
+ return (u32)core_type + (u32)action;
+}
+
+
+/** Invokes an action on a core set
+ *
+ * This function performs the action given by \c action on a set of cores of a
+ * type given by \c core_type. It is a static function used by
+ * @ref kbase_pm_transition_core_type
+ *
+ * @param kbdev The kbase device structure of the device
+ * @param core_type The type of core that the action should be performed on
+ * @param cores A bit mask of cores to perform the action on (low 32 bits)
+ * @param action The action to perform on the cores
+ */
+static void kbase_pm_invoke(struct kbase_device *kbdev,
+ enum kbase_pm_core_type core_type,
+ u64 cores,
+ enum kbasep_pm_action action)
+{
+ u32 reg;
+ u32 lo = cores & 0xFFFFFFFF;
+ u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ reg = core_type_to_reg(core_type, action);
+
+ KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ if (cores) {
+ if (action == ACTION_PWRON)
+ kbase_trace_mali_pm_power_on(core_type, cores);
+ else if (action == ACTION_PWROFF)
+ kbase_trace_mali_pm_power_off(core_type, cores);
+ }
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ if (cores) {
+ u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+ if (action == ACTION_PWRON)
+ state |= cores;
+ else if (action == ACTION_PWROFF)
+ state &= ~cores;
+ kbase_tlstream_aux_pm_state(core_type, state);
+ }
+#endif
+ /* Tracing */
+ if (cores) {
+ if (action == ACTION_PWRON)
+ switch (core_type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+ lo);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+ NULL, 0u, lo);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+ 0u, lo);
+ break;
+ default:
+ break;
+ }
+ else if (action == ACTION_PWROFF)
+ switch (core_type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+ 0u, lo);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+ NULL, 0u, lo);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+ 0u, lo);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (lo != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+ if (hi != 0)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+/**
+ * Get information about a core set
+ *
+ * This function gets information (chosen by \c action) about a set of cores of
+ * a type given by \c core_type. It is a static function used by @ref
+ * kbase_pm_get_present_cores, @ref kbase_pm_get_active_cores, @ref
+ * kbase_pm_get_trans_cores and @ref kbase_pm_get_ready_cores.
+ *
+ * @param kbdev The kbase device structure of the device
+ * @param core_type The type of core that the should be queried
+ * @param action The property of the cores to query
+ *
+ * @return A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+ enum kbase_pm_core_type core_type,
+ enum kbasep_pm_action action)
+{
+ u32 reg;
+ u32 lo, hi;
+
+ reg = core_type_to_reg(core_type, action);
+
+ KBASE_DEBUG_ASSERT(reg);
+
+ lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+ hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+ return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev)
+{
+ kbdev->shader_present_bitmap =
+ kbase_pm_get_state(kbdev, KBASE_PM_CORE_SHADER, ACTION_PRESENT);
+ kbdev->tiler_present_bitmap =
+ kbase_pm_get_state(kbdev, KBASE_PM_CORE_TILER, ACTION_PRESENT);
+ kbdev->l2_present_bitmap =
+ kbase_pm_get_state(kbdev, KBASE_PM_CORE_L2, ACTION_PRESENT);
+ kbdev->shader_inuse_bitmap = 0;
+ kbdev->shader_needed_bitmap = 0;
+ kbdev->shader_available_bitmap = 0;
+ kbdev->tiler_available_bitmap = 0;
+ kbdev->l2_users_count = 0;
+ kbdev->l2_available_bitmap = 0;
+ kbdev->tiler_needed_cnt = 0;
+ kbdev->tiler_inuse_cnt = 0;
+
+ memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores);
+
+/**
+ * Get the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ switch (type) {
+ case KBASE_PM_CORE_L2:
+ return kbdev->l2_present_bitmap;
+ case KBASE_PM_CORE_SHADER:
+ return kbdev->shader_present_bitmap;
+ case KBASE_PM_CORE_TILER:
+ return kbdev->tiler_present_bitmap;
+ }
+ KBASE_DEBUG_ASSERT(0);
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * Get the cores that are "active" (busy processing work)
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * Get the cores that are transitioning between power states
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * Get the cores that are powered on
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type)
+{
+ u64 result;
+
+ result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+ switch (type) {
+ case KBASE_PM_CORE_SHADER:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ case KBASE_PM_CORE_TILER:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ case KBASE_PM_CORE_L2:
+ KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+ (u32) result);
+ break;
+ default:
+ break;
+ }
+
+ return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * Perform power transitions for a particular core type.
+ *
+ * This function will perform any available power transitions to make the actual
+ * hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that call cannot be made
+ * until the transition has finished. Cores which are not present in the
+ * hardware are ignored if they are specified in the desired_state bitmask,
+ * however the return value will always be 0 in this case.
+ *
+ * @param kbdev The kbase device
+ * @param type The core type to perform transitions for
+ * @param desired_state A bit mask of the desired state of the cores
+ * @param in_use A bit mask of the cores that are currently running
+ * jobs. These cores have to be kept powered up because
+ * there are jobs running (or about to run) on them.
+ * @param[out] available Receives a bit mask of the cores that the job
+ * scheduler can use to submit jobs to. May be NULL if
+ * this is not needed.
+ * @param[in,out] powering_on Bit mask to update with cores that are
+ * transitioning to a power-on state.
+ *
+ * @return true if the desired state has been reached, false otherwise
+ */
+static bool kbase_pm_transition_core_type(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type,
+ u64 desired_state,
+ u64 in_use,
+ u64 * const available,
+ u64 *powering_on)
+{
+ u64 present;
+ u64 ready;
+ u64 trans;
+ u64 powerup;
+ u64 powerdown;
+ u64 powering_on_trans;
+ u64 desired_state_in_use;
+
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ /* Get current state */
+ present = kbase_pm_get_present_cores(kbdev, type);
+ trans = kbase_pm_get_trans_cores(kbdev, type);
+ ready = kbase_pm_get_ready_cores(kbdev, type);
+ /* mask off ready from trans in case transitions finished between the
+ * register reads */
+ trans &= ~ready;
+
+ powering_on_trans = trans & *powering_on;
+ *powering_on = powering_on_trans;
+
+ if (available != NULL)
+ *available = (ready | powering_on_trans) & desired_state;
+
+ /* Update desired state to include the in-use cores. These have to be
+ * kept powered up because there are jobs running or about to run on
+ * these cores
+ */
+ desired_state_in_use = desired_state | in_use;
+
+ /* Update state of whether l2 caches are powered */
+ if (type == KBASE_PM_CORE_L2) {
+ if ((ready == present) && (desired_state_in_use == ready) &&
+ (trans == 0)) {
+ /* All are ready, none will be turned off, and none are
+ * transitioning */
+ kbdev->pm.backend.l2_powered = 1;
+ if (kbdev->l2_users_count > 0) {
+ /* Notify any registered l2 cache users
+ * (optimized out when no users waiting) */
+ wake_up(&kbdev->pm.backend.l2_powered_wait);
+ }
+ } else
+ kbdev->pm.backend.l2_powered = 0;
+ }
+
+ if (desired_state_in_use == ready && (trans == 0))
+ return true;
+
+ /* Restrict the cores to those that are actually present */
+ powerup = desired_state_in_use & present;
+ powerdown = (~desired_state_in_use) & present;
+
+ /* Restrict to cores that are not already in the desired state */
+ powerup &= ~ready;
+ powerdown &= ready;
+
+ /* Don't transition any cores that are already transitioning, except for
+ * Mali cores that support the following case:
+ *
+ * If the SHADER_PWRON or TILER_PWRON registers are written to turn on
+ * a core that is currently transitioning to power off, then this is
+ * remembered and the shader core is automatically powered up again once
+ * the original transition completes. Once the automatic power on is
+ * complete any job scheduled on the shader core should start.
+ */
+ powerdown &= ~trans;
+
+ if (kbase_hw_has_feature(kbdev,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS))
+ if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type)
+ trans = powering_on_trans; /* for exception cases, only
+ * mask off cores in power on
+ * transitions */
+
+ powerup &= ~trans;
+
+ /* Perform transitions if any */
+ kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON);
+ kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF);
+
+ /* Recalculate cores transitioning on, and re-evaluate our state */
+ powering_on_trans |= powerup;
+ *powering_on = powering_on_trans;
+ if (available != NULL)
+ *available = (ready | powering_on_trans) & desired_state;
+
+ return false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type);
+
+/**
+ * Determine which caches should be on for a particular core state.
+ *
+ * This function takes a bit mask of the present caches and the cores (or
+ * caches) that are attached to the caches that will be powered. It then
+ * computes which caches should be turned on to allow the cores requested to be
+ * powered up.
+ *
+ * @param present The bit mask of present caches
+ * @param cores_powered A bit mask of cores (or L2 caches) that are desired to
+ * be powered
+ *
+ * @return A bit mask of the caches that should be turned on
+ */
+static u64 get_desired_cache_status(u64 present, u64 cores_powered)
+{
+ u64 desired = 0;
+
+ while (present) {
+ /* Find out which is the highest set bit */
+ u64 bit = fls64(present) - 1;
+ u64 bit_mask = 1ull << bit;
+ /* Create a mask which has all bits from 'bit' upwards set */
+
+ u64 mask = ~(bit_mask - 1);
+
+ /* If there are any cores powered at this bit or above (that
+ * haven't previously been processed) then we need this core on
+ */
+ if (cores_powered & mask)
+ desired |= bit_mask;
+
+ /* Remove bits from cores_powered and present */
+ cores_powered &= ~mask;
+ present &= ~bit_mask;
+ }
+
+ return desired;
+}
+
+KBASE_EXPORT_TEST_API(get_desired_cache_status);
+
+bool
+MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev)
+{
+ bool cores_are_available = false;
+ bool in_desired_state = true;
+ u64 desired_l2_state;
+ u64 cores_powered;
+ u64 tiler_available_bitmap;
+ u64 shader_available_bitmap;
+ u64 shader_ready_bitmap;
+ u64 shader_transitioning_bitmap;
+ u64 l2_available_bitmap;
+ u64 prev_l2_available_bitmap;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ spin_lock(&kbdev->pm.backend.gpu_powered_lock);
+ if (kbdev->pm.backend.gpu_powered == false) {
+ spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+ if (kbdev->pm.backend.desired_shader_state == 0 &&
+ kbdev->pm.backend.desired_tiler_state == 0)
+ return true;
+ return false;
+ }
+
+ /* Trace that a change-state is being requested, and that it took
+ * (effectively) no time to start it. This is useful for counting how
+ * many state changes occurred, in a way that's backwards-compatible
+ * with processing the trace data */
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+ kbase_timeline_pm_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE);
+
+ /* If any cores are already powered then, we must keep the caches on */
+ cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER);
+
+ cores_powered |= kbdev->pm.backend.desired_shader_state;
+
+ /* If there are l2 cache users registered, keep all l2s powered even if
+ * all other cores are off. */
+ if (kbdev->l2_users_count > 0)
+ cores_powered |= kbdev->l2_present_bitmap;
+
+ desired_l2_state = get_desired_cache_status(kbdev->l2_present_bitmap,
+ cores_powered);
+
+ /* If any l2 cache is on, then enable l2 #0, for use by job manager */
+ if (0 != desired_l2_state) {
+ desired_l2_state |= 1;
+ /* Also enable tiler if l2 cache is powered */
+ kbdev->pm.backend.desired_tiler_state =
+ kbdev->tiler_present_bitmap;
+ } else {
+ kbdev->pm.backend.desired_tiler_state = 0;
+ }
+
+ prev_l2_available_bitmap = kbdev->l2_available_bitmap;
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_L2, desired_l2_state, 0,
+ &l2_available_bitmap,
+ &kbdev->pm.backend.powering_on_l2_state);
+
+ if (kbdev->l2_available_bitmap != l2_available_bitmap)
+ KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap);
+
+ kbdev->l2_available_bitmap = l2_available_bitmap;
+
+ if (in_desired_state) {
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_TILER,
+ kbdev->pm.backend.desired_tiler_state,
+ 0, &tiler_available_bitmap,
+ &kbdev->pm.backend.powering_on_tiler_state);
+ in_desired_state &= kbase_pm_transition_core_type(kbdev,
+ KBASE_PM_CORE_SHADER,
+ kbdev->pm.backend.desired_shader_state,
+ kbdev->shader_inuse_bitmap,
+ &shader_available_bitmap,
+ &kbdev->pm.backend.powering_on_shader_state);
+
+ if (kbdev->shader_available_bitmap != shader_available_bitmap) {
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+ NULL, 0u,
+ (u32) shader_available_bitmap);
+ KBASE_TIMELINE_POWER_SHADER(kbdev,
+ shader_available_bitmap);
+ }
+
+ kbdev->shader_available_bitmap = shader_available_bitmap;
+
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap) {
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+ NULL, NULL, 0u,
+ (u32) tiler_available_bitmap);
+ KBASE_TIMELINE_POWER_TILER(kbdev,
+ tiler_available_bitmap);
+ }
+
+ kbdev->tiler_available_bitmap = tiler_available_bitmap;
+
+ } else if ((l2_available_bitmap & kbdev->tiler_present_bitmap) !=
+ kbdev->tiler_present_bitmap) {
+ tiler_available_bitmap = 0;
+
+ if (kbdev->tiler_available_bitmap != tiler_available_bitmap)
+ KBASE_TIMELINE_POWER_TILER(kbdev,
+ tiler_available_bitmap);
+
+ kbdev->tiler_available_bitmap = tiler_available_bitmap;
+ }
+
+ /* State updated for slow-path waiters */
+ kbdev->pm.backend.gpu_in_desired_state = in_desired_state;
+
+ shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+ shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev,
+ KBASE_PM_CORE_SHADER);
+
+ /* Determine whether the cores are now available (even if the set of
+ * available cores is empty). Note that they can be available even if
+ * we've not finished transitioning to the desired state */
+ if ((kbdev->shader_available_bitmap &
+ kbdev->pm.backend.desired_shader_state)
+ == kbdev->pm.backend.desired_shader_state &&
+ (kbdev->tiler_available_bitmap &
+ kbdev->pm.backend.desired_tiler_state)
+ == kbdev->pm.backend.desired_tiler_state) {
+ cores_are_available = true;
+
+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+ (u32)(kbdev->shader_available_bitmap &
+ kbdev->pm.backend.desired_shader_state));
+ KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+ (u32)(kbdev->tiler_available_bitmap &
+ kbdev->pm.backend.desired_tiler_state));
+
+ /* Log timelining information about handling events that power
+ * up cores, to match up either with immediate submission either
+ * because cores already available, or from PM IRQ */
+ if (!in_desired_state)
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ }
+
+ if (in_desired_state) {
+ KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_L2));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_SHADER));
+ kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+ kbase_pm_get_ready_cores(kbdev,
+ KBASE_PM_CORE_TILER));
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_aux_pm_state(
+ KBASE_PM_CORE_L2,
+ kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_L2));
+ kbase_tlstream_aux_pm_state(
+ KBASE_PM_CORE_SHADER,
+ kbase_pm_get_ready_cores(
+ kbdev, KBASE_PM_CORE_SHADER));
+ kbase_tlstream_aux_pm_state(
+ KBASE_PM_CORE_TILER,
+ kbase_pm_get_ready_cores(
+ kbdev,
+ KBASE_PM_CORE_TILER));
+#endif
+
+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+ kbdev->pm.backend.gpu_in_desired_state,
+ (u32)kbdev->pm.backend.desired_shader_state);
+ KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+ (u32)kbdev->pm.backend.desired_tiler_state);
+
+ /* Log timelining information for synchronous waiters */
+ kbase_timeline_pm_send_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+ /* Wake slow-path waiters. Job scheduler does not use this. */
+ KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+ wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+ }
+
+ spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+ /* kbase_pm_ca_update_core_status can cause one-level recursion into
+ * this function, so it must only be called once all changes to kbdev
+ * have been committed, and after the gpu_powered_lock has been
+ * dropped. */
+ if (kbdev->shader_ready_bitmap != shader_ready_bitmap ||
+ kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) {
+ kbdev->shader_ready_bitmap = shader_ready_bitmap;
+ kbdev->shader_transitioning_bitmap =
+ shader_transitioning_bitmap;
+
+ kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap,
+ shader_transitioning_bitmap);
+ }
+
+ /* The core availability policy is not allowed to keep core group 0
+ * turned off (unless it was changing the l2 power state) */
+ if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+ kbdev->gpu_props.props.coherency_info.group[0].core_mask) &&
+ (prev_l2_available_bitmap == desired_l2_state) &&
+ !(kbase_pm_ca_get_core_mask(kbdev) &
+ kbdev->gpu_props.props.coherency_info.group[0].core_mask))
+ BUG();
+
+ /* The core availability policy is allowed to keep core group 1 off,
+ * but all jobs specifically targeting CG1 must fail */
+ if (!((shader_ready_bitmap | shader_transitioning_bitmap) &
+ kbdev->gpu_props.props.coherency_info.group[1].core_mask) &&
+ !(kbase_pm_ca_get_core_mask(kbdev) &
+ kbdev->gpu_props.props.coherency_info.group[1].core_mask))
+ kbdev->pm.backend.cg1_disabled = true;
+ else
+ kbdev->pm.backend.cg1_disabled = false;
+
+ return cores_are_available;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock);
+
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ bool cores_are_available;
+ /* Force the transition to be checked and reported - the cores may be
+ * 'available' (for job submission) but not fully powered up. */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ /* Don't need 'cores_are_available', because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /* Wait for cores */
+ wait_event(kbdev->pm.backend.gpu_in_desired_state_wait,
+ kbdev->pm.backend.gpu_in_desired_state);
+
+ /* Log timelining information that a change in state has completed */
+ kbase_timeline_pm_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync);
+
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ /*
+ * Clear all interrupts,
+ * and unmask them all.
+ */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+ NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+ NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+ NULL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts);
+
+void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ /*
+ * Clear all interrupts,
+ * and unmask them all.
+ */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+ NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL,
+ NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+ NULL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), mask, NULL);
+}
+
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ /*
+ * Mask all interrupts,
+ * and clear them all.
+ */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL,
+ NULL);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF,
+ NULL);
+
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL);
+ kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+
+/*
+ * pmu layout:
+ * 0x0000: PMU TAG (RO) (0xCAFECAFE)
+ * 0x0004: PMU VERSION ID (RO) (0x00000000)
+ * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE)
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
+{
+ bool reset_required = is_resume;
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ unsigned long flags;
+ int i;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ if (kbdev->pm.backend.gpu_powered) {
+ /* Already turned on */
+ if (kbdev->poweroff_pending)
+ kbase_pm_enable_interrupts(kbdev);
+ kbdev->poweroff_pending = false;
+ KBASE_DEBUG_ASSERT(!is_resume);
+ return;
+ }
+
+ kbdev->poweroff_pending = false;
+
+ KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u);
+
+ if (is_resume && kbdev->pm.backend.callback_power_resume) {
+ kbdev->pm.backend.callback_power_resume(kbdev);
+ } else if (kbdev->pm.backend.callback_power_on) {
+ kbdev->pm.backend.callback_power_on(kbdev);
+ /* If your platform properly keeps the GPU state you may use the
+ * return value of the callback_power_on function to
+ * conditionally reset the GPU on power up. Currently we are
+ * conservative and always reset the GPU. */
+ reset_required = true;
+ }
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+ kbdev->pm.backend.gpu_powered = true;
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (reset_required) {
+ /* GPU state was lost, reset GPU to ensure it is in a
+ * consistent state */
+ kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS);
+ }
+
+ /* Reprogram the GPU's MMU */
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ struct kbase_as *as = &kbdev->as[i];
+
+ mutex_lock(&as->transaction_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (js_devdata->runpool_irq.per_as_data[i].kctx)
+ kbase_mmu_update(
+ js_devdata->runpool_irq.per_as_data[i].kctx);
+ else
+ kbase_mmu_disable_as(kbdev, i);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&as->transaction_mutex);
+ }
+
+ /* Lastly, enable the interrupts */
+ kbase_pm_enable_interrupts(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_on);
+
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* ASSERT that the cores should now be unavailable. No lock needed. */
+ KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u);
+
+ kbdev->poweroff_pending = true;
+
+ if (!kbdev->pm.backend.gpu_powered) {
+ /* Already turned off */
+ if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+ kbdev->pm.backend.callback_power_suspend(kbdev);
+ return true;
+ }
+
+ KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u);
+
+ /* Disable interrupts. This also clears any outstanding interrupts */
+ kbase_pm_disable_interrupts(kbdev);
+ /* Ensure that any IRQ handlers have finished */
+ kbase_synchronize_irqs(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (atomic_read(&kbdev->faults_pending)) {
+ /* Page/bus faults are still being processed. The GPU can not
+ * be powered off until they have completed */
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ flags);
+ return false;
+ }
+
+ /* The GPU power may be turned off from this point */
+ kbdev->pm.backend.gpu_powered = false;
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags);
+
+ if (is_suspend && kbdev->pm.backend.callback_power_suspend)
+ kbdev->pm.backend.callback_power_suspend(kbdev);
+ else if (kbdev->pm.backend.callback_power_off)
+ kbdev->pm.backend.callback_power_off(kbdev);
+ return true;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_clock_off);
+
+struct kbasep_reset_timeout_data {
+ struct hrtimer timer;
+ bool timed_out;
+ struct kbase_device *kbdev;
+};
+
+void kbase_pm_reset_done(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ kbdev->pm.backend.reset_done = true;
+ wake_up(&kbdev->pm.backend.reset_done_wait);
+}
+
+/**
+ * Wait for the RESET_COMPLETED IRQ to occur, then reset the waiting state.
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ wait_event(kbdev->pm.backend.reset_done_wait,
+ (kbdev->pm.backend.reset_done));
+ kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+ struct kbasep_reset_timeout_data *rtdata =
+ container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+ rtdata->timed_out = 1;
+
+ /* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+ * hasn't completed */
+ kbase_pm_reset_done(rtdata->kbdev);
+
+ return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+ kbdev->hw_quirks_sc = 0;
+
+ /* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE. See PRLAM-8443.
+ * and needed due to MIDGLES-3539. See PRLAM-11035 */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+ kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+ kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+ /* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+ */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+ kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+ /* Enable alternative hardware counter selection if configured. */
+ if (DEFAULT_ALTERNATIVE_HWC)
+ kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+
+ /* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+ kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+ kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+ /* Set tiler clock gate override if required */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+ kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+ /* Limit the GPU bus bandwidth if the platform needs this. */
+ kbdev->hw_quirks_mmu = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(L2_MMU_CONFIG), NULL);
+
+ /* Limit read ID width for AXI */
+ kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS);
+ kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) <<
+ L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT;
+
+ /* Limit write ID width for AXI */
+ kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+ kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) <<
+ L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT;
+}
+
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
+{
+ if (kbdev->hw_quirks_sc)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG),
+ kbdev->hw_quirks_sc, NULL);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG),
+ kbdev->hw_quirks_tiler, NULL);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG),
+ kbdev->hw_quirks_mmu, NULL);
+}
+
+
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags)
+{
+ unsigned long irq_flags;
+ struct kbasep_reset_timeout_data rtdata;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* Ensure the clock is on before attempting to access the hardware */
+ if (!kbdev->pm.backend.gpu_powered) {
+ if (kbdev->pm.backend.callback_power_on)
+ kbdev->pm.backend.callback_power_on(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock,
+ irq_flags);
+ kbdev->pm.backend.gpu_powered = true;
+ spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock,
+ irq_flags);
+ }
+
+ /* Ensure interrupts are off to begin with, this also clears any
+ * outstanding interrupts */
+ kbase_pm_disable_interrupts(kbdev);
+ /* Prepare for the soft-reset */
+ kbdev->pm.backend.reset_done = false;
+
+ /* The cores should be made unavailable due to the reset */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags);
+ if (kbdev->shader_available_bitmap != 0u)
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL,
+ NULL, 0u, (u32)0u);
+ if (kbdev->tiler_available_bitmap != 0u)
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER,
+ NULL, NULL, 0u, (u32)0u);
+ kbdev->shader_available_bitmap = 0u;
+ kbdev->tiler_available_bitmap = 0u;
+ kbdev->l2_available_bitmap = 0u;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags);
+
+ /* Soft reset the GPU */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_SOFT_RESET, NULL);
+
+ /* Unmask the reset complete interrupt only */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED,
+ NULL);
+
+ /* Initialize a structure for tracking the status of the reset */
+ rtdata.kbdev = kbdev;
+ rtdata.timed_out = 0;
+
+ /* Create a timer to use as a timeout on the reset */
+ hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rtdata.timer.function = kbasep_reset_timeout;
+
+ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+ HRTIMER_MODE_REL);
+
+ /* Wait for the RESET_COMPLETED interrupt to be raised */
+ kbase_pm_wait_for_reset(kbdev);
+
+ if (rtdata.timed_out == 0) {
+ /* GPU has been reset */
+ hrtimer_cancel(&rtdata.timer);
+ destroy_hrtimer_on_stack(&rtdata.timer);
+ goto out;
+ }
+
+ /* No interrupt has been received - check if the RAWSTAT register says
+ * the reset has completed */
+ if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) &
+ RESET_COMPLETED) {
+ /* The interrupt is set in the RAWSTAT; this suggests that the
+ * interrupts are not getting to the CPU */
+ dev_warn(kbdev->dev, "Reset interrupt didn't reach CPU. Check interrupt assignments.\n");
+ /* If interrupts aren't working we can't continue. */
+ destroy_hrtimer_on_stack(&rtdata.timer);
+ goto out;
+ }
+
+ /* The GPU doesn't seem to be responding to the reset so try a hard
+ * reset */
+ dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+ RESET_TIMEOUT);
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_HARD_RESET, NULL);
+
+ /* Restart the timer to wait for the hard reset to complete */
+ rtdata.timed_out = 0;
+
+ hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+ HRTIMER_MODE_REL);
+
+ /* Wait for the RESET_COMPLETED interrupt to be raised */
+ kbase_pm_wait_for_reset(kbdev);
+
+ if (rtdata.timed_out == 0) {
+ /* GPU has been reset */
+ hrtimer_cancel(&rtdata.timer);
+ destroy_hrtimer_on_stack(&rtdata.timer);
+ goto out;
+ }
+
+ destroy_hrtimer_on_stack(&rtdata.timer);
+
+ dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+ RESET_TIMEOUT);
+
+ /* The GPU still hasn't reset, give up */
+ return -EINVAL;
+
+out:
+
+ if (flags & PM_HW_ISSUES_DETECT)
+ kbase_pm_hw_issues_detect(kbdev);
+
+ kbase_pm_hw_issues_apply(kbdev);
+
+
+ /* If cycle counter was in use re-enable it, enable_irqs will only be
+ * false when called from kbase_pm_powerup */
+ if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+ (flags & PM_ENABLE_IRQS)) {
+ /* enable interrupts as the L2 may have to be powered on */
+ kbase_pm_enable_interrupts(kbdev);
+ kbase_pm_request_l2_caches(kbdev);
+
+ /* Re-enable the counters if we need to */
+ spin_lock_irqsave(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ irq_flags);
+ if (kbdev->pm.backend.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CYCLE_COUNT_START, NULL);
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ irq_flags);
+
+ kbase_pm_release_l2_caches(kbdev);
+ kbase_pm_disable_interrupts(kbdev);
+ }
+
+ if (flags & PM_ENABLE_IRQS)
+ kbase_pm_enable_interrupts(kbdev);
+
+ return 0;
+}
+
+/**
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * @ref kbase_pm_request_gpu_cycle_counter or
+ * @ref kbase_pm_request_gpu_cycle_counter_l2_is_on only
+ *
+ * When this function is called the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to (@ref
+ * kbase_pm_request_l2_caches or @ref kbase_pm_request_l2_caches_l2_on)
+ *
+ * @param kbdev The kbase device structure of the device
+ *
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ ++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+ if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+ INT_MAX);
+
+ kbase_pm_request_l2_caches(kbdev);
+
+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+ INT_MAX);
+
+ kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+ kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+ --kbdev->pm.backend.gpu_cycle_counter_requests;
+
+ if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+ GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+ spin_unlock_irqrestore(
+ &kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+ flags);
+
+ kbase_pm_release_l2_caches(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100644
index 00000000000..a4bd11a1c0f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,557 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_hwaccess_internal.h
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * Get details of the cores that are present in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) present in the GPU device and also a count of
+ * the number of cores.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type
+ * enumeration)
+ *
+ * @return The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * Get details of the cores that are currently active in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are actively processing work (i.e.
+ * turned on *and* busy).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type
+ * enumeration)
+ *
+ * @return The bit mask of active cores
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * Get details of the cores that are currently transitioning between power
+ * states.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are currently transitioning between
+ * power states.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type
+ * enumeration)
+ *
+ * @return The bit mask of transitioning cores
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * Get details of the cores that are currently powered and ready for jobs.
+ *
+ * This function can be called by the active power policy to return a bitmask of
+ * the cores (of a specified type) that are powered and ready for jobs (they may
+ * or may not be currently executing jobs).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type
+ * enumeration)
+ *
+ * @return The bit mask of ready cores
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+ enum kbase_pm_core_type type);
+
+/**
+ * Turn the clock for the device on, and enable device interrupts.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU on.
+ * It should be modified during integration to perform the necessary actions to
+ * ensure that the GPU is fully powered and clocked.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param is_resume true if clock on due to resume after suspend,
+ * false otherwise
+ */
+void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * Disable device interrupts, and turn the clock for the device off.
+ *
+ * This function can be used by a power policy to turn the clock for the GPU
+ * off. It should be modified during integration to perform the necessary
+ * actions to turn the clock off (if this is possible in the integration).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param is_suspend true if clock off due to suspend, false otherwise
+ *
+ * @return true if clock was turned off
+ * false if clock can not be turned off due to pending page/bus fault
+ * workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend);
+
+/**
+ * Enable interrupts on the device.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_enable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * Enable interrupts on the device, using the provided mask to set MMU_IRQ_MASK.
+ *
+ * Interrupts are also enabled after a call to kbase_pm_clock_on().
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param mask The mask to use for MMU_IRQ_MASK
+ */
+void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask);
+
+/**
+ * Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until @ref kbase_pm_enable_interrupts or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management initialization
+ * code that the GPU has been successfully reset.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * Check if there are any power transitions to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were <em>un</em>available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device::pm::power_change_lock
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @return non-zero when all desired cores are available. That is,
+ * it's worthwhile for the caller to submit a job.
+ * @return false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * Synchronous and locking variant of kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * @note There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device::pm::power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * Variant of kbase_pm_update_cores_state() where the caller must hold
+ * kbase_device::pm::power_change_lock
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * Update the desired state of shader cores from the Power Policy, and begin
+ * any power transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It will
+ * then begin power transitions to make the hardware acheive the desired shader
+ * core state.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * Cancel any pending requests to power off the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * Read the bitmasks of present cores.
+ *
+ * This information is cached to avoid having to perform register reads whenever
+ * the information is required.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
+
+/**
+ * Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init) after calling this function.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * Function to be called by the frame buffer driver to update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of buffer_updated differs from a previous call.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param buffer_updated True if the buffer has been updated on this VSync,
+ * false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * Configure the frame buffer device to set the vsync callback.
+ *
+ * This function should do whatever is necessary for this integration to ensure
+ * that kbase_pm_report_vsync is called appropriately.
+ *
+ * This function will need porting as part of the integration for a device.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_register_vsync_callback(struct kbase_device *kbdev);
+
+/**
+ * Free any resources that kbase_pm_register_vsync_callback allocated.
+ *
+ * This function should perform any cleanup required from the call to
+ * kbase_pm_register_vsync_callback. No call backs should occur after this
+ * function has returned.
+ *
+ * This function will need porting as part of the integration for a device.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev);
+
+/**
+ * Determine whether the DVFS system should change the clock speed of the GPU.
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating. It will return one of three
+ * enumerated values of kbase_pm_dvfs_action:
+ *
+ * @param kbdev The kbase device structure for the device
+ * (must be a valid pointer)
+ * @retval KBASE_PM_DVFS_NOP The clock does not need changing
+ * @retval KBASE_PM_DVFS_CLOCK_UP The clock frequency should be increased if
+ * possible.
+ * @retval KBASE_PM_DVFS_CLOCK_DOWN The clock frequency should be decreased if
+ * possible.
+ */
+enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * Mark that the GPU cycle counter is needed, if the caller is the first caller
+ * then the GPU cycle counters will be enabled along with the l2 cache
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * @ref kbase_pm_context_active must have been called).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * This is a version of the above function
+ * (@ref kbase_pm_request_gpu_cycle_counter) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter such as when a job is submitted. It does
+ * not sleep and can be called from atomic functions.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * @ref kbase_pm_context_active must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * Mark that the GPU cycle counter is no longer in use, if the caller is the
+ * last caller then the GPU cycle counters will be disabled. A request must have
+ * been made before a call to this.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read/write() at this
+ * stage, not kbase_reg_read/write().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn on power and/or clocks to the GPU. See
+ * @ref kbase_pm_callback_conf.
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This results in the power management callbacks provided in the driver
+ * configuration to get called to turn off power and/or clocks to the GPU. See
+ * @ref kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * Check if the power management metrics collection is active.
+ *
+ * Note that this returns if the power management metrics collection was
+ * active at the time of calling, it is possible that after the call the metrics
+ * collection enable may have changed state.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @return true if metrics collection was active else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * Power on the GPU, and any cores that are requested.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param is_resume true if power on due to resume after suspend,
+ * false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * Power off the GPU, and any cores that have been requested.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param is_suspend true if power off due to suspend,
+ * false otherwise
+ * @return true if power was turned off
+ * false if power can not be turned off due to pending page/bus
+ * fault workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+ unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * Function provided by platform specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param utilisation The current calculated utilisation by the metrics
+ * system.
+ * @param util_gl_share The current calculated gl share of utilisation.
+ * @param util_cl_share The current calculated cl share of utilisation per core
+ * group.
+ * @return Returns 0 on failure and non zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+ u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * Inform the metrics system that an atom is about to be run.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param katom The atom that is about to be run
+ */
+void kbase_pm_metrics_run_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * Inform the metrics system that an atom has been run and is being released.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param katom The atom that is about to be released
+ */
+void kbase_pm_metrics_release_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+
+#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
new file mode 100644
index 00000000000..b209c505bbd
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -0,0 +1,532 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_metrics.c
+ * Metrics for power management
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+/* When VSync is being hit aim for utilisation between 70-90% */
+#define KBASE_PM_VSYNC_MIN_UTILISATION 70
+#define KBASE_PM_VSYNC_MAX_UTILISATION 90
+/* Otherwise aim for 10-40% */
+#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10
+#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40
+
+/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
+ * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
+ * under 11s. Exceeding this will cause overflow */
+#define KBASE_PM_TIME_SHIFT 8
+
+/* Maximum time between sampling of utilization data, without resetting the
+ * counters. */
+#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
+{
+ unsigned long flags;
+ enum kbase_pm_dvfs_action action;
+ struct kbasep_pm_metrics_data *metrics;
+
+ KBASE_DEBUG_ASSERT(timer != NULL);
+
+ metrics = container_of(timer, struct kbasep_pm_metrics_data, timer);
+ action = kbase_pm_get_dvfs_action(metrics->kbdev);
+
+ spin_lock_irqsave(&metrics->lock, flags);
+
+ if (metrics->timer_active)
+ hrtimer_start(timer,
+ HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period),
+ HRTIMER_MODE_REL);
+
+ spin_unlock_irqrestore(&metrics->lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+int kbasep_pm_metrics_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ kbdev->pm.backend.metrics.kbdev = kbdev;
+ kbdev->pm.backend.metrics.vsync_hit = 0;
+ kbdev->pm.backend.metrics.utilisation = 0;
+ kbdev->pm.backend.metrics.util_cl_share[0] = 0;
+ kbdev->pm.backend.metrics.util_cl_share[1] = 0;
+ kbdev->pm.backend.metrics.util_gl_share = 0;
+
+ kbdev->pm.backend.metrics.time_period_start = ktime_get();
+ kbdev->pm.backend.metrics.time_busy = 0;
+ kbdev->pm.backend.metrics.time_idle = 0;
+ kbdev->pm.backend.metrics.prev_busy = 0;
+ kbdev->pm.backend.metrics.prev_idle = 0;
+ kbdev->pm.backend.metrics.gpu_active = false;
+ kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
+ kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
+ kbdev->pm.backend.metrics.active_gl_ctx = 0;
+ kbdev->pm.backend.metrics.busy_cl[0] = 0;
+ kbdev->pm.backend.metrics.busy_cl[1] = 0;
+ kbdev->pm.backend.metrics.busy_gl = 0;
+ kbdev->pm.backend.metrics.nr_in_slots = 0;
+
+ spin_lock_init(&kbdev->pm.backend.metrics.lock);
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ kbdev->pm.backend.metrics.timer_active = true;
+ hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ kbdev->pm.backend.metrics.timer.function = dvfs_callback;
+
+ hrtimer_start(&kbdev->pm.backend.metrics.timer,
+ HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
+ HRTIMER_MODE_REL);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+ kbase_pm_register_vsync_callback(kbdev);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
+
+void kbasep_pm_metrics_term(struct kbase_device *kbdev)
+{
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ kbdev->pm.backend.metrics.timer_active = false;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+ hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+ kbase_pm_unregister_vsync_callback(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
+
+/**
+ * kbasep_pm_record_gpu_active - update metrics tracking GPU active time
+ *
+ * This function updates the time the GPU was busy executing jobs in
+ * general and specifically for CL and GL jobs. Call this function when
+ * a job is submitted or removed from the GPU (job issue) slots.
+ *
+ * The busy time recorded is the time passed since the last time the
+ * busy/idle metrics were updated (e.g. by this function,
+ * kbasep_pm_record_gpu_idle or others).
+ *
+ * Note that the time we record towards CL and GL jobs accounts for
+ * the total number of CL and GL jobs active at that time. If 20ms
+ * has passed and 3 GL jobs were active, we account 3*20 ms towards
+ * the GL busy time. The number of CL/GL jobs active is tracked by
+ * kbase_pm_metrics_run_atom() / kbase_pm_metrics_release_atom().
+ *
+ * The kbdev->pm.backend.metrics.lock needs to be held when calling
+ * this function.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+static void kbasep_pm_record_gpu_active(struct kbase_device *kbdev)
+{
+ ktime_t now = ktime_get();
+ ktime_t diff;
+ u32 ns_time;
+
+ lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+ /* Record active time */
+ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+ ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+ kbdev->pm.backend.metrics.time_busy += ns_time;
+ kbdev->pm.backend.metrics.busy_gl += ns_time *
+ kbdev->pm.backend.metrics.active_gl_ctx;
+ kbdev->pm.backend.metrics.busy_cl[0] += ns_time *
+ kbdev->pm.backend.metrics.active_cl_ctx[0];
+ kbdev->pm.backend.metrics.busy_cl[1] += ns_time *
+ kbdev->pm.backend.metrics.active_cl_ctx[1];
+ /* Reset time period */
+ kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+/**
+ * kbasep_pm_record_gpu_idle - update metrics tracking GPU idle time
+ *
+ * This function updates the time the GPU was idle (not executing any
+ * jobs) based on the time passed when kbasep_pm_record_gpu_active()
+ * was called last to record the last job on the GPU finishing.
+ *
+ * Call this function when no jobs are in the job slots of the GPU and
+ * a job is about to be submitted to a job slot.
+ *
+ * The kbdev->pm.backend.metrics.lock needs to be held when calling
+ * this function.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+static void kbasep_pm_record_gpu_idle(struct kbase_device *kbdev)
+{
+ ktime_t now = ktime_get();
+ ktime_t diff;
+ u32 ns_time;
+
+ lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+ /* Record idle time */
+ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+ ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+ kbdev->pm.backend.metrics.time_idle += ns_time;
+ /* Reset time period */
+ kbdev->pm.backend.metrics.time_period_start = now;
+}
+
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ kbdev->pm.backend.metrics.vsync_hit = buffer_updated;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_report_vsync);
+
+#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+ ktime_t now)
+{
+ ktime_t diff;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+
+ if (kbdev->pm.backend.metrics.gpu_active) {
+ u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+ kbdev->pm.backend.metrics.time_busy += ns_time;
+ kbdev->pm.backend.metrics.busy_cl[0] += ns_time *
+ kbdev->pm.backend.metrics.active_cl_ctx[0];
+ kbdev->pm.backend.metrics.busy_cl[1] += ns_time *
+ kbdev->pm.backend.metrics.active_cl_ctx[1];
+ kbdev->pm.backend.metrics.busy_gl += ns_time *
+ kbdev->pm.backend.metrics.active_gl_ctx;
+ } else {
+ kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff)
+ >> KBASE_PM_TIME_SHIFT);
+ }
+}
+
+/* Caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function.
+ */
+static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev,
+ ktime_t now)
+{
+ /* Store previous value */
+ kbdev->pm.backend.metrics.prev_idle =
+ kbdev->pm.backend.metrics.time_idle;
+ kbdev->pm.backend.metrics.prev_busy =
+ kbdev->pm.backend.metrics.time_busy;
+
+ /* Reset current values */
+ kbdev->pm.backend.metrics.time_period_start = now;
+ kbdev->pm.backend.metrics.time_idle = 0;
+ kbdev->pm.backend.metrics.time_busy = 0;
+ kbdev->pm.backend.metrics.busy_cl[0] = 0;
+ kbdev->pm.backend.metrics.busy_cl[1] = 0;
+ kbdev->pm.backend.metrics.busy_gl = 0;
+}
+
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get());
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+ unsigned long *total_out, unsigned long *busy_out)
+{
+ ktime_t now = ktime_get();
+ unsigned long flags, busy, total;
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+ busy = kbdev->pm.backend.metrics.time_busy;
+ total = busy + kbdev->pm.backend.metrics.time_idle;
+
+ /* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default
+ * 100ms) */
+ if (total >= MALI_UTILIZATION_MAX_PERIOD) {
+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+ } else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) {
+ total += kbdev->pm.backend.metrics.prev_idle +
+ kbdev->pm.backend.metrics.prev_busy;
+ busy += kbdev->pm.backend.metrics.prev_busy;
+ }
+
+ *total_out = total;
+ *busy_out = busy;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
+ * function
+ */
+int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev,
+ int *util_gl_share,
+ int util_cl_share[2])
+{
+ int utilisation;
+ int busy;
+ ktime_t now = ktime_get();
+
+ kbase_pm_get_dvfs_utilisation_calc(kbdev, now);
+
+ if (kbdev->pm.backend.metrics.time_idle +
+ kbdev->pm.backend.metrics.time_busy == 0) {
+ /* No data - so we return NOP */
+ utilisation = -1;
+ if (util_gl_share)
+ *util_gl_share = -1;
+ if (util_cl_share) {
+ util_cl_share[0] = -1;
+ util_cl_share[1] = -1;
+ }
+ goto out;
+ }
+
+ utilisation = (100 * kbdev->pm.backend.metrics.time_busy) /
+ (kbdev->pm.backend.metrics.time_idle +
+ kbdev->pm.backend.metrics.time_busy);
+
+ busy = kbdev->pm.backend.metrics.busy_gl +
+ kbdev->pm.backend.metrics.busy_cl[0] +
+ kbdev->pm.backend.metrics.busy_cl[1];
+
+ if (busy != 0) {
+ if (util_gl_share)
+ *util_gl_share =
+ (100 * kbdev->pm.backend.metrics.busy_gl) /
+ busy;
+ if (util_cl_share) {
+ util_cl_share[0] =
+ (100 * kbdev->pm.backend.metrics.busy_cl[0]) /
+ busy;
+ util_cl_share[1] =
+ (100 * kbdev->pm.backend.metrics.busy_cl[1]) /
+ busy;
+ }
+ } else {
+ if (util_gl_share)
+ *util_gl_share = -1;
+ if (util_cl_share) {
+ util_cl_share[0] = -1;
+ util_cl_share[1] = -1;
+ }
+ }
+
+out:
+ kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now);
+
+ return utilisation;
+}
+
+enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ int utilisation, util_gl_share;
+ int util_cl_share[2];
+ enum kbase_pm_dvfs_action action;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+ utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share,
+ util_cl_share);
+
+ if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 ||
+ util_cl_share[1] < 0) {
+ action = KBASE_PM_DVFS_NOP;
+ utilisation = 0;
+ util_gl_share = 0;
+ util_cl_share[0] = 0;
+ util_cl_share[1] = 0;
+ goto out;
+ }
+
+ if (kbdev->pm.backend.metrics.vsync_hit) {
+ /* VSync is being met */
+ if (utilisation < KBASE_PM_VSYNC_MIN_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_DOWN;
+ else if (utilisation > KBASE_PM_VSYNC_MAX_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_UP;
+ else
+ action = KBASE_PM_DVFS_NOP;
+ } else {
+ /* VSync is being missed */
+ if (utilisation < KBASE_PM_NO_VSYNC_MIN_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_DOWN;
+ else if (utilisation > KBASE_PM_NO_VSYNC_MAX_UTILISATION)
+ action = KBASE_PM_DVFS_CLOCK_UP;
+ else
+ action = KBASE_PM_DVFS_NOP;
+ }
+
+ kbdev->pm.backend.metrics.utilisation = utilisation;
+ kbdev->pm.backend.metrics.util_cl_share[0] = util_cl_share[0];
+ kbdev->pm.backend.metrics.util_cl_share[1] = util_cl_share[1];
+ kbdev->pm.backend.metrics.util_gl_share = util_gl_share;
+out:
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+ kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+ util_cl_share);
+#endif /*CONFIG_MALI_MIDGARD_DVFS */
+
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+ return action;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_action);
+
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
+{
+ bool isactive;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+ isactive = kbdev->pm.backend.metrics.timer_active;
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+
+ return isactive;
+}
+KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
+
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+/* called when job is submitted to a GPU slot */
+void kbase_pm_metrics_run_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+ /* We may have been idle before */
+ if (kbdev->pm.backend.metrics.nr_in_slots == 0) {
+ WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[0] != 0);
+ WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[1] != 0);
+ WARN_ON(kbdev->pm.backend.metrics.active_gl_ctx != 0);
+
+ /* Record idle time */
+ kbasep_pm_record_gpu_idle(kbdev);
+
+ /* We are now active */
+ WARN_ON(kbdev->pm.backend.metrics.gpu_active);
+ kbdev->pm.backend.metrics.gpu_active = true;
+
+ } else {
+ /* Record active time */
+ kbasep_pm_record_gpu_active(kbdev);
+ }
+
+ /* Track number of jobs in GPU slots */
+ WARN_ON(kbdev->pm.backend.metrics.nr_in_slots == U8_MAX);
+ kbdev->pm.backend.metrics.nr_in_slots++;
+
+ /* Track if it was a CL or GL one that was submitted to a GPU slot */
+ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+ int device_nr = (katom->core_req &
+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+ ? katom->device_nr : 0;
+ KBASE_DEBUG_ASSERT(device_nr < 2);
+ kbdev->pm.backend.metrics.active_cl_ctx[device_nr]++;
+ } else {
+ kbdev->pm.backend.metrics.active_gl_ctx++;
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+/* called when job is removed from a GPU slot */
+void kbase_pm_metrics_release_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+ /* Track how long CL and/or GL jobs have been busy for */
+ kbasep_pm_record_gpu_active(kbdev);
+
+ /* Track number of jobs in GPU slots */
+ WARN_ON(kbdev->pm.backend.metrics.nr_in_slots == 0);
+ kbdev->pm.backend.metrics.nr_in_slots--;
+
+ /* We may become idle */
+ if (kbdev->pm.backend.metrics.nr_in_slots == 0) {
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.metrics.gpu_active);
+ kbdev->pm.backend.metrics.gpu_active = false;
+ }
+
+ /* Track of the GPU jobs that are active which ones are CL and
+ * which GL */
+ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+ int device_nr = (katom->core_req &
+ BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+ ? katom->device_nr : 0;
+ KBASE_DEBUG_ASSERT(device_nr < 2);
+
+ WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[device_nr]
+ == 0);
+ kbdev->pm.backend.metrics.active_cl_ctx[device_nr]--;
+ } else {
+ WARN_ON(kbdev->pm.backend.metrics.active_gl_ctx == 0);
+ kbdev->pm.backend.metrics.active_gl_ctx--;
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c
new file mode 100644
index 00000000000..4ee35bbc427
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_metrics_dummy.c
+ * Dummy Metrics for power management.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+void kbase_pm_register_vsync_callback(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ /* no VSync metrics will be available */
+ kbdev->pm.backend.metrics.platform_data = NULL;
+}
+
+void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100644
index 00000000000..ba5c23928b8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,895 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_policy.c
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+ &kbase_pm_always_on_policy_ops,
+ &kbase_pm_demand_policy_ops,
+ &kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_demand_always_powered_policy_ops,
+ &kbase_pm_fast_start_policy_ops,
+#endif
+#else /* CONFIG_MALI_NO_MALI */
+ &kbase_pm_demand_policy_ops,
+ &kbase_pm_always_on_policy_ops,
+ &kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+ &kbase_pm_demand_always_powered_policy_ops,
+ &kbase_pm_fast_start_policy_ops,
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/** The number of policies available in the system.
+ * This is derived from the number of functions listed in policy_get_functions.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+ KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+ KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+ KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+ KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+ /* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+ * expect to hit it nor tend to hit it very much anyway. We can detect
+ * whether we need more instrumentation by a difference between
+ * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT. */
+
+ /* Must be the last */
+ KBASE_PM_FUNC_ID_COUNT
+};
+
+
+/* State changes during request/unrequest/release-ing cores */
+enum {
+ KBASE_PM_CHANGE_STATE_SHADER = (1u << 0),
+ KBASE_PM_CHANGE_STATE_TILER = (1u << 1),
+
+ /* These two must be last */
+ KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER |
+ KBASE_PM_CHANGE_STATE_SHADER),
+ KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1
+};
+typedef u32 kbase_pm_change_state;
+
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+/* Timeline Trace code lookups for each function */
+static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT]
+ [KBASE_PM_CHANGE_STATE_COUNT] = {
+ /* kbase_pm_request_cores */
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+ KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START,
+
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END,
+ [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+ KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END,
+
+ /* kbase_pm_release_cores */
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER |
+ KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START,
+
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END,
+ [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER |
+ KBASE_PM_CHANGE_STATE_TILER] =
+ SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END
+};
+
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+ enum kbase_pm_func_id func_id,
+ kbase_pm_change_state state)
+{
+ int trace_code;
+
+ KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT);
+ KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) ==
+ state);
+
+ trace_code = kbase_pm_change_state_trace_code[func_id][state];
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code);
+}
+
+#else /* CONFIG_MALI_TRACE_TIMELINE */
+static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev,
+ enum kbase_pm_func_id func_id, kbase_pm_change_state state)
+{
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+static enum hrtimer_restart
+kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer)
+{
+ struct kbase_device *kbdev;
+
+ kbdev = container_of(timer, struct kbase_device,
+ pm.backend.gpu_poweroff_timer);
+
+ /* It is safe for this call to do nothing if the work item is already
+ * queued. The worker function will read the must up-to-date state of
+ * kbdev->pm.backend.gpu_poweroff_pending under lock.
+ *
+ * If a state change occurs while the worker function is processing,
+ * this call will succeed as a work item can be requeued once it has
+ * started processing.
+ */
+ if (kbdev->pm.backend.gpu_poweroff_pending)
+ queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+ &kbdev->pm.backend.gpu_poweroff_work);
+
+ if (kbdev->pm.backend.shader_poweroff_pending) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ if (kbdev->pm.backend.shader_poweroff_pending) {
+ kbdev->pm.backend.shader_poweroff_pending_time--;
+
+ KBASE_DEBUG_ASSERT(
+ kbdev->pm.backend.shader_poweroff_pending_time
+ >= 0);
+
+ if (!kbdev->pm.backend.shader_poweroff_pending_time) {
+ u64 prev_shader_state =
+ kbdev->pm.backend.desired_shader_state;
+
+ kbdev->pm.backend.desired_shader_state &=
+ ~kbdev->pm.backend.shader_poweroff_pending;
+
+ kbdev->pm.backend.shader_poweroff_pending = 0;
+
+ if (prev_shader_state !=
+ kbdev->pm.backend.desired_shader_state
+ || kbdev->pm.backend.ca_in_transition) {
+ bool cores_are_available;
+
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+ cores_are_available =
+ kbase_pm_check_transitions_nolock(
+ kbdev);
+ KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+ /* Don't need 'cores_are_available',
+ * because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+ }
+ }
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ }
+
+ if (kbdev->pm.backend.poweroff_timer_needed) {
+ hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+ return HRTIMER_RESTART;
+ }
+
+ return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ bool do_poweroff = false;
+
+ kbdev = container_of(data, struct kbase_device,
+ pm.backend.gpu_poweroff_work);
+
+ mutex_lock(&kbdev->pm.lock);
+
+ if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+ mutex_unlock(&kbdev->pm.lock);
+ return;
+ }
+
+ kbdev->pm.backend.gpu_poweroff_pending--;
+
+ if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+ mutex_unlock(&kbdev->pm.lock);
+ return;
+ }
+
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ /* Only power off the GPU if a request is still pending */
+ if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+ do_poweroff = true;
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ if (do_poweroff) {
+ kbdev->pm.backend.poweroff_timer_needed = false;
+ hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+ /* Power off the GPU */
+ if (!kbase_pm_do_poweroff(kbdev, false)) {
+ /* GPU can not be powered off at present */
+ kbdev->pm.backend.poweroff_timer_needed = true;
+ hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
+ kbdev->pm.gpu_poweroff_time,
+ HRTIMER_MODE_REL);
+ }
+ }
+
+ mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+ struct workqueue_struct *wq;
+
+ wq = alloc_workqueue("kbase_pm_do_poweroff",
+ WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (!wq)
+ return -ENOMEM;
+
+ kbdev->pm.backend.gpu_poweroff_wq = wq;
+ INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+ kbasep_pm_do_gpu_poweroff_wq);
+ hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ kbdev->pm.backend.gpu_poweroff_timer.function =
+ kbasep_pm_do_gpu_poweroff_callback;
+ kbdev->pm.backend.pm_current_policy = policy_list[0];
+ kbdev->pm.backend.pm_current_policy->init(kbdev);
+ kbdev->pm.gpu_poweroff_time =
+ HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS);
+ kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER;
+ kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU;
+
+ return 0;
+}
+
+void kbase_pm_policy_term(struct kbase_device *kbdev)
+{
+ kbdev->pm.backend.pm_current_policy->term(kbdev);
+ destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq);
+}
+
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ kbdev->pm.backend.poweroff_timer_needed = false;
+ hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+
+ /* If wq is already running but is held off by pm.lock, make sure it has
+ * no effect */
+ kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbdev->pm.backend.shader_poweroff_pending = 0;
+ kbdev->pm.backend.shader_poweroff_pending_time = 0;
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+void kbase_pm_update_active(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ bool active;
+
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ /* pm_current_policy will never be NULL while pm.lock is held */
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ active = kbdev->pm.backend.pm_current_policy->get_core_active(kbdev);
+
+ if (active) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ if (kbdev->pm.backend.gpu_poweroff_pending) {
+ /* Cancel any pending power off request */
+ kbdev->pm.backend.gpu_poweroff_pending = 0;
+
+ /* If a request was pending then the GPU was still
+ * powered, so no need to continue */
+ if (!kbdev->poweroff_pending)
+ return;
+ }
+
+ if (!kbdev->pm.backend.poweroff_timer_needed &&
+ !kbdev->pm.backend.gpu_powered) {
+ kbdev->pm.backend.poweroff_timer_needed = true;
+ hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
+ kbdev->pm.gpu_poweroff_time,
+ HRTIMER_MODE_REL);
+ }
+
+ /* Power on the GPU and any cores requested by the policy */
+ kbase_pm_do_poweron(kbdev, false);
+ } else {
+ /* It is an error for the power policy to power off the GPU
+ * when there are contexts active */
+ KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0);
+
+ if (kbdev->pm.backend.shader_poweroff_pending) {
+ kbdev->pm.backend.shader_poweroff_pending = 0;
+ kbdev->pm.backend.shader_poweroff_pending_time = 0;
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+
+ /* Request power off */
+ if (kbdev->pm.backend.gpu_powered) {
+ kbdev->pm.backend.gpu_poweroff_pending =
+ kbdev->pm.poweroff_gpu_ticks;
+ if (!kbdev->pm.backend.poweroff_timer_needed) {
+ /* Start timer if not running (eg if power
+ * policy has been changed from always_on to
+ * something else). This will ensure the GPU is
+ * actually powered off */
+ kbdev->pm.backend.poweroff_timer_needed = true;
+ hrtimer_start(
+ &kbdev->pm.backend.gpu_poweroff_timer,
+ kbdev->pm.gpu_poweroff_time,
+ HRTIMER_MODE_REL);
+ }
+ }
+ }
+}
+
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
+{
+ u64 desired_bitmap;
+ bool cores_are_available;
+
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+ if (kbdev->pm.backend.pm_current_policy == NULL)
+ return;
+
+ desired_bitmap =
+ kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev);
+ desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev);
+
+ /* Enable core 0 if tiler required, regardless of core availability */
+ if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0)
+ desired_bitmap |= 1;
+
+ if (kbdev->pm.backend.desired_shader_state != desired_bitmap)
+ KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u,
+ (u32)desired_bitmap);
+
+ /* Are any cores being powered on? */
+ if (~kbdev->pm.backend.desired_shader_state & desired_bitmap ||
+ kbdev->pm.backend.ca_in_transition) {
+ /* Check if we are powering off any cores before updating shader
+ * state */
+ if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) {
+ /* Start timer to power off cores */
+ kbdev->pm.backend.shader_poweroff_pending |=
+ (kbdev->pm.backend.desired_shader_state &
+ ~desired_bitmap);
+ kbdev->pm.backend.shader_poweroff_pending_time =
+ kbdev->pm.poweroff_shader_ticks;
+ }
+
+ kbdev->pm.backend.desired_shader_state = desired_bitmap;
+
+ /* If any cores are being powered on, transition immediately */
+ cores_are_available = kbase_pm_check_transitions_nolock(kbdev);
+ } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) {
+ /* Start timer to power off cores */
+ kbdev->pm.backend.shader_poweroff_pending |=
+ (kbdev->pm.backend.desired_shader_state &
+ ~desired_bitmap);
+ kbdev->pm.backend.shader_poweroff_pending_time =
+ kbdev->pm.poweroff_shader_ticks;
+ } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 &&
+ kbdev->pm.backend.poweroff_timer_needed) {
+ /* If power policy is keeping cores on despite there being no
+ * active contexts then disable poweroff timer as it isn't
+ * required.
+ * Only reset poweroff_timer_needed if we're not in the middle
+ * of the power off callback */
+ kbdev->pm.backend.poweroff_timer_needed = false;
+ hrtimer_try_to_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+ }
+
+ /* Ensure timer does not power off wanted cores and make sure to power
+ * off unwanted cores */
+ if (kbdev->pm.backend.shader_poweroff_pending != 0) {
+ kbdev->pm.backend.shader_poweroff_pending &=
+ ~(kbdev->pm.backend.desired_shader_state &
+ desired_bitmap);
+ if (kbdev->pm.backend.shader_poweroff_pending == 0)
+ kbdev->pm.backend.shader_poweroff_pending_time = 0;
+ }
+
+ /* Don't need 'cores_are_available', because we don't return anything */
+ CSTD_UNUSED(cores_are_available);
+}
+
+void kbase_pm_update_cores_state(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **list)
+{
+ if (!list)
+ return POLICY_COUNT;
+
+ *list = policy_list;
+
+ return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_list_policies);
+
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ return kbdev->pm.backend.pm_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
+
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_policy *new_policy)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ const struct kbase_pm_policy *old_policy;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+ KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id);
+
+ /* During a policy change we pretend the GPU is active */
+ /* A suspend won't happen here, because we're in a syscall from a
+ * userspace thread */
+ kbase_pm_context_active(kbdev);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+
+ /* Remove the policy to prevent IRQ handlers from working on it */
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ old_policy = kbdev->pm.backend.pm_current_policy;
+ kbdev->pm.backend.pm_current_policy = NULL;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u,
+ old_policy->id);
+ if (old_policy->term)
+ old_policy->term(kbdev);
+
+ KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u,
+ new_policy->id);
+ if (new_policy->init)
+ new_policy->init(kbdev);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+ kbdev->pm.backend.pm_current_policy = new_policy;
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ /* If any core power state changes were previously attempted, but
+ * couldn't be made because the policy was changing (current_policy was
+ * NULL), then re-try them here. */
+ kbase_pm_update_active(kbdev);
+ kbase_pm_update_cores_state(kbdev);
+
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ /* Now the policy change is finished, we release our fake context active
+ * reference */
+ kbase_pm_context_idle(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_set_policy);
+
+/** Check whether a state change has finished, and trace it as completed */
+static void
+kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev)
+{
+ if ((kbdev->shader_available_bitmap &
+ kbdev->pm.backend.desired_shader_state)
+ == kbdev->pm.backend.desired_shader_state &&
+ (kbdev->tiler_available_bitmap &
+ kbdev->pm.backend.desired_tiler_state)
+ == kbdev->pm.backend.desired_tiler_state)
+ kbase_timeline_pm_check_handle_event(kbdev,
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+}
+
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+ u64 cores;
+
+ kbase_pm_change_state change_gpu_state = 0u;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ cores = shader_cores;
+ while (cores) {
+ int bitnum = fls64(cores) - 1;
+ u64 bit = 1ULL << bitnum;
+
+ /* It should be almost impossible for this to overflow. It would
+ * require 2^32 atoms to request a particular core, which would
+ * require 2^24 contexts to submit. This would require an amount
+ * of memory that is impossible on a 32-bit system and extremely
+ * unlikely on a 64-bit system. */
+ int cnt = ++kbdev->shader_needed_cnt[bitnum];
+
+ if (1 == cnt) {
+ kbdev->shader_needed_bitmap |= bit;
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+ }
+
+ cores &= ~bit;
+ }
+
+ if (tiler_required) {
+ int cnt = ++kbdev->tiler_needed_cnt;
+
+ if (1 == cnt)
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+
+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0);
+ }
+
+ if (change_gpu_state) {
+ KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL,
+ NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+ kbase_timeline_pm_cores_func(kbdev,
+ KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+ change_gpu_state);
+ kbase_pm_update_cores_state_nolock(kbdev);
+ kbase_timeline_pm_cores_func(kbdev,
+ KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+ change_gpu_state);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores);
+
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+
+ kbase_pm_change_state change_gpu_state = 0u;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ while (shader_cores) {
+ int bitnum = fls64(shader_cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+ cnt = --kbdev->shader_needed_cnt[bitnum];
+
+ if (0 == cnt) {
+ kbdev->shader_needed_bitmap &= ~bit;
+
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+ }
+
+ shader_cores &= ~bit;
+ }
+
+ if (tiler_required) {
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+ cnt = --kbdev->tiler_needed_cnt;
+
+ if (0 == cnt)
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+ }
+
+ if (change_gpu_state) {
+ KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL,
+ NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+ kbase_pm_update_cores_state_nolock(kbdev);
+
+ /* Trace that any state change effectively completes immediately
+ * - no-one will wait on the state change */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+ u64 prev_shader_needed; /* Just for tracing */
+ u64 prev_shader_inuse; /* Just for tracing */
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ prev_shader_needed = kbdev->shader_needed_bitmap;
+ prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+ /* If desired_shader_state does not contain the requested cores, then
+ * power management is not attempting to powering those cores (most
+ * likely due to core availability policy) and a new job affinity must
+ * be chosen */
+ if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+ shader_cores) {
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ return KBASE_NEW_AFFINITY;
+ }
+
+ if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+ (tiler_required && !kbdev->tiler_available_bitmap)) {
+ /* Trace ongoing core transition */
+ kbase_timeline_pm_l2_transition_start(kbdev);
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ return KBASE_CORES_NOT_READY;
+ }
+
+ /* If we started to trace a state change, then trace it has being
+ * finished by now, at the very latest */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ /* Trace core transition done */
+ kbase_timeline_pm_l2_transition_done(kbdev);
+
+ while (shader_cores) {
+ int bitnum = fls64(shader_cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+ cnt = --kbdev->shader_needed_cnt[bitnum];
+
+ if (0 == cnt)
+ kbdev->shader_needed_bitmap &= ~bit;
+
+ /* shader_inuse_cnt should not overflow because there can only
+ * be a very limited number of jobs on the h/w at one time */
+
+ kbdev->shader_inuse_cnt[bitnum]++;
+ kbdev->shader_inuse_bitmap |= bit;
+
+ shader_cores &= ~bit;
+ }
+
+ if (tiler_required) {
+ KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+ --kbdev->tiler_needed_cnt;
+
+ kbdev->tiler_inuse_cnt++;
+
+ KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+ }
+
+ if (prev_shader_needed != kbdev->shader_needed_bitmap)
+ KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+ NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+ if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+ KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+ NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+ return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores)
+{
+ unsigned long flags;
+ kbase_pm_change_state change_gpu_state = 0u;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ while (shader_cores) {
+ int bitnum = fls64(shader_cores) - 1;
+ u64 bit = 1ULL << bitnum;
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+ cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+ if (0 == cnt) {
+ kbdev->shader_inuse_bitmap &= ~bit;
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+ }
+
+ shader_cores &= ~bit;
+ }
+
+ if (tiler_required) {
+ int cnt;
+
+ KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+ cnt = --kbdev->tiler_inuse_cnt;
+
+ if (0 == cnt)
+ change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER;
+ }
+
+ if (change_gpu_state) {
+ KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL,
+ NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+ kbase_timeline_pm_cores_func(kbdev,
+ KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+ change_gpu_state);
+ kbase_pm_update_cores_state_nolock(kbdev);
+ kbase_timeline_pm_cores_func(kbdev,
+ KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+ change_gpu_state);
+
+ /* Trace that any state change completed immediately */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_cores);
+
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+ bool tiler_required,
+ u64 shader_cores)
+{
+ kbase_pm_request_cores(kbdev, tiler_required, shader_cores);
+
+ kbase_pm_check_transitions_sync(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync);
+
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ u32 prior_l2_users_count;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ prior_l2_users_count = kbdev->l2_users_count++;
+
+ KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0);
+
+ /* if the GPU is reset while the l2 is on, l2 will be off but
+ * prior_l2_users_count will be > 0. l2_available_bitmap will have been
+ * set to 0 though by kbase_pm_init_hw */
+ if (!prior_l2_users_count || !kbdev->l2_available_bitmap)
+ kbase_pm_check_transitions_nolock(kbdev);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ wait_event(kbdev->pm.backend.l2_powered_wait,
+ kbdev->pm.backend.l2_powered == 1);
+
+ /* Trace that any state change completed immediately */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches);
+
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbdev->l2_users_count++;
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on);
+
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0);
+
+ --kbdev->l2_users_count;
+
+ if (!kbdev->l2_users_count) {
+ kbase_pm_check_transitions_nolock(kbdev);
+ /* Trace that any state change completed immediately */
+ kbase_pm_trace_check_and_finish_state_change(kbdev);
+ }
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
new file mode 100644
index 00000000000..fabb3cc970e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h
@@ -0,0 +1,227 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ * initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+ KBASE_CORES_NOT_READY = 0,
+ KBASE_NEW_AFFINITY = 1,
+ KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev: The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores: A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ * for jobs to be submitted
+ *
+ * @kbdev: The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores: A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ *
+ * Return: 0 if the cores were successfully requested, or -errno otherwise.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ * jobs to be submitted.
+ *
+ * @kbdev: The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores: A bitmask of shader cores (as given to
+ * kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job is
+ * cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev: The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores: A bitmask of shader cores (as given to
+ * kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: true if the job can be submitted to the hardware or false
+ * if the job is not ready to run.
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ * %KBASE_NEW_AFFINITY if the affinity requested is not allowed,
+ * %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+ struct kbase_device *kbdev,
+ bool tiler_required,
+ u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev: The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores: A bitmask of shader cores (as given to
+ * kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+ bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the callers responsibility to ensure that the l2 is already powered up
+ * and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100644
index 00000000000..be7c3663e2b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+ u64 *system_time, struct timespec *ts)
+{
+ u32 hi1, hi2;
+
+ kbase_pm_request_gpu_cycle_counter(kbdev);
+
+ /* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+ * correctly */
+ do {
+ hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+ NULL);
+ *cycle_counter = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+ hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+ NULL);
+ *cycle_counter |= (((u64) hi1) << 32);
+ } while (hi1 != hi2);
+
+ /* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+ * correctly */
+ do {
+ hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+ NULL);
+ *system_time = kbase_reg_read(kbdev,
+ GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+ hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+ NULL);
+ *system_time |= (((u64) hi1) << 32);
+ } while (hi1 != hi2);
+
+ /* Record the CPU's idea of current time */
+ getrawmonotonic(ts);
+
+ kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * Only in use for BASE_HW_ISSUE_6367
+ *
+ * Note : If GPU resets occur then the counters are reset to zero, the delay may
+ * not be as expected.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+ u32 base_count = 0;
+
+ /* A suspend won't happen here, because we're in a syscall from a
+ * userspace thread */
+
+ kbase_pm_context_active(kctx->kbdev);
+ kbase_pm_request_gpu_cycle_counter(kctx->kbdev);
+
+ while (true) {
+ u32 new_count;
+
+ new_count = kbase_reg_read(kctx->kbdev,
+ GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+ /* First time around, just store the count. */
+ if (base_count == 0) {
+ base_count = new_count;
+ continue;
+ }
+
+ /* No need to handle wrapping, unsigned maths works for this. */
+ if ((new_count - base_count) > 1000)
+ break;
+ }
+
+ kbase_pm_release_gpu_cycle_counter(kctx->kbdev);
+ kbase_pm_context_idle(kctx->kbdev);
+}
+#endif /* CONFIG_MALI_NO_MALI */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
new file mode 100644
index 00000000000..4ae1b0c304c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h
@@ -0,0 +1,57 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev: Device pointer
+ * @cycle_counter: Pointer to u64 to store cycle counter in
+ * @system_time: Pointer to u64 to store system time in
+ * @ts: Pointer to struct timespec to store current monotonic
+ * time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+ u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifdef CONFIG_MALI_NO_MALI
+static inline void kbase_wait_write_flush(struct kbase_context *kctx)
+{
+}
+#else
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/docs/Doxyfile b/drivers/gpu/arm/midgard/docs/Doxyfile
new file mode 100644
index 00000000000..35ff2f1ce4a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/Doxyfile
@@ -0,0 +1,126 @@
+#
+# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may cause override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT += ../../kernel/drivers/gpu/arm/midgard/
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS +=
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH +=
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH +=
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH +=
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED +=
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED +=
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs
+
diff --git a/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
new file mode 100644
index 00000000000..7ae05c2f8de
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot
@@ -0,0 +1,112 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+ rankdir=LR;
+ size="12,8";
+ compound=true;
+
+ node [ shape = box ];
+
+ subgraph cluster_policy_queues {
+ low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ];
+ queues_middle_sep [ label="" shape=plaintext width=0 height=0 ];
+
+ rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... | <qr>ctx_hi}" ];
+
+ label = "Policy's Queue(s)";
+ }
+
+ call_enqueue [ shape=plaintext label="enqueue_ctx()" ];
+
+ {
+ rank=same;
+ ordering=out;
+ call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ];
+ call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ];
+
+ call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ];
+ }
+
+ subgraph cluster_runpool {
+
+ as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ];
+ as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ];
+ as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ];
+ as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ];
+
+ label = "Policy's Run Pool";
+ }
+
+ {
+ rank=same;
+ call_jdequeue [ shape=plaintext label="dequeue_job()" ];
+ sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ];
+ }
+
+ {
+ rank=same;
+ ordering=out;
+ sstop [ shape=ellipse label="SS-Timer expires" ]
+ jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+ irq [ label="IRQ" shape=ellipse ];
+
+ job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ];
+ }
+
+ hstop [ shape=ellipse label="HS-Timer expires" ]
+
+ /*
+ * Edges
+ */
+
+ call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ];
+
+ low_queue:qr -> call_dequeue:w;
+ rt_queue:qr -> call_dequeue:w;
+
+ call_dequeue -> as1 [lhead=cluster_runpool];
+
+ as1->call_jdequeue [ltail=cluster_runpool];
+ call_jdequeue->jobslots:0;
+ call_jdequeue->sstop_dotfixup [ arrowhead=none];
+ sstop_dotfixup->sstop [label="Spawn SS-Timer"];
+ sstop->jobslots [label="SoftStop"];
+ sstop->hstop [label="Spawn HS-Timer"];
+ hstop->jobslots:ne [label="HardStop"];
+
+
+ as3->call_ctxfinish:ne [ ltail=cluster_runpool ];
+ call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ];
+
+ call_ctxfinish->call_ctxdone [constraint=false];
+
+ call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false];
+
+
+ {
+ jobslots->irq [constraint=false];
+
+ irq->job_finish [constraint=false];
+ }
+
+ irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ];
+
+}
diff --git a/drivers/gpu/arm/midgard/docs/policy_overview.dot b/drivers/gpu/arm/midgard/docs/policy_overview.dot
new file mode 100644
index 00000000000..159b993b7d6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/docs/policy_overview.dot
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2010 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+digraph policy_objects_diagram {
+ rankdir=LR
+ size="6,6"
+ compound=true;
+
+ node [ shape = box ];
+
+ call_enqueue [ shape=plaintext label="enqueue ctx" ];
+
+
+ policy_queue [ label="Policy's Queue" ];
+
+ {
+ rank=same;
+ runpool [ label="Policy's Run Pool" ];
+
+ ctx_finish [ label="ctx finished" ];
+ }
+
+ {
+ rank=same;
+ jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ];
+
+ job_finish [ label="Job finished" ];
+ }
+
+
+
+ /*
+ * Edges
+ */
+
+ call_enqueue -> policy_queue;
+
+ policy_queue->runpool [label="dequeue ctx" weight=0.1];
+ runpool->policy_queue [label="requeue ctx" weight=0.1];
+
+ runpool->ctx_finish [ style=dotted ];
+
+ runpool->jobslots [label="dequeue job" weight=0.1];
+ jobslots->runpool [label="requeue job" weight=0.1];
+
+ jobslots->job_finish [ style=dotted ];
+}
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
new file mode 100644
index 00000000000..bec9a5acf5c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h
@@ -0,0 +1,163 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_FEATURES_H_
+#define _BASE_HWCONFIG_FEATURES_H_
+
+enum base_hw_feature {
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_33BIT_VA,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_MSAA_16X,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+ BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+ BASE_HW_FEATURE_BRNDOUT_KILL,
+ BASE_HW_FEATURE_WARPING,
+ BASE_HW_FEATURE_FLUSH_REDUCTION,
+ BASE_HW_FEATURE_V4,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_generic[] = {
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t60x[] = {
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_V4,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t62x[] = {
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_V4,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t72x[] = {
+ BASE_HW_FEATURE_33BIT_VA,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+ BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4,
+ BASE_HW_FEATURE_WARPING,
+ BASE_HW_FEATURE_V4,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t76x[] = {
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_MSAA_16X,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_tFxx[] = {
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_BRNDOUT_KILL,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_MSAA_16X,
+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t83x[] = {
+ BASE_HW_FEATURE_33BIT_VA,
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_WARPING,
+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_BRNDOUT_KILL,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_END
+};
+
+static const enum base_hw_feature base_hw_features_t82x[] = {
+ BASE_HW_FEATURE_33BIT_VA,
+ BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
+ BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
+ BASE_HW_FEATURE_WARPING,
+ BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
+ BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
+ BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
+ BASE_HW_FEATURE_BRNDOUT_CC,
+ BASE_HW_FEATURE_BRNDOUT_KILL,
+ BASE_HW_FEATURE_LD_ST_LEA_TEX,
+ BASE_HW_FEATURE_LD_ST_TILEBUFFER,
+ BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
+ BASE_HW_FEATURE_MRT,
+ BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
+ BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
+ BASE_HW_FEATURE_T7XX_PAIRING_RULES,
+ BASE_HW_FEATURE_TEST4_DATUM_MODE,
+ BASE_HW_FEATURE_END
+};
+
+#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
new file mode 100644
index 00000000000..7bd30dd7278
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h
@@ -0,0 +1,786 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
+ * please update base/tools/hwconfig_generator/hwc_{issues,features}.py
+ * For more information see base/tools/hwconfig_generator/README
+ */
+
+#ifndef _BASE_HWCONFIG_ISSUES_H_
+#define _BASE_HWCONFIG_ISSUES_H_
+
+enum base_hw_issue {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6398,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7144,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8073,
+ BASE_HW_ISSUE_8186,
+ BASE_HW_ISSUE_8215,
+ BASE_HW_ISSUE_8245,
+ BASE_HW_ISSUE_8250,
+ BASE_HW_ISSUE_8260,
+ BASE_HW_ISSUE_8280,
+ BASE_HW_ISSUE_8316,
+ BASE_HW_ISSUE_8381,
+ BASE_HW_ISSUE_8394,
+ BASE_HW_ISSUE_8401,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8443,
+ BASE_HW_ISSUE_8456,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8634,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8791,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8833,
+ BASE_HW_ISSUE_8879,
+ BASE_HW_ISSUE_8896,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_8986,
+ BASE_HW_ISSUE_8987,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9418,
+ BASE_HW_ISSUE_9423,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_9566,
+ BASE_HW_ISSUE_9630,
+ BASE_HW_ISSUE_10127,
+ BASE_HW_ISSUE_10327,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10817,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_10969,
+ BASE_HW_ISSUE_10984,
+ BASE_HW_ISSUE_10995,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_generic[] = {
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = {
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6398,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7144,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8073,
+ BASE_HW_ISSUE_8186,
+ BASE_HW_ISSUE_8215,
+ BASE_HW_ISSUE_8245,
+ BASE_HW_ISSUE_8250,
+ BASE_HW_ISSUE_8260,
+ BASE_HW_ISSUE_8280,
+ BASE_HW_ISSUE_8316,
+ BASE_HW_ISSUE_8381,
+ BASE_HW_ISSUE_8394,
+ BASE_HW_ISSUE_8401,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8443,
+ BASE_HW_ISSUE_8456,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8634,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8791,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8833,
+ BASE_HW_ISSUE_8896,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_8986,
+ BASE_HW_ISSUE_8987,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9418,
+ BASE_HW_ISSUE_9423,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_9566,
+ BASE_HW_ISSUE_9630,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10969,
+ BASE_HW_ISSUE_10984,
+ BASE_HW_ISSUE_10995,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = {
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9418,
+ BASE_HW_ISSUE_9423,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10969,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = {
+ BASE_HW_ISSUE_6367,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_6787,
+ BASE_HW_ISSUE_7027,
+ BASE_HW_ISSUE_7304,
+ BASE_HW_ISSUE_8408,
+ BASE_HW_ISSUE_8564,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_8975,
+ BASE_HW_ISSUE_9010,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_9510,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10127,
+ BASE_HW_ISSUE_10327,
+ BASE_HW_ISSUE_10410,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10487,
+ BASE_HW_ISSUE_10607,
+ BASE_HW_ISSUE_10632,
+ BASE_HW_ISSUE_10676,
+ BASE_HW_ISSUE_10682,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10817,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11035,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_10959,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_26,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3542,
+ BASE_HW_ISSUE_T76X_3556,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = {
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10684,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t72x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10471,
+ BASE_HW_ISSUE_10797,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t76x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t60x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_8778,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t62x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_6402,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10472,
+ BASE_HW_ISSUE_10931,
+ BASE_HW_ISSUE_11012,
+ BASE_HW_ISSUE_11020,
+ BASE_HW_ISSUE_11024,
+ BASE_HW_ISSUE_11042,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_tFRx[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3966,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t86x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t83x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = {
+ BASE_HW_ISSUE_8803,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_10821,
+ BASE_HW_ISSUE_10883,
+ BASE_HW_ISSUE_10946,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_T76X_3953,
+ BASE_HW_ISSUE_T76X_3960,
+ BASE_HW_ISSUE_END
+};
+
+static const enum base_hw_issue base_hw_issues_model_t82x[] = {
+ BASE_HW_ISSUE_5736,
+ BASE_HW_ISSUE_9275,
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_T76X_1909,
+ BASE_HW_ISSUE_T76X_1963,
+ BASE_HW_ISSUE_T76X_3086,
+ BASE_HW_ISSUE_T76X_3700,
+ BASE_HW_ISSUE_T76X_3793,
+ BASE_HW_ISSUE_END
+};
+
+#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h
new file mode 100644
index 00000000000..21fe319b202
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_kernel.h
@@ -0,0 +1,1613 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base structures shared with the kernel.
+ */
+
+#ifndef _BASE_KERNEL_H_
+#define _BASE_KERNEL_H_
+
+#ifndef __user
+#define __user
+#endif
+
+/* Support UK6 IOCTLS */
+#define BASE_LEGACY_UK6_SUPPORT 1
+
+/* Support UK7 IOCTLS */
+/* NB: To support UK6 we also need to support UK7 */
+#define BASE_LEGACY_UK7_SUPPORT 1
+
+typedef u64 base_mem_handle;
+
+#include "mali_base_mem_priv.h"
+#include "mali_kbase_profiling_gator_api.h"
+
+/*
+ * Dependency stuff, keep it private for now. May want to expose it if
+ * we decide to make the number of semaphores a configurable
+ * option.
+ */
+#define BASE_JD_ATOM_COUNT 256
+
+#define BASEP_JD_SEM_PER_WORD_LOG2 5
+#define BASEP_JD_SEM_PER_WORD (1 << BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_WORD_NR(x) ((x) >> BASEP_JD_SEM_PER_WORD_LOG2)
+#define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1)))
+#define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT)
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+#if defined CDBG_ASSERT
+#define LOCAL_ASSERT CDBG_ASSERT
+#elif defined KBASE_DEBUG_ASSERT
+#define LOCAL_ASSERT KBASE_DEBUG_ASSERT
+#else
+#error assert macro not defined!
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union kbase_pointer {
+ void __user *value; /**< client should store their pointers here */
+ u32 compat_value; /**< 64-bit kernels should fetch value here when handling 32-bit clients */
+ u64 sizer; /**< Force 64-bit storage for all clients regardless */
+} kbase_pointer;
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (eg @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD),
+ * which defines a @a write-only region on the CPU side, which is
+ * heavily read by the CPU...
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see ::BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * See ::base_mem_alloc_flags.
+ *
+ */
+enum {
+/* IN */
+ BASE_MEM_PROT_CPU_RD = (1U << 0), /**< Read access CPU side */
+ BASE_MEM_PROT_CPU_WR = (1U << 1), /**< Write access CPU side */
+ BASE_MEM_PROT_GPU_RD = (1U << 2), /**< Read access GPU side */
+ BASE_MEM_PROT_GPU_WR = (1U << 3), /**< Write access GPU side */
+ BASE_MEM_PROT_GPU_EX = (1U << 4), /**< Execute allowed on the GPU
+ side */
+
+ /* BASE_MEM_HINT flags have been removed, but their values are reserved
+ * for backwards compatibility with older user-space drivers. The values
+ * can be re-used once support for r5p0 user-space drivers is removed,
+ * presumably in r7p0.
+ *
+ * RESERVED: (1U << 5)
+ * RESERVED: (1U << 6)
+ * RESERVED: (1U << 7)
+ * RESERVED: (1U << 8)
+ */
+
+ BASE_MEM_GROW_ON_GPF = (1U << 9), /**< Grow backing store on GPU
+ Page Fault */
+
+ BASE_MEM_COHERENT_SYSTEM = (1U << 10), /**< Page coherence Outer
+ shareable, if available */
+ BASE_MEM_COHERENT_LOCAL = (1U << 11), /**< Page coherence Inner
+ shareable */
+ BASE_MEM_CACHED_CPU = (1U << 12), /**< Should be cached on the
+ CPU */
+
+/* IN/OUT */
+ BASE_MEM_SAME_VA = (1U << 13), /**< Must have same VA on both the GPU
+ and the CPU */
+/* OUT */
+ BASE_MEM_NEED_MMAP = (1U << 14), /**< Must call mmap to aquire a GPU
+ address for the alloc */
+
+/* IN */
+ BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence
+ Outer shareable, required. */
+ BASE_MEM_SECURE = (1U << 16) /**< Secure memory */
+
+};
+
+/**
+ * @brief Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the ::base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 17
+
+/**
+ * A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/**
+ * A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+ (((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+
+/**
+ * @brief Memory types supported by @a base_mem_import
+ *
+ * Each type defines what the supported handle type is.
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+ BASE_MEM_IMPORT_TYPE_INVALID = 0,
+ /** UMP import. Handle type is ump_secure_id. */
+ BASE_MEM_IMPORT_TYPE_UMP = 1,
+ /** UMM import. Handle type is a file descriptor (int) */
+ BASE_MEM_IMPORT_TYPE_UMM = 2
+} base_mem_import_type;
+
+/* legacy API wrappers */
+#define base_tmem_import_type base_mem_import_type
+#define BASE_TMEM_IMPORT_TYPE_INVALID BASE_MEM_IMPORT_TYPE_INVALID
+#define BASE_TMEM_IMPORT_TYPE_UMP BASE_MEM_IMPORT_TYPE_UMP
+#define BASE_TMEM_IMPORT_TYPE_UMM BASE_MEM_IMPORT_TYPE_UMM
+
+/**
+ * @brief Invalid memory handle type.
+ * Return value from functions returning @a base_mem_handle on error.
+ */
+#define BASE_MEM_INVALID_HANDLE (0ull << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE (64ul << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
+ BASE_MEM_COOKIE_BASE)
+
+/* Bit mask of cookies used for for memory allocation setup */
+#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+ BASE_BACKING_THRESHOLD_OK = 0, /**< Resize successful */
+ BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1, /**< Not a growable tmem object */
+ BASE_BACKING_THRESHOLD_ERROR_OOM = -2, /**< Increase failed due to an out-of-memory condition */
+ BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3, /**< Resize attempted on buffer while it was mapped, which is not permitted */
+ BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4 /**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Defered Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+ struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opague handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+ struct {
+ u64 handle;
+ } basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+ struct {
+ int fd;
+ } basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
+ */
+typedef struct base_fence {
+ struct {
+ int fd;
+ int stream_fd;
+ } basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completly unused
+ * by the Base driver. It can be used to store things such as callback
+ * function pointer, data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+ u64 blob[2]; /**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ * Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+ base_mem_handle handle;
+ u64 offset;
+ u64 length;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a dependency is a data or
+ * ordering dependency (by putting it before/after 'core_req' in the structure it should be possible to add without
+ * changing the structure size).
+ * When the flag is set for a particular dependency to signal that it is an ordering only dependency then
+ * errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID (0) /**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA (1U << 0) /**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER (1U << 1) /**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly. By not specifying the
+ * correct settings can/will cause an early job termination. Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u16 base_jd_core_req;
+
+/* Requirements that come from the HW */
+#define BASE_JD_REQ_DEP 0 /**< No requirement, dependency only */
+#define BASE_JD_REQ_FS (1U << 0) /**< Requires fragment shaders */
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
+ */
+#define BASE_JD_REQ_CS (1U << 1)
+#define BASE_JD_REQ_T (1U << 2) /**< Requires tiling */
+#define BASE_JD_REQ_CF (1U << 3) /**< Requires cache flushes */
+#define BASE_JD_REQ_V (1U << 4) /**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC (1U << 13)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP (1U << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON (1U << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced no syncsets can be bundled with the atom
+ * but should instead be part of a NULL jobs inserted into the dependency tree.
+ * The first pre_dep object must be configured for the external resouces to use,
+ * the second pre_dep object can be used to create other dependencies.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES (1U << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB (1U << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME (BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER (BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT (BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement : Replay job.
+ *
+ * If the preceeding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceeding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceeding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions :
+ *
+ * - No external resources. Any required external resources will be held by the
+ * replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ *
+ * @note This is a more flexible variant of the @ref BASE_CONTEXT_HINT_ONLY_COMPUTE flag,
+ * allowing specific jobs to be marked as 'Only Compute' instead of the entire context
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE (1U << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE (1U << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER (1U << 14)
+
+/**
+* These requirement bits are currently unused in base_jd_core_req (currently a u16)
+*/
+
+#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5)
+#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15)
+
+/**
+* Mask of all the currently unused requirement bits in base_jd_core_req.
+*/
+
+#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \
+ BASEP_JD_REQ_RESERVED_BIT15)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency only atoms to have flags set
+ */
+#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\
+ BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER))
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ * finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ * finish. These are run preferrentially because they don't cause a
+ * violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ * contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+ /** Starting state: No affinity chosen, and cores must be requested. kbase_jd_atom::affinity==0 */
+ KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+ /** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+ KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+ /** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+ KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+ /** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+ KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+ /** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+ KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms does not affect non-frament atoms with lower priorities, and
+ * the other way around. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot who have dependencies
+ * resolved, and atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ * allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ * the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ * respect to each other. That is, there is no ordering constraint between
+ * atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
+
+/* Count of the number of priority levels. This itself is not a valid
+ * base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+ /** Atom is not used */
+ KBASE_JD_ATOM_STATE_UNUSED,
+ /** Atom is queued in JD */
+ KBASE_JD_ATOM_STATE_QUEUED,
+ /** Atom has been given to JS (is runnable/running) */
+ KBASE_JD_ATOM_STATE_IN_JS,
+ /** Atom has been completed, but not yet handed back to job dispatcher
+ * for dependency resolution */
+ KBASE_JD_ATOM_STATE_HW_COMPLETED,
+ /** Atom has been completed, but not yet handed back to userspace */
+ KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+ base_atom_id atom_id; /**< An atom number */
+ base_jd_dep_type dependency_type; /**< Dependency type */
+};
+
+typedef struct base_jd_atom_v2 {
+ u64 jc; /**< job-chain GPU address */
+ struct base_jd_udata udata; /**< user data */
+ kbase_pointer extres_list; /**< list of external resources */
+ u16 nr_extres; /**< nr of external resources */
+ base_jd_core_req core_req; /**< core requirements */
+ const struct base_dependency pre_dep[2]; /**< pre-dependencies, one need to use SETTER function to assign this field,
+ this is done in order to reduce possibility of improper assigment of a dependency field */
+ base_atom_id atom_number; /**< unique number to identify the atom */
+ base_jd_prio prio; /**< Atom priority. Refer to @ref base_jd_prio for more details */
+ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+ u8 padding[5];
+} base_jd_atom_v2;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+struct base_jd_atom_v2_uk6 {
+ u64 jc; /**< job-chain GPU address */
+ struct base_jd_udata udata; /**< user data */
+ kbase_pointer extres_list; /**< list of external resources */
+ u16 nr_extres; /**< nr of external resources */
+ base_jd_core_req core_req; /**< core requirements */
+ base_atom_id pre_dep[2]; /**< pre-dependencies */
+ base_atom_id atom_number; /**< unique number to identify the atom */
+ base_jd_prio prio; /**< priority - smaller is higher priority */
+ u8 device_nr; /**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+ u8 padding[7];
+};
+#endif
+
+typedef enum base_external_resource_access {
+ BASE_EXT_RES_ACCESS_SHARED,
+ BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+ u64 ext_resource;
+} base_external_resource;
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] dep The kbase jd atom dependency to be initialized.
+ * @param id The atom_id to be assigned.
+ * @param dep_type The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(const struct base_dependency *const_dep, base_atom_id id, base_jd_dep_type dep_type)
+{
+ struct base_dependency *dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+ /* make sure we don't set not allowed combinations of atom_id/dependency_type */
+ LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+ (id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+ dep = (struct base_dependency *)const_dep;
+
+ dep->atom_id = id;
+ dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] dep The kbase jd atom dependency to be written.
+ * @param[in] from The dependency to make a copy from.
+ *
+ */
+static inline void base_jd_atom_dep_copy(const struct base_dependency *const_dep, const struct base_dependency *from)
+{
+ LOCAL_ASSERT(const_dep != NULL);
+
+ base_jd_atom_dep_set(const_dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+ LOCAL_ASSERT(atom);
+ LOCAL_ASSERT(fence);
+ LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+ LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+ atom->jc = (uintptr_t) fence;
+ atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import.
+ * Calling this function with a uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+ LOCAL_ASSERT(atom);
+ LOCAL_ASSERT(fence);
+ LOCAL_ASSERT(fence->basep.fd >= 0);
+ atom->jc = (uintptr_t) fence;
+ atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up a external resource object to reference
+ * a memory allocation and the type of access requested.
+ *
+ * @param[in] res The resource object to initialize
+ * @param handle The handle to the imported memory object
+ * @param access The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+ u64 address;
+
+ address = handle.basep.handle;
+
+ LOCAL_ASSERT(res != NULL);
+ LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+ LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+ res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+ BASE_JD_SW_EVENT_KERNEL = (1u << 15), /**< Kernel side event */
+ BASE_JD_SW_EVENT = (1u << 14), /**< SW defined event */
+ BASE_JD_SW_EVENT_SUCCESS = (1u << 13), /**< Event idicates success (SW events only) */
+ BASE_JD_SW_EVENT_JOB = (0u << 11), /**< Job related event */
+ BASE_JD_SW_EVENT_BAG = (1u << 11), /**< Bag related event */
+ BASE_JD_SW_EVENT_INFO = (2u << 11), /**< Misc/info event */
+ BASE_JD_SW_EVENT_RESERVED = (3u << 11), /**< Reserved event type */
+ BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11) /**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded are also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0 - subtype
+ * @li 12:11 - type
+ * @li 13 - SW success (only valid if the SW bit is set)
+ * @li 14 - SW event (HW event if not set)
+ * @li 15 - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
+ */
+typedef enum base_jd_event_code {
+ /* HW defined exceptions */
+
+ /** Start of HW Non-fault status codes
+ *
+ * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+ * because the job was hard-stopped
+ */
+ BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0,
+
+ /* non-fatal exceptions */
+ BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */
+ BASE_JD_EVENT_DONE = 0x01,
+ BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */
+ BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */
+ BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */
+
+ /** End of HW Non-fault status codes
+ *
+ * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault,
+ * because the job was hard-stopped
+ */
+ BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40,
+
+ /** Start of HW fault and SW Error status codes */
+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40,
+
+ /* job exceptions */
+ BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40,
+ BASE_JD_EVENT_JOB_POWER_FAULT = 0x41,
+ BASE_JD_EVENT_JOB_READ_FAULT = 0x42,
+ BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43,
+ BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44,
+ BASE_JD_EVENT_JOB_BUS_FAULT = 0x48,
+ BASE_JD_EVENT_INSTR_INVALID_PC = 0x50,
+ BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51,
+ BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52,
+ BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53,
+ BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54,
+ BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55,
+ BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56,
+ BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58,
+ BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59,
+ BASE_JD_EVENT_STATE_FAULT = 0x5A,
+ BASE_JD_EVENT_OUT_OF_MEMORY = 0x60,
+ BASE_JD_EVENT_UNKNOWN = 0x7F,
+
+ /* GPU exceptions */
+ BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80,
+ BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88,
+
+ /* MMU exceptions */
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3,
+ BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4,
+ BASE_JD_EVENT_PERMISSION_FAULT = 0xC8,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3,
+ BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4,
+ BASE_JD_EVENT_ACCESS_FLAG = 0xD8,
+
+ /* SW defined exceptions */
+ BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001,
+ BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002,
+ BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003,
+ BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004,
+ BASE_JD_EVENT_FORCE_REPLAY = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005,
+
+ BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003,
+
+ /** End of HW fault and SW Error status codes */
+ BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+ /** Start of SW Success status codes */
+ BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000,
+
+ BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000,
+ BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000,
+ BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+ /** End of SW Success status codes */
+ BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+ /** Start of Kernel-only status codes. Such codes are never returned to user-space */
+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+ BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+ /** End of Kernel-only status codes. */
+ BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. The can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (ie all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+ base_jd_event_code event_code; /**< event code */
+ base_atom_id atom_number; /**< the atom number that has completed */
+ struct base_jd_udata udata; /**< user data */
+} base_jd_event_v2;
+
+/**
+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills
+ * a full cache line.
+ */
+
+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36)
+
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom.
+ *
+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid
+ * cases where access to pages containing the structure is shared between cached and un-cached
+ * memory regions, which would cause memory corruption. Here we set the structure size to be 64 bytes
+ * which is the cache line for ARM A15 processors.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+ u64 system_time;
+ u64 cycle_counter;
+ u64 sec;
+ u32 usec;
+ u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING];
+} base_dump_cpu_gpu_counters;
+
+
+
+/** @} end group base_user_api_job_dispatch */
+
+#ifdef __KERNEL__
+/*
+ * The following typedefs should be removed when a midg types header is added.
+ * See MIDCOM-1657 for details.
+ */
+typedef u32 gpu_product_id;
+typedef u32 gpu_cache_features;
+typedef u32 gpu_tiler_features;
+typedef u32 gpu_mem_features;
+typedef u32 gpu_mmu_features;
+typedef u32 gpu_js_features;
+typedef u32 gpu_as_present;
+typedef u32 gpu_js_present;
+
+#define GPU_MAX_JOB_SLOTS 16
+
+#else
+#include <gpu/mali_gpu_registers.h>
+#include <gpu/mali_gpu_props.h>
+#endif
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistant guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ * - Part of a critical inner-loop
+ * - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, neither can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() to a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up gpu properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ * - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ * so they won't accidentally pick another file with 'mali_t600' in its name
+ * - When the build doesn't work, the System Integrator may think the DDK is
+ * doesn't work, and attempt to fix it themselves:
+ * - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ * error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ * you.
+ * - For a \#include mechanism, checks must still be made elsewhere, which the
+ * System Integrator may try working around by setting \#defines (such as
+ * VA_BITS) themselves in their plat_config.h. In the worst case, they may
+ * set the prevention-mechanism \#define of
+ * "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ * - In this case, they would believe they are on the right track, because
+ * the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configurations files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+ digraph plat_config_mechanism {
+ rankdir=BT
+ size="6,6"
+
+ "mali_base.h";
+ "gpu/mali_gpu.h";
+
+ node [ shape=box ];
+ {
+ rank = same; ordering = out;
+
+ "gpu/mali_gpu_props.h";
+ "base/midg_gpus/mali_t600.h";
+ "base/midg_gpus/other_midg_gpu.h";
+ }
+ { rank = same; "plat/plat_config.h"; }
+ {
+ rank = same;
+ "gpu/mali_gpu.h" [ shape=box ];
+ gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+ select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+ }
+ node [ shape=box ];
+ { rank = same; "plat/plat_config.h"; }
+ { rank = same; "mali_base.h"; }
+
+ "mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+ "mali_base.h" -> "plat/plat_config.h" ;
+ "mali_base.h" -> select_gpu ;
+
+ "plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+ gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+ select_gpu -> "base/midg_gpus/mali_t600.h" ;
+ select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+ }
+ @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the provided the entire processed @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify a 'Only Coherent Group'
+ * SW requirement, or 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is done by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask.
+ *
+ * The memory requirements for this algoirthm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper already is
+ * requried for the above), or simple assumption that there can be no more than
+ * 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
+
+struct mali_base_gpu_core_props {
+ /**
+ * Product specific value.
+ */
+ gpu_product_id product_id;
+
+ /**
+ * Status of the GPU release.
+ * No defined values, but starts at 0 and increases by one for each release
+ * status (alpha, beta, EAC, etc.).
+ * 4 bit values (0-15).
+ */
+ u16 version_status;
+
+ /**
+ * Minor release number of the GPU. "P" part of an "RnPn" release number.
+ * 8 bit values (0-255).
+ */
+ u16 minor_revision;
+
+ /**
+ * Major release number of the GPU. "R" part of an "RnPn" release number.
+ * 4 bit values (0-15).
+ */
+ u16 major_revision;
+
+ u16 padding;
+
+ /**
+ * @usecase GPU clock speed is not specified in the Midgard Architecture, but is
+ * <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ */
+ u32 gpu_speed_mhz;
+
+ /**
+ * @usecase GPU clock max/min speed is required for computing best/worst case
+ * in tasks as job scheduling ant irq_throttling. (It is not specified in the
+ * Midgard Architecture).
+ */
+ u32 gpu_freq_khz_max;
+ u32 gpu_freq_khz_min;
+
+ /**
+ * Size of the shader program counter, in bits.
+ */
+ u32 log2_program_counter_size;
+
+ /**
+ * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+ * bitpattern where a set bit indicates that the format is supported.
+ *
+ * Before using a texture format, it is recommended that the corresponding
+ * bit be checked.
+ */
+ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+ /**
+ * Theoretical maximum memory available to the GPU. It is unlikely that a
+ * client will be able to allocate all of this memory for their own
+ * purposes, but this at least provides an upper bound on the memory
+ * available to the GPU.
+ *
+ * This is required for OpenCL's clGetDeviceInfo() call when
+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+ * client will not be expecting to allocate anywhere near this value.
+ */
+ u64 gpu_available_memory_size;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level apis.
+ */
+struct mali_base_gpu_l2_cache_props {
+ u8 log2_line_size;
+ u8 log2_cache_size;
+ u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+ u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+ u32 bin_size_bytes; /* Max is 4*2^15 */
+ u32 max_active_levels; /* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+ u32 max_threads; /* Max. number of threads per core */
+ u32 max_workgroup_size; /* Max. number of threads per workgroup */
+ u32 max_barrier_size; /* Max. number of threads that can synchronize on a simple barrier */
+ u16 max_registers; /* Total size [1..65535] of the register file available per core. */
+ u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+ u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+ u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+ u8 padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note if u64s must be 8-byte aligned, then this structure has 32-bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+ u64 core_mask; /**< Core restriction mask required for the group */
+ u16 num_cores; /**< Number of cores in the group */
+ u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+ u32 num_groups;
+
+ /**
+ * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+ *
+ * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+ * whether the core groups are coherent or not. Hence this member is needed
+ * to calculate how much memory is required for dumping.
+ *
+ * @note Do not use it to work out how many valid elements are in the
+ * group[] member. Use num_groups instead.
+ */
+ u32 num_core_groups;
+
+ /**
+ * Coherency features of the memory, accessed by @ref gpu_mem_features
+ * methods
+ */
+ gpu_mem_features coherency;
+
+ u32 padding;
+
+ /**
+ * Descriptors of coherent groups
+ */
+ struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+ u64 shader_present;
+ u64 tiler_present;
+ u64 l2_present;
+ u64 unused_1; /* keep for backward compatibility */
+
+ gpu_cache_features l2_features;
+ u32 suspend_size; /* API 8.2+ */
+ gpu_mem_features mem_features;
+ gpu_mmu_features mmu_features;
+
+ gpu_as_present as_present;
+
+ u32 js_present;
+ gpu_js_features js_features[GPU_MAX_JOB_SLOTS];
+ gpu_tiler_features tiler_features;
+ u32 texture_features[3];
+
+ u32 gpu_id;
+
+ u32 thread_max_threads;
+ u32 thread_max_workgroup_size;
+ u32 thread_max_barrier_size;
+ u32 thread_features;
+
+ u32 coherency_features;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this datastructure contains the register
+ * values from which the value of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+ struct mali_base_gpu_core_props core_props;
+ struct mali_base_gpu_l2_cache_props l2_props;
+ u64 unused_1; /* keep for backwards compatibility */
+ struct mali_base_gpu_tiler_props tiler_props;
+ struct mali_base_gpu_thread_props thread_props;
+
+ /** This member is large, likely to be 128 bytes */
+ struct gpu_raw_gpu_props raw_props;
+
+ /** This must be last member of the structure */
+ struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as @ref basep_context_private_flags, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+ /** No flags set */
+ BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+ /** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+ BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+ /** Base context is a 'System Monitor' context for Hardware counters.
+ *
+ * One important side effect of this is that job submission is disabled. */
+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1),
+
+ /** Base context flag indicating a 'hint' that this context uses Compute
+ * Jobs only.
+ *
+ * Specifially, this means that it only sends atoms that <b>do not</b>
+ * contain the following @ref base_jd_core_req :
+ * - BASE_JD_REQ_FS
+ * - BASE_JD_REQ_T
+ *
+ * Violation of these requirements will cause the Job-Chains to be rejected.
+ *
+ * In addition, it is inadvisable for the atom's Job-Chains to contain Jobs
+ * of the following @ref gpu_job_type (whilst it may work now, it may not
+ * work in future) :
+ * - @ref GPU_JOB_VERTEX
+ * - @ref GPU_JOB_GEOMETRY
+ *
+ * @note An alternative to using this is to specify the BASE_JD_REQ_ONLY_COMPUTE
+ * requirement in atoms.
+ */
+ BASE_CONTEXT_HINT_ONLY_COMPUTE = (1u << 2)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+ (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+ ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \
+ ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+ (((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \
+ ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE))
+
+/**
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+enum basep_context_private_flags {
+ /** Private flag tracking whether job descriptor dumping is disabled */
+ BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31)
+};
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revison.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+ /**
+ * Pointer to the first entry in the base_jd_replay_jc list. These
+ * will be replayed in @b reverse order (so that extra ones can be added
+ * to the head in future soft jobs without affecting this soft job)
+ */
+ u64 tiler_jc_list;
+
+ /**
+ * Pointer to the fragment job chain.
+ */
+ u64 fragment_jc;
+
+ /**
+ * Pointer to the tiler heap free FBD field to be modified.
+ */
+ u64 tiler_heap_free;
+
+ /**
+ * Hierarchy mask for the replayed fragment jobs. May be zero.
+ */
+ u16 fragment_hierarchy_mask;
+
+ /**
+ * Hierarchy mask for the replayed tiler jobs. May be zero.
+ */
+ u16 tiler_hierarchy_mask;
+
+ /**
+ * Default weight to be used for hierarchy levels not in the original
+ * mask.
+ */
+ u32 hierarchy_default_weight;
+
+ /**
+ * Core requirements for the tiler job chain
+ */
+ base_jd_core_req tiler_core_req;
+
+ /**
+ * Core requirements for the fragment job chain
+ */
+ base_jd_core_req fragment_core_req;
+
+ u8 padding[4];
+} base_jd_replay_payload;
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ * be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+ /**
+ * Pointer to next entry in the list. A setting of NULL indicates the
+ * end of the list.
+ */
+ u64 next;
+
+ /**
+ * Pointer to the job chain.
+ */
+ u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+ u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+#endif /* _BASE_KERNEL_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel_sync.h b/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
new file mode 100644
index 00000000000..a24791f4af7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base cross-proccess sync API.
+ */
+
+#ifndef _BASE_KERNEL_SYNC_H_
+#define _BASE_KERNEL_SYNC_H_
+
+#include <linux/ioctl.h>
+
+#define STREAM_IOC_MAGIC '~'
+
+/* Fence insert.
+ *
+ * Inserts a fence on the stream operated on.
+ * Fence can be waited via a base fence wait soft-job
+ * or triggered via a base fence trigger soft-job.
+ *
+ * Fences must be cleaned up with close when no longer needed.
+ *
+ * No input/output arguments.
+ * Returns
+ * >=0 fd
+ * <0 error code
+ */
+#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)
+
+#endif /* _BASE_KERNEL_SYNC_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100644
index 00000000000..4a98a72cc37
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC (1U << 0)
+#define BASE_SYNCSET_OP_CSYNC (1U << 1)
+
+/*
+ * This structure describe a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ * - type = ::BASE_SYNCSET_OP_MSYNC
+ * - mem_handle = a handle to the memory object on which the operation
+ * is taking place
+ * - user_addr = the address of the range to be synced
+ * - size = the amount of data to be synced, in bytes
+ * - offset is ignored.
+ * @li a sync from Memory to CPU:
+ * - type = ::BASE_SYNCSET_OP_CSYNC
+ * - mem_handle = a handle to the memory object on which the operation
+ * is taking place
+ * - user_addr = the address of the range to be synced
+ * - size = the amount of data to be synced, in bytes.
+ * - offset is ignored.
+ */
+struct basep_syncset {
+ base_mem_handle mem_handle;
+ u64 user_addr;
+ u64 size;
+ u8 type;
+ u8 padding[7];
+};
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100644
index 00000000000..be454a216a3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif /*_BASE_VENDOR_SPEC_FUNC_H_*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100644
index 00000000000..be3527820fa
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,499 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+#include "mali_kbase_pm.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_security.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_cpuprops.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ *
+ * The Kernel-side Base (KBase) APIs are divided up as follows:
+ * - @subpage page_kbase_js_policy
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+* note: configuration attributes member of kbdev needs to have
+* been setup before calling kbase_device_init
+*/
+
+/*
+* API to acquire device list semaphone and return pointer
+* to the device list head
+*/
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data,
+ int uk6_atom);
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data);
+#endif
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+ kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
+bool jd_submit_atom(struct kbase_context *kctx,
+ const struct base_jd_atom_v2 *user_atom,
+ struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ * and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The js_data.runpool_irq.lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+ struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+ u16 core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+ struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+/* api used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* api to be ported per OS, only need to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+ u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already been suspended
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+ return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int result;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+ result = katom - &kctx->jctx.atoms[0];
+ KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+ return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id: ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+ struct kbase_context *kctx, int id)
+{
+ return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events
+ * called when a disjoint event has happened
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ * eventually after the disjoint state has completed @ref kbase_disjoint_state_down
+ * should be called
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val)
+
+/** Add trace values (no slot or refcount)
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \
+ kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+ 0, 0, 0, info_val)
+
+/** Clear the trace */
+#define KBASE_TRACE_CLEAR(kbdev) \
+ kbasep_trace_clear(kbdev)
+
+/** Dump the slot trace */
+#define KBASE_TRACE_DUMP(kbdev) \
+ kbasep_trace_dump(kbdev)
+
+/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val);
+/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */
+void kbasep_trace_clear(struct kbase_device *kbdev);
+#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+/* Dispatch kbase trace events as system trace events */
+#include <mali_linux_kbase_trace.h>
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+ trace_mali_##code(jobslot, 0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+ trace_mali_##code(jobslot, info_val)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+ trace_mali_##code(refcount, 0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+ trace_mali_##code(refcount, info_val)
+
+#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\
+ trace_mali_##code(gpu_addr, info_val)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */
+#else
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(jobslot);\
+ } while (0)
+
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(jobslot);\
+ CSTD_UNUSED(info_val);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(refcount);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(gpu_addr);\
+ CSTD_UNUSED(info_val);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(code);\
+ CSTD_UNUSED(subcode);\
+ CSTD_UNUSED(ctx);\
+ CSTD_UNUSED(katom);\
+ CSTD_UNUSED(val);\
+ CSTD_NOP(0);\
+ } while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+ do {\
+ CSTD_UNUSED(kbdev);\
+ CSTD_NOP(0);\
+ } while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100644
index 00000000000..933aa285469
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to solve an HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, can happen that the
+ * restart index is out of bounds and the rerun causes a tile range fault. If this happens
+ * we try to clamp the restart index to a correct value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinates words in the descriptors*/
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+ struct device *dev = katom->kctx->kbdev->dev;
+ u32 clamped = 0;
+ struct kbase_va_region *region;
+ phys_addr_t *page_array;
+ u64 page_index;
+ u32 offset = katom->jc & (~PAGE_MASK);
+ u32 *page_1 = NULL;
+ u32 *page_2 = NULL;
+ u32 job_header[JOB_HEADER_SIZE_IN_WORDS];
+ void *dst = job_header;
+ u32 minX, minY, maxX, maxY;
+ u32 restartX, restartY;
+ struct page *p;
+ u32 copy_size;
+
+ dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+ if (!(katom->core_req & BASE_JD_REQ_FS))
+ return 0;
+
+ kbase_gpu_vm_lock(katom->kctx);
+ region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+ katom->jc);
+ if (!region || (region->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ page_array = kbase_get_cpu_phy_pages(region);
+ if (!page_array)
+ goto out_unlock;
+
+ page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+ p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+ /* we need the first 10 words of the fragment shader job descriptor.
+ * We need to check that the offset + 10 words is less that the page
+ * size otherwise we need to load the next page.
+ * page_size_overflow will be equal to 0 in case the whole descriptor
+ * is within the page > 0 otherwise.
+ */
+ copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+ page_1 = kmap_atomic(p);
+
+ /* page_1 is a u32 pointer, offset is expressed in bytes */
+ page_1 += offset>>2;
+
+ kbase_sync_single_for_cpu(katom->kctx->kbdev,
+ kbase_dma_addr(p) + offset,
+ copy_size, DMA_BIDIRECTIONAL);
+
+ memcpy(dst, page_1, copy_size);
+
+ /* The data needed overflows page the dimension,
+ * need to map the subsequent page */
+ if (copy_size < JOB_HEADER_SIZE) {
+ p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+ page_2 = kmap_atomic(p);
+
+ kbase_sync_single_for_cpu(katom->kctx->kbdev,
+ kbase_dma_addr(p),
+ JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+ memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+ }
+
+ /* We managed to correctly map one or two pages (in case of overflow) */
+ /* Get Bounding Box data and restart index from fault address low word */
+ minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+ minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+ maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+ maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+ restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+ restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+ dev_warn(dev, "Before Clamping:\n"
+ "Jobstatus: %08x\n"
+ "restartIdx: %08x\n"
+ "Fault_addr_low: %08x\n"
+ "minCoordsX: %08x minCoordsY: %08x\n"
+ "maxCoordsX: %08x maxCoordsY: %08x\n",
+ job_header[JOB_DESC_STATUS_WORD],
+ job_header[JOB_DESC_RESTART_INDEX_WORD],
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+ minX, minY,
+ maxX, maxY);
+
+ /* Set the restart index to the one which generated the fault*/
+ job_header[JOB_DESC_RESTART_INDEX_WORD] =
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+ if (restartX < minX) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+ dev_warn(dev,
+ "Clamping restart X index to minimum. %08x clamped to %08x\n",
+ restartX, minX);
+ clamped = 1;
+ }
+ if (restartY < minY) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+ dev_warn(dev,
+ "Clamping restart Y index to minimum. %08x clamped to %08x\n",
+ restartY, minY);
+ clamped = 1;
+ }
+ if (restartX > maxX) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+ dev_warn(dev,
+ "Clamping restart X index to maximum. %08x clamped to %08x\n",
+ restartX, maxX);
+ clamped = 1;
+ }
+ if (restartY > maxY) {
+ job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+ dev_warn(dev,
+ "Clamping restart Y index to maximum. %08x clamped to %08x\n",
+ restartY, maxY);
+ clamped = 1;
+ }
+
+ if (clamped) {
+ /* Reset the fault address low word
+ * and set the job status to STOPPED */
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+ job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+ dev_warn(dev, "After Clamping:\n"
+ "Jobstatus: %08x\n"
+ "restartIdx: %08x\n"
+ "Fault_addr_low: %08x\n"
+ "minCoordsX: %08x minCoordsY: %08x\n"
+ "maxCoordsX: %08x maxCoordsY: %08x\n",
+ job_header[JOB_DESC_STATUS_WORD],
+ job_header[JOB_DESC_RESTART_INDEX_WORD],
+ job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+ minX, minY,
+ maxX, maxY);
+
+ /* Flush CPU cache to update memory for future GPU reads*/
+ memcpy(page_1, dst, copy_size);
+ p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+ kbase_sync_single_for_device(katom->kctx->kbdev,
+ kbase_dma_addr(p) + offset,
+ copy_size, DMA_TO_DEVICE);
+
+ if (copy_size < JOB_HEADER_SIZE) {
+ memcpy(page_2, dst + copy_size,
+ JOB_HEADER_SIZE - copy_size);
+ p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+
+ kbase_sync_single_for_device(katom->kctx->kbdev,
+ kbase_dma_addr(p),
+ JOB_HEADER_SIZE - copy_size,
+ DMA_TO_DEVICE);
+ }
+ }
+ if (copy_size < JOB_HEADER_SIZE)
+ kunmap_atomic(page_2);
+
+ kunmap_atomic(page_1);
+
+out_unlock:
+ kbase_gpu_vm_unlock(katom->kctx);
+ return clamped;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100644
index 00000000000..099a2986167
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100644
index 00000000000..51f934404e5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,61 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+ u32 cache_flags = 0;
+
+ CSTD_UNUSED(nr_pages);
+
+#ifdef CONFIG_MALI_CACHE_COHERENT
+ /* Cache is completely coherent at hardware level. So always allocate
+ * cached memory.
+ */
+ cache_flags |= KBASE_REG_CPU_CACHED;
+#else
+ if (flags & BASE_MEM_CACHED_CPU)
+ cache_flags |= KBASE_REG_CPU_CACHED;
+#endif /* (CONFIG_MALI_CACHE_COHERENT) */
+
+ return cache_flags;
+}
+
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir)
+{
+ dma_sync_single_for_device(kbdev->dev, handle, size, dir);
+}
+
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir)
+{
+ dma_sync_single_for_cpu(kbdev->dev, handle, size, dir);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
new file mode 100644
index 00000000000..d2487fdf21d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#ifndef _KBASE_CACHE_POLICY_H_
+#define _KBASE_CACHE_POLICY_H_
+
+#include "mali_kbase.h"
+#include "mali_base_kernel.h"
+#include "mali_kbase_device_internal.h"
+/**
+ * kbase_cache_enabled - Choose the cache policy for a specific region
+ * @flags: flags describing attributes of the region
+ * @nr_pages: total number of pages (backed or not) for the region
+ *
+ * Tells whether the CPU and GPU caches should be enabled or not for a specific
+ * region.
+ * This function can be modified to customize the cache policy depending on the
+ * flags and size of the region.
+ *
+ * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED
+ * depending on the cache policy
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages);
+
+#endif /* _KBASE_CACHE_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.c b/drivers/gpu/arm/midgard/mali_kbase_config.c
new file mode 100644
index 00000000000..fb615ae02ea
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config.c
@@ -0,0 +1,51 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+int kbasep_platform_device_init(struct kbase_device *kbdev)
+{
+ struct kbase_platform_funcs_conf *platform_funcs_p;
+
+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+ if (platform_funcs_p && platform_funcs_p->platform_init_func)
+ return platform_funcs_p->platform_init_func(kbdev);
+
+ return 0;
+}
+
+void kbasep_platform_device_term(struct kbase_device *kbdev)
+{
+ struct kbase_platform_funcs_conf *platform_funcs_p;
+
+ platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS;
+ if (platform_funcs_p && platform_funcs_p->platform_term_func)
+ platform_funcs_p->platform_term_func(kbdev);
+}
+
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed)
+{
+ KBASE_DEBUG_ASSERT(NULL != clock_speed);
+
+ *clock_speed = 100;
+ return 0;
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h
new file mode 100644
index 00000000000..5fb226dfd68
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config.h
@@ -0,0 +1,327 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_config.h
+ * Configuration API and Attributes for KBase
+ */
+
+#ifndef _KBASE_CONFIG_H_
+#define _KBASE_CONFIG_H_
+
+#include <asm/page.h>
+
+#include <mali_malisw.h>
+#include <mali_kbase_backend_config.h>
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_config Configuration API and Attributes
+ * @{
+ */
+
+#if !MALI_CUSTOMER_RELEASE
+/* This flag is set for internal builds so we can run tests without credentials. */
+#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1
+#else
+#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0
+#endif
+
+#include <linux/rbtree.h>
+
+/* Forward declaration of struct kbase_device */
+struct kbase_device;
+
+/**
+ * kbase_platform_funcs_conf - Specifies platform init/term function pointers
+ *
+ * Specifies the functions pointers for platform specific initialization and
+ * termination. By default no functions are required. No additional platform
+ * specific control is necessary.
+ */
+struct kbase_platform_funcs_conf {
+ /**
+ * platform_init_func - platform specific init function pointer
+ * @kbdev - kbase_device pointer
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Function pointer for platform specific initialization or NULL if no
+ * initialization function is required. This function will be called
+ * before any other callbacks listed in the struct kbase_attribute
+ * struct (such as Power Management callbacks).
+ *
+ * The platform specific private pointer kbase_device::platform_context
+ * can be accessed (and possibly initialized) in here.
+ */
+ int (*platform_init_func)(struct kbase_device *kbdev);
+ /**
+ * platform_term_func - platform specific termination function pointer
+ * @kbdev - kbase_device pointer
+ *
+ * Function pointer for platform specific termination or NULL if no
+ * termination function is required. This function will be called
+ * after any other callbacks listed in the struct kbase_attribute struct
+ * (such as Power Management callbacks).
+ *
+ * The platform specific private pointer kbase_device::platform_context
+ * can be accessed (and possibly terminated) in here.
+ */
+ void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+ /** Callback for when the GPU is idle and the power to it can be switched off.
+ *
+ * The system integrator can decide whether to either do nothing, just switch off
+ * the clocks to the GPU, or to completely power down the GPU.
+ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+ * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+ */
+ void (*power_off_callback)(struct kbase_device *kbdev);
+
+ /** Callback for when the GPU is about to become active and power must be supplied.
+ *
+ * This function must not return until the GPU is powered and clocked sufficiently for register access to
+ * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback.
+ * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+ * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+ * platform \em callbacks responsiblity to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+ *
+ * The return value of the first call to this function is ignored.
+ *
+ * @return 1 if the GPU state may have been lost, 0 otherwise.
+ */
+ int (*power_on_callback)(struct kbase_device *kbdev);
+
+ /** Callback for when the system is requesting a suspend and GPU power
+ * must be switched off.
+ *
+ * Note that if this callback is present, then this may be called
+ * without a preceding call to power_off_callback. Therefore this
+ * callback must be able to take any action that might otherwise happen
+ * in power_off_callback.
+ *
+ * The platform specific private pointer kbase_device::platform_context
+ * can be accessed and modified in here. It is the platform \em
+ * callbacks responsibility to initialize and terminate this pointer if
+ * used (see @ref kbase_platform_funcs_conf).
+ */
+ void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+ /** Callback for when the system is resuming from a suspend and GPU
+ * power must be switched on.
+ *
+ * Note that if this callback is present, then this may be called
+ * without a following call to power_on_callback. Therefore this
+ * callback must be able to take any action that might otherwise happen
+ * in power_on_callback.
+ *
+ * The platform specific private pointer kbase_device::platform_context
+ * can be accessed and modified in here. It is the platform \em
+ * callbacks responsibility to initialize and terminate this pointer if
+ * used (see @ref kbase_platform_funcs_conf).
+ */
+ void (*power_resume_callback)(struct kbase_device *kbdev);
+
+ /** Callback for handling runtime power management initialization.
+ *
+ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+ * will become active from calls made to the OS from within this function.
+ * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ *
+ * @return 0 on success, else int erro code.
+ */
+ int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+ /** Callback for handling runtime power management termination.
+ *
+ * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+ * should no longer be called by the OS on completion of this function.
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ */
+ void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+ /** Callback for runtime power-off power management callback
+ *
+ * For linux this callback will be called by the kernel runtime_suspend callback.
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ *
+ * @return 0 on success, else OS error code.
+ */
+ void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+ /** Callback for runtime power-on power management callback
+ *
+ * For linux this callback will be called by the kernel runtime_resume callback.
+ * Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ */
+ int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ *
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+ u64 start;
+ u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
+ */
+struct kbase_io_resources {
+ u32 job_irq_number;
+ u32 mmu_irq_number;
+ u32 gpu_irq_number;
+ struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+ const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init: - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ * Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that are outside of the Power Management
+ * callbacks.
+ *
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code. Such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+/**
+ * kbase_platform_fake_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 for success, any other fail causes module initialisation to fail
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_fake_register()
+ */
+void kbase_platform_fake_unregister(void);
+#endif
+#endif
+
+ /** @} *//* end group kbase_config */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_CONFIG_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100644
index 00000000000..ce5d0703911
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * Irq throttle. It is the minimum desired time in between two
+ * consecutive gpu interrupts (given in 'us'). The irq throttle
+ * gpu register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in micro seconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * Default Job Scheduler initial runtime of a context for the CFS Policy,
+ * in time-slices.
+ *
+ * This value is relative to that of the least-run context, and defines
+ * where in the CFS queue a new context is added. A value of 1 means 'after
+ * the least-run context has used its timeslice'. Therefore, when all
+ * contexts consistently use the same amount of time, a value of 1 models a
+ * FIFO. A value of 0 would model a LIFO.
+ *
+ * The value is represented in "numbers of time slices". Multiply this
+ * value by that defined in @ref DEFAULT_JS_CTX_TIMESLICE_NS to get
+ * the time value for this in nanoseconds.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1
+
+/**
+ * Default Job Scheduler minimum runtime value of a context for CFS, in
+ * time_slices relative to that of the least-run context.
+ *
+ * This is a measure of how much preferrential treatment is given to a
+ * context that is not run very often.
+ *
+ * Specficially, this value defines how many timeslices such a context is
+ * (initially) allowed to use at once. Such contexts (e.g. 'interactive'
+ * processes) will appear near the front of the CFS queue, and can initially
+ * use more time than contexts that run continuously (e.g. 'batch'
+ * processes).
+ *
+ * This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx
+ * not run for a long time attacks the system by using a very large initial
+ * number of timeslices when it finally does run.
+ *
+ * @note A value of zero allows not-run-often contexts to get scheduled in
+ * quickly, but to only use a single timeslice when they get scheduled in.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2
+
+/**
+* Boolean indicating whether the driver is configured to be secure at
+* a potential loss of performance.
+*
+* This currently affects only r0p0-15dev0 HW and earlier.
+*
+* On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+* performance:
+*
+* - When this is set to true, the driver remains fully secure,
+* but potentially loses performance compared with setting this to
+* false.
+* - When set to false, the driver is open to certain security
+* attacks.
+*
+* From r0p0-00rel0 and onwards, there is no security loss by setting
+* this to false, and no performance loss by setting it to
+* true.
+*/
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+ /**
+ * Use unrestricted Address ID width on the AXI bus.
+ */
+ KBASE_AID_32 = 0x0,
+
+ /**
+ * Restrict GPU to a half of maximum Address ID count.
+ * This will reduce performance, but reduce bus load due to GPU.
+ */
+ KBASE_AID_16 = 0x3,
+
+ /**
+ * Restrict GPU to a quarter of maximum Address ID count.
+ * This will reduce performance, but reduce bus load due to GPU.
+ */
+ KBASE_AID_8 = 0x2,
+
+ /**
+ * Restrict GPU to an eighth of maximum Address ID count.
+ * This will reduce performance, but reduce bus load due to GPU.
+ */
+ KBASE_AID_4 = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ * KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ * KBASE_AID_16 - use 16 IDs (4 ID bits)
+ * KBASE_AID_8 - use 8 IDs (3 ID bits)
+ * KBASE_AID_4 - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ * KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ * KBASE_AID_16 - use 16 IDs (4 ID bits)
+ * KBASE_AID_8 - use 8 IDs (3 ID bits)
+ * KBASE_AID_4 - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for using alternative hardware counters.
+ */
+#define DEFAULT_ALTERNATIVE_HWC false
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granuality. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granuality
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
+ */
+#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */
+
+/*
+ * Default number of milliseconds given for other jobs on the GPU to be
+ * soft-stopped when the GPU needs to be reset.
+ */
+#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+
+/*
+ * Default timeslice that a context is scheduled in for, in nanoseconds.
+ *
+ * When a context has used up this amount of time across its jobs, it is
+ * scheduled out to let another run.
+ *
+ * @note the resolution is nanoseconds (ns) here, because that's the format
+ * often used by the OS.
+ */
+#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
+
+#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c
new file mode 100644
index 00000000000..e066b286c51
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_context.c
@@ -0,0 +1,274 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel context APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_instr.h>
+
+#define MEMPOOL_PAGES 16384
+
+
+/**
+ * kbase_create_context() - Create a kernel base context.
+ * @kbdev: Kbase device
+ * @is_compat: Force creation of a 32-bit context
+ *
+ * Allocate and init a kernel base context.
+ *
+ * Return: new kbase context
+ */
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat)
+{
+ struct kbase_context *kctx;
+ int mali_err;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ /* zero-inited as lot of code assume it's zero'ed out on create */
+ kctx = vzalloc(sizeof(*kctx));
+
+ if (!kctx)
+ goto out;
+
+ /* creating a context is considered a disjoint event */
+ kbase_disjoint_event(kbdev);
+
+ kctx->kbdev = kbdev;
+ kctx->as_nr = KBASEP_AS_NR_INVALID;
+ kctx->is_compat = is_compat;
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ kctx->timeline.owner_tgid = task_tgid_nr(current);
+#endif
+ atomic_set(&kctx->setup_complete, 0);
+ atomic_set(&kctx->setup_in_progress, 0);
+ kctx->infinite_cache_active = 0;
+ spin_lock_init(&kctx->mm_update_lock);
+ kctx->process_mm = NULL;
+ atomic_set(&kctx->nonmapped_pages, 0);
+ kctx->slots_pullable = 0;
+
+ if (kbase_mem_allocator_init(&kctx->osalloc, MEMPOOL_PAGES, kctx->kbdev) != 0)
+ goto free_kctx;
+
+ kctx->pgd_allocator = &kctx->osalloc;
+ atomic_set(&kctx->used_pages, 0);
+
+ if (kbase_jd_init(kctx))
+ goto free_allocator;
+
+ mali_err = kbasep_js_kctx_init(kctx);
+ if (mali_err)
+ goto free_jd; /* safe to call kbasep_js_kctx_term in this case */
+
+ mali_err = kbase_event_init(kctx);
+ if (mali_err)
+ goto free_jd;
+
+ mutex_init(&kctx->reg_lock);
+
+ INIT_LIST_HEAD(&kctx->waiting_soft_jobs);
+#ifdef CONFIG_KDS
+ INIT_LIST_HEAD(&kctx->waiting_kds_resource);
+#endif
+
+ mali_err = kbase_mmu_init(kctx);
+ if (mali_err)
+ goto free_event;
+
+ kctx->pgd = kbase_mmu_alloc_pgd(kctx);
+ if (!kctx->pgd)
+ goto free_mmu;
+
+ if (kbase_mem_allocator_alloc(&kctx->osalloc, 1, &kctx->aliasing_sink_page) != 0)
+ goto no_sink_page;
+
+ kctx->tgid = current->tgid;
+ kctx->pid = current->pid;
+ init_waitqueue_head(&kctx->event_queue);
+
+ kctx->cookies = KBASE_COOKIE_MASK;
+
+ /* Make sure page 0 is not used... */
+ if (kbase_region_tracker_init(kctx))
+ goto no_region_tracker;
+#ifdef CONFIG_GPU_TRACEPOINTS
+ atomic_set(&kctx->jctx.work_id, 0);
+#endif
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ atomic_set(&kctx->timeline.jd_atoms_in_flight, 0);
+#endif
+
+ kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1;
+
+ mutex_init(&kctx->vinstr_cli_lock);
+
+ return kctx;
+
+no_region_tracker:
+no_sink_page:
+ kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0);
+ /* VM lock needed for the call to kbase_mmu_free_pgd */
+ kbase_gpu_vm_lock(kctx);
+ kbase_mmu_free_pgd(kctx);
+ kbase_gpu_vm_unlock(kctx);
+free_mmu:
+ kbase_mmu_term(kctx);
+free_event:
+ kbase_event_cleanup(kctx);
+free_jd:
+ /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+ kbasep_js_kctx_term(kctx);
+ kbase_jd_exit(kctx);
+free_allocator:
+ kbase_mem_allocator_term(&kctx->osalloc);
+free_kctx:
+ vfree(kctx);
+out:
+ return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_create_context);
+
+static void kbase_reg_pending_dtor(struct kbase_va_region *reg)
+{
+ dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n");
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+ kfree(reg);
+}
+
+/**
+ * kbase_destroy_context - Destroy a kernel base context.
+ * @kctx: Context to destroy
+ *
+ * Calls kbase_destroy_os_context() to free OS specific structures.
+ * Will release all outstanding regions.
+ */
+void kbase_destroy_context(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev;
+ int pages;
+ unsigned long pending_regions_to_clean;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+
+ KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u);
+
+ /* Ensure the core is powered up for the destroy process */
+ /* A suspend won't happen here, because we're in a syscall from a userspace
+ * thread. */
+ kbase_pm_context_active(kbdev);
+
+ kbase_jd_zap_context(kctx);
+ kbase_event_cleanup(kctx);
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* MMU is disabled as part of scheduling out the context */
+ kbase_mmu_free_pgd(kctx);
+
+ /* drop the aliasing sink page now that it can't be mapped anymore */
+ kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0);
+
+ /* free pending region setups */
+ pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK;
+ while (pending_regions_to_clean) {
+ unsigned int cookie = __ffs(pending_regions_to_clean);
+
+ BUG_ON(!kctx->pending_regions[cookie]);
+
+ kbase_reg_pending_dtor(kctx->pending_regions[cookie]);
+
+ kctx->pending_regions[cookie] = NULL;
+ pending_regions_to_clean &= ~(1UL << cookie);
+ }
+
+ kbase_region_tracker_term(kctx);
+ kbase_gpu_vm_unlock(kctx);
+
+ /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */
+ kbasep_js_kctx_term(kctx);
+
+ kbase_jd_exit(kctx);
+
+ kbase_pm_context_idle(kbdev);
+
+ kbase_mmu_term(kctx);
+
+ pages = atomic_read(&kctx->used_pages);
+ if (pages != 0)
+ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+
+ kbase_mem_allocator_term(&kctx->osalloc);
+ WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
+
+ vfree(kctx);
+}
+KBASE_EXPORT_SYMBOL(kbase_destroy_context);
+
+/**
+ * kbase_context_set_create_flags - Set creation flags on a context
+ * @kctx: Kbase context
+ * @flags: Flags to set
+ *
+ * Return: 0 on success
+ */
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags)
+{
+ int err = 0;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ unsigned long irq_flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* Validate flags */
+ if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+
+ /* Translate the flags */
+ if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0)
+ js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED);
+
+ if ((flags & BASE_CONTEXT_HINT_ONLY_COMPUTE) != 0)
+ js_kctx_info->ctx.flags |= (u32) KBASE_CTX_FLAG_HINT_ONLY_COMPUTE;
+
+ /* Latch the initial attributes into the Job Scheduler */
+ kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx);
+
+ spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock,
+ irq_flags);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ out:
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
new file mode 100644
index 00000000000..8cffa55fa98
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c
@@ -0,0 +1,3930 @@
+
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_uku.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_instr.h>
+#include <mali_kbase_gator.h>
+#include <backend/gpu/mali_kbase_js_affinity.h>
+#include <mali_kbase_mem_linux.h>
+#ifdef CONFIG_MALI_DEVFREQ
+#include <backend/gpu/mali_kbase_devfreq.h>
+#endif /* CONFIG_MALI_DEVFREQ */
+#include <mali_kbase_cpuprops.h>
+#ifdef CONFIG_MALI_NO_MALI
+#include "mali_kbase_model_linux.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "mali_kbase_mem_profile_debugfs_buf_size.h"
+#include "mali_kbase_debug_mem_view.h"
+#include <mali_kbase_hwaccess_backend.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#endif /* CONFIG_KDS */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/compat.h> /* is_compat_task */
+#include <linux/version.h>
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+#include <linux/pm_runtime.h>
+#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */
+#include <mali_kbase_hw.h>
+#include <platform/mali_kbase_platform_common.h>
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+#include <platform/mali_kbase_platform_fake.h>
+#endif /*CONFIG_MALI_PLATFORM_FAKE */
+#ifdef CONFIG_SYNC
+#include <mali_kbase_sync.h>
+#endif /* CONFIG_SYNC */
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+#include <linux/clk.h>
+
+#include <mali_kbase_config.h>
+
+#ifdef CONFIG_MACH_MANTA
+#include <plat/devs.h>
+#endif
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0))
+#include <linux/pm_opp.h>
+#else
+#include <linux/opp.h>
+#endif
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+
+/* GPU IRQ Tags */
+#define JOB_IRQ_TAG 0
+#define MMU_IRQ_TAG 1
+#define GPU_IRQ_TAG 2
+
+#if MALI_UNIT_TEST
+static struct kbase_exported_test_data shared_kernel_test_data;
+EXPORT_SYMBOL(shared_kernel_test_data);
+#endif /* MALI_UNIT_TEST */
+
+#define KBASE_DRV_NAME "mali"
+
+static const char kbase_drv_name[] = KBASE_DRV_NAME;
+
+static int kbase_dev_nr;
+
+static DEFINE_MUTEX(kbase_dev_list_lock);
+static LIST_HEAD(kbase_dev_list);
+
+#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
+static inline void __compile_time_asserts(void)
+{
+ CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE);
+}
+
+#ifdef CONFIG_KDS
+
+struct kbasep_kds_resource_set_file_data {
+ struct kds_resource_set *lock;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file);
+
+static const struct file_operations kds_resource_fops = {
+ .release = kds_resource_release
+};
+
+struct kbase_kds_resource_list_data {
+ struct kds_resource **kds_resources;
+ unsigned long *kds_access_bitmap;
+ int num_elems;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file)
+{
+ struct kbasep_kds_resource_set_file_data *data;
+
+ data = (struct kbasep_kds_resource_set_file_data *)file->private_data;
+ if (NULL != data) {
+ if (NULL != data->lock)
+ kds_resource_set_release(&data->lock);
+
+ kfree(data);
+ }
+ return 0;
+}
+
+static int kbasep_kds_allocate_resource_list_data(struct kbase_context *kctx, struct base_external_resource *ext_res, int num_elems, struct kbase_kds_resource_list_data *resources_list)
+{
+ struct base_external_resource *res = ext_res;
+ int res_id;
+
+ /* assume we have to wait for all */
+
+ KBASE_DEBUG_ASSERT(0 != num_elems);
+ resources_list->kds_resources = kmalloc_array(num_elems,
+ sizeof(struct kds_resource *), GFP_KERNEL);
+
+ if (NULL == resources_list->kds_resources)
+ return -ENOMEM;
+
+ KBASE_DEBUG_ASSERT(0 != num_elems);
+ resources_list->kds_access_bitmap = kzalloc(
+ sizeof(unsigned long) *
+ ((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG),
+ GFP_KERNEL);
+
+ if (NULL == resources_list->kds_access_bitmap) {
+ kfree(resources_list->kds_access_bitmap);
+ return -ENOMEM;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+ for (res_id = 0; res_id < num_elems; res_id++, res++) {
+ int exclusive;
+ struct kbase_va_region *reg;
+ struct kds_resource *kds_res = NULL;
+
+ exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE;
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+ /* did we find a matching region object? */
+ if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+ break;
+
+ /* no need to check reg->alloc as only regions with an alloc has
+ * a size, and kbase_region_tracker_find_region_enclosing_address
+ * only returns regions with size > 0 */
+ switch (reg->gpu_alloc->type) {
+#if defined(CONFIG_UMP) && defined(CONFIG_KDS)
+ case KBASE_MEM_TYPE_IMPORTED_UMP:
+ kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle);
+ break;
+#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */
+ default:
+ break;
+ }
+
+ /* no kds resource for the region ? */
+ if (!kds_res)
+ break;
+
+ resources_list->kds_resources[res_id] = kds_res;
+
+ if (exclusive)
+ set_bit(res_id, resources_list->kds_access_bitmap);
+ }
+ kbase_gpu_vm_unlock(kctx);
+
+ /* did the loop run to completion? */
+ if (res_id == num_elems)
+ return 0;
+
+ /* Clean up as the resource list is not valid. */
+ kfree(resources_list->kds_resources);
+ kfree(resources_list->kds_access_bitmap);
+
+ return -EINVAL;
+}
+
+static bool kbasep_validate_kbase_pointer(
+ struct kbase_context *kctx, union kbase_pointer *p)
+{
+ if (kctx->is_compat) {
+ if (p->compat_value == 0)
+ return false;
+ } else {
+ if (NULL == p->value)
+ return false;
+ }
+ return true;
+}
+
+static int kbase_external_buffer_lock(struct kbase_context *kctx,
+ struct kbase_uk_ext_buff_kds_data *args, u32 args_size)
+{
+ struct base_external_resource *ext_res_copy;
+ size_t ext_resource_size;
+ int ret = -EINVAL;
+ int fd = -EBADF;
+ struct base_external_resource __user *ext_res_user;
+ int __user *file_desc_usr;
+ struct kbasep_kds_resource_set_file_data *fdata;
+ struct kbase_kds_resource_list_data resource_list_data;
+
+ if (args_size != sizeof(struct kbase_uk_ext_buff_kds_data))
+ return -EINVAL;
+
+ /* Check user space has provided valid data */
+ if (!kbasep_validate_kbase_pointer(kctx, &args->external_resource) ||
+ !kbasep_validate_kbase_pointer(kctx, &args->file_descriptor) ||
+ (0 == args->num_res) ||
+ (args->num_res > KBASE_MAXIMUM_EXT_RESOURCES))
+ return -EINVAL;
+
+ ext_resource_size = sizeof(struct base_external_resource) * args->num_res;
+
+ KBASE_DEBUG_ASSERT(0 != ext_resource_size);
+ ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL);
+
+ if (!ext_res_copy)
+ return -EINVAL;
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat) {
+ ext_res_user = compat_ptr(args->external_resource.compat_value);
+ file_desc_usr = compat_ptr(args->file_descriptor.compat_value);
+ } else {
+#endif /* CONFIG_COMPAT */
+ ext_res_user = args->external_resource.value;
+ file_desc_usr = args->file_descriptor.value;
+#ifdef CONFIG_COMPAT
+ }
+#endif /* CONFIG_COMPAT */
+
+ /* Copy the external resources to lock from user space */
+ if (copy_from_user(ext_res_copy, ext_res_user, ext_resource_size))
+ goto out;
+
+ /* Allocate data to be stored in the file */
+ fdata = kmalloc(sizeof(*fdata), GFP_KERNEL);
+
+ if (!fdata) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Parse given elements and create resource and access lists */
+ ret = kbasep_kds_allocate_resource_list_data(kctx,
+ ext_res_copy, args->num_res, &resource_list_data);
+ if (!ret) {
+ long err;
+
+ fdata->lock = NULL;
+
+ fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0);
+
+ err = copy_to_user(file_desc_usr, &fd, sizeof(fd));
+
+ /* If the file descriptor was valid and we successfully copied
+ * it to user space, then we can try and lock the requested
+ * kds resources.
+ */
+ if ((fd >= 0) && (0 == err)) {
+ struct kds_resource_set *lock;
+
+ lock = kds_waitall(args->num_res,
+ resource_list_data.kds_access_bitmap,
+ resource_list_data.kds_resources,
+ KDS_WAIT_BLOCKING);
+
+ if (IS_ERR_OR_NULL(lock)) {
+ ret = -EINVAL;
+ } else {
+ ret = 0;
+ fdata->lock = lock;
+ }
+ } else {
+ ret = -EINVAL;
+ }
+
+ kfree(resource_list_data.kds_resources);
+ kfree(resource_list_data.kds_access_bitmap);
+ }
+
+ if (ret) {
+ /* If the file was opened successfully then close it which will
+ * clean up the file data, otherwise we clean up the file data
+ * ourself.
+ */
+ if (fd >= 0)
+ sys_close(fd);
+ else
+ kfree(fdata);
+ }
+out:
+ kfree(ext_res_copy);
+
+ return ret;
+}
+#endif /* CONFIG_KDS */
+
+#ifdef CONFIG_MALI_MIPE_ENABLED
+static void kbase_create_timeline_objects(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ unsigned int lpu_id;
+ struct kbasep_kctx_list_element *element;
+
+ /* Create LPU objects. */
+ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+ gpu_js_features *lpu =
+ &kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+ kbase_tlstream_tl_summary_new_lpu(lpu, lpu_id, (u32)*lpu);
+ }
+
+ /* Create GPU object and make it retain all LPUs. */
+ kbase_tlstream_tl_summary_new_gpu(
+ kbdev,
+ kbdev->gpu_props.props.raw_props.gpu_id,
+ kbdev->gpu_props.num_cores);
+
+ for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) {
+ void *lpu =
+ &kbdev->gpu_props.props.raw_props.js_features[lpu_id];
+ kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, kbdev);
+ }
+
+ /* Create object for each known context. */
+ mutex_lock(&kbdev->kctx_list_lock);
+ list_for_each_entry(element, &kbdev->kctx_list, link) {
+ kbase_tlstream_tl_summary_new_ctx(
+ element->kctx,
+ (u32)(element->kctx->id));
+ }
+ /* Before releasing the lock, reset body stream buffers.
+ * This will prevent context creation message to be directed to both
+ * summary and body stream. */
+ kbase_tlstream_reset_body_streams();
+ mutex_unlock(&kbdev->kctx_list_lock);
+ /* Static object are placed into summary packet that needs to be
+ * transmitted first. Flush all streams to make it available to
+ * user space. */
+ kbase_tlstream_flush_streams();
+}
+#endif
+
+static void kbase_api_handshake(struct uku_version_check_args *version)
+{
+ switch (version->major) {
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ case 6:
+ /* We are backwards compatible with version 6,
+ * so pretend to be the old version */
+ version->major = 6;
+ version->minor = 1;
+ break;
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+#ifdef BASE_LEGACY_UK7_SUPPORT
+ case 7:
+ /* We are backwards compatible with version 7,
+ * so pretend to be the old version */
+ version->major = 7;
+ version->minor = 1;
+ break;
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+ case BASE_UK_VERSION_MAJOR:
+ /* set minor to be the lowest common */
+ version->minor = min_t(int, BASE_UK_VERSION_MINOR,
+ (int)version->minor);
+ break;
+ default:
+ /* We return our actual version regardless if it
+ * matches the version returned by userspace -
+ * userspace can bail if it can't handle this
+ * version */
+ version->major = BASE_UK_VERSION_MAJOR;
+ version->minor = BASE_UK_VERSION_MINOR;
+ break;
+ }
+}
+
+/**
+ * enum mali_error - Mali error codes shared with userspace
+ *
+ * This is subset of those common Mali errors that can be returned to userspace.
+ * Values of matching user and kernel space enumerators MUST be the same.
+ * MALI_ERROR_NONE is guaranteed to be 0.
+ */
+enum mali_error {
+ MALI_ERROR_NONE = 0,
+ MALI_ERROR_OUT_OF_GPU_MEMORY,
+ MALI_ERROR_OUT_OF_MEMORY,
+ MALI_ERROR_FUNCTION_FAILED,
+};
+
+static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size)
+{
+ struct kbase_device *kbdev;
+ union uk_header *ukh = args;
+ u32 id;
+
+ KBASE_DEBUG_ASSERT(ukh != NULL);
+
+ kbdev = kctx->kbdev;
+ id = ukh->id;
+ ukh->ret = MALI_ERROR_NONE; /* Be optimistic */
+
+ if (UKP_FUNC_ID_CHECK_VERSION == id) {
+ struct uku_version_check_args *version_check;
+
+ if (args_size != sizeof(struct uku_version_check_args)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ return 0;
+ }
+ version_check = (struct uku_version_check_args *)args;
+ kbase_api_handshake(version_check);
+ /* save the proposed version number for later use */
+ kctx->api_version = KBASE_API_VERSION(version_check->major,
+ version_check->minor);
+ ukh->ret = MALI_ERROR_NONE;
+ return 0;
+ }
+
+ /* block calls until version handshake */
+ if (kctx->api_version == 0)
+ return -EINVAL;
+
+ if (!atomic_read(&kctx->setup_complete)) {
+ struct kbase_uk_set_flags *kbase_set_flags;
+
+ /* setup pending, try to signal that we'll do the setup,
+ * if setup was already in progress, err this call
+ */
+ if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1))
+ return -EINVAL;
+
+ /* if unexpected call, will stay stuck in setup mode
+ * (is it the only call we accept?)
+ */
+ if (id != KBASE_FUNC_SET_FLAGS)
+ return -EINVAL;
+
+ kbase_set_flags = (struct kbase_uk_set_flags *)args;
+
+ /* if not matching the expected call, stay in setup mode */
+ if (sizeof(*kbase_set_flags) != args_size)
+ goto bad_size;
+
+ /* if bad flags, will stay stuck in setup mode */
+ if (kbase_context_set_create_flags(kctx,
+ kbase_set_flags->create_flags) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+ atomic_set(&kctx->setup_complete, 1);
+ return 0;
+ }
+
+ /* setup complete, perform normal operation */
+ switch (id) {
+ case KBASE_FUNC_MEM_ALLOC:
+ {
+ struct kbase_uk_mem_alloc *mem = args;
+ struct kbase_va_region *reg;
+
+ if (sizeof(*mem) != args_size)
+ goto bad_size;
+
+ reg = kbase_mem_alloc(kctx, mem->va_pages,
+ mem->commit_pages, mem->extent,
+ &mem->flags, &mem->gpu_va,
+ &mem->va_alignment);
+ if (!reg)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ case KBASE_FUNC_MEM_IMPORT:
+ {
+ struct kbase_uk_mem_import *mem_import = args;
+ int __user *phandle;
+ int handle;
+
+ if (sizeof(*mem_import) != args_size)
+ goto bad_size;
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ phandle = compat_ptr(mem_import->phandle.compat_value);
+ else
+#endif
+ phandle = mem_import->phandle.value;
+
+ switch (mem_import->type) {
+ case BASE_MEM_IMPORT_TYPE_UMP:
+ get_user(handle, phandle);
+ break;
+ case BASE_MEM_IMPORT_TYPE_UMM:
+ get_user(handle, phandle);
+ break;
+ default:
+ mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID;
+ break;
+ }
+
+ if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID ||
+ kbase_mem_import(kctx, mem_import->type,
+ handle, &mem_import->gpu_va,
+ &mem_import->va_pages,
+ &mem_import->flags))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ case KBASE_FUNC_MEM_ALIAS: {
+ struct kbase_uk_mem_alias *alias = args;
+ struct base_mem_aliasing_info __user *user_ai;
+ struct base_mem_aliasing_info *ai;
+
+ if (sizeof(*alias) != args_size)
+ goto bad_size;
+
+ if (alias->nents > 2048) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ if (!alias->nents) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ user_ai = compat_ptr(alias->ai.compat_value);
+ else
+#endif
+ user_ai = alias->ai.value;
+
+ ai = vmalloc(sizeof(*ai) * alias->nents);
+
+ if (!ai) {
+ ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+ break;
+ }
+
+ if (copy_from_user(ai, user_ai,
+ sizeof(*ai) * alias->nents)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ goto copy_failed;
+ }
+
+ alias->gpu_va = kbase_mem_alias(kctx, &alias->flags,
+ alias->stride,
+ alias->nents, ai,
+ &alias->va_pages);
+ if (!alias->gpu_va) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ goto no_alias;
+ }
+no_alias:
+copy_failed:
+ vfree(ai);
+ break;
+ }
+ case KBASE_FUNC_MEM_COMMIT:
+ {
+ struct kbase_uk_mem_commit *commit = args;
+
+ if (sizeof(*commit) != args_size)
+ goto bad_size;
+
+ if (commit->gpu_addr & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_commit(kctx, commit->gpu_addr,
+ commit->pages,
+ (base_backing_threshold_status *)&commit->result_subcode))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+ break;
+ }
+
+ case KBASE_FUNC_MEM_QUERY:
+ {
+ struct kbase_uk_mem_query *query = args;
+
+ if (sizeof(*query) != args_size)
+ goto bad_size;
+
+ if (query->gpu_addr & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE &&
+ query->query != KBASE_MEM_QUERY_VA_SIZE &&
+ query->query != KBASE_MEM_QUERY_FLAGS) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query);
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_query(kctx, query->gpu_addr,
+ query->query, &query->value) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ else
+ ukh->ret = MALI_ERROR_NONE;
+ break;
+ }
+ break;
+
+ case KBASE_FUNC_MEM_FLAGS_CHANGE:
+ {
+ struct kbase_uk_mem_flags_change *fc = args;
+
+ if (sizeof(*fc) != args_size)
+ goto bad_size;
+
+ if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_flags_change(kctx, fc->gpu_va, fc->flags, fc->mask))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+
+ break;
+ }
+ case KBASE_FUNC_MEM_FREE:
+ {
+ struct kbase_uk_mem_free *mem = args;
+
+ if (sizeof(*mem) != args_size)
+ goto bad_size;
+
+ if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_mem_free(kctx, mem->gpu_addr))
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ case KBASE_FUNC_JOB_SUBMIT:
+ {
+ struct kbase_uk_job_submit *job = args;
+
+ if (sizeof(*job) != args_size)
+ goto bad_size;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ if (kbase_jd_submit(kctx, job, 0) != 0)
+#else
+ if (kbase_jd_submit(kctx, job) != 0)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ case KBASE_FUNC_JOB_SUBMIT_UK6:
+ {
+ struct kbase_uk_job_submit *job = args;
+
+ if (sizeof(*job) != args_size)
+ goto bad_size;
+
+ if (kbase_jd_submit(kctx, job, 1) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+#endif
+
+ case KBASE_FUNC_SYNC:
+ {
+ struct kbase_uk_sync_now *sn = args;
+
+ if (sizeof(*sn) != args_size)
+ goto bad_size;
+
+ if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid");
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+#ifndef CONFIG_MALI_CACHE_COHERENT
+ if (kbase_sync_now(kctx, &sn->sset) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif
+ break;
+ }
+
+ case KBASE_FUNC_DISJOINT_QUERY:
+ {
+ struct kbase_uk_disjoint_query *dquery = args;
+
+ if (sizeof(*dquery) != args_size)
+ goto bad_size;
+
+ /* Get the disjointness counter value. */
+ dquery->counter = kbase_disjoint_event_get(kctx->kbdev);
+ break;
+ }
+
+ case KBASE_FUNC_POST_TERM:
+ {
+ kbase_event_close(kctx);
+ break;
+ }
+
+ case KBASE_FUNC_HWCNT_SETUP:
+ {
+ struct kbase_uk_hwcnt_setup *setup = args;
+
+ if (sizeof(*setup) != args_size)
+ goto bad_size;
+
+ mutex_lock(&kctx->vinstr_cli_lock);
+ if (kbase_instr_hwcnt_setup(kctx, setup) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ mutex_unlock(&kctx->vinstr_cli_lock);
+ break;
+ }
+
+ case KBASE_FUNC_HWCNT_DUMP:
+ {
+ /* args ignored */
+ mutex_lock(&kctx->vinstr_cli_lock);
+ if (kbase_instr_hwcnt_dump(kctx) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ mutex_unlock(&kctx->vinstr_cli_lock);
+ break;
+ }
+
+ case KBASE_FUNC_HWCNT_CLEAR:
+ {
+ /* args ignored */
+ mutex_lock(&kctx->vinstr_cli_lock);
+ if (kbase_vinstr_clear(kbdev->vinstr_ctx,
+ kctx->vinstr_cli) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ mutex_unlock(&kctx->vinstr_cli_lock);
+ break;
+ }
+
+#ifdef BASE_LEGACY_UK7_SUPPORT
+ case KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE:
+ {
+ struct kbase_uk_cpuprops *setup = args;
+
+ if (sizeof(*setup) != args_size)
+ goto bad_size;
+
+ if (kbase_cpuprops_uk_get_props(kctx, setup) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+
+ case KBASE_FUNC_GPU_PROPS_REG_DUMP:
+ {
+ struct kbase_uk_gpuprops *setup = args;
+
+ if (sizeof(*setup) != args_size)
+ goto bad_size;
+
+ if (kbase_gpuprops_uk_get_props(kctx, setup) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+ case KBASE_FUNC_FIND_CPU_OFFSET:
+ {
+ struct kbase_uk_find_cpu_offset *find = args;
+
+ if (sizeof(*find) != args_size)
+ goto bad_size;
+
+ if (find->gpu_addr & ~PAGE_MASK) {
+ dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid");
+ goto out_bad;
+ }
+
+ if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ } else {
+ int err;
+
+ err = kbasep_find_enclosing_cpu_mapping_offset(
+ kctx,
+ find->gpu_addr,
+ (uintptr_t) find->cpu_addr,
+ (size_t) find->size,
+ &find->offset);
+
+ if (err)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ }
+ break;
+ }
+ case KBASE_FUNC_GET_VERSION:
+ {
+ struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args;
+
+ if (sizeof(*get_version) != args_size)
+ goto bad_size;
+
+ /* version buffer size check is made in compile time assert */
+ memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING));
+ get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING);
+ break;
+ }
+
+ case KBASE_FUNC_STREAM_CREATE:
+ {
+#ifdef CONFIG_SYNC
+ struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args;
+
+ if (sizeof(*screate) != args_size)
+ goto bad_size;
+
+ if (strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) {
+ /* not NULL terminated */
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ break;
+ }
+
+ if (kbase_stream_create(screate->name, &screate->fd) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ else
+ ukh->ret = MALI_ERROR_NONE;
+#else /* CONFIG_SYNC */
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+#endif /* CONFIG_SYNC */
+ break;
+ }
+ case KBASE_FUNC_FENCE_VALIDATE:
+ {
+#ifdef CONFIG_SYNC
+ struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args;
+
+ if (sizeof(*fence_validate) != args_size)
+ goto bad_size;
+
+ if (kbase_fence_validate(fence_validate->fd) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ else
+ ukh->ret = MALI_ERROR_NONE;
+#endif /* CONFIG_SYNC */
+ break;
+ }
+
+ case KBASE_FUNC_EXT_BUFFER_LOCK:
+ {
+#ifdef CONFIG_KDS
+ switch (kbase_external_buffer_lock(kctx,
+ (struct kbase_uk_ext_buff_kds_data *)args,
+ args_size)) {
+ case 0:
+ ukh->ret = MALI_ERROR_NONE;
+ break;
+ case -ENOMEM:
+ ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+ break;
+ default:
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ }
+#endif /* CONFIG_KDS */
+ break;
+ }
+
+ case KBASE_FUNC_SET_TEST_DATA:
+ {
+#if MALI_UNIT_TEST
+ struct kbase_uk_set_test_data *set_data = args;
+
+ shared_kernel_test_data = set_data->test_data;
+ shared_kernel_test_data.kctx.value = (void __user *)kctx;
+ shared_kernel_test_data.mm.value = (void __user *)current->mm;
+ ukh->ret = MALI_ERROR_NONE;
+#endif /* MALI_UNIT_TEST */
+ break;
+ }
+
+ case KBASE_FUNC_INJECT_ERROR:
+ {
+#ifdef CONFIG_MALI_ERROR_INJECT
+ unsigned long flags;
+ struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params;
+
+ /*mutex lock */
+ spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+ if (job_atom_inject_error(&params) != 0)
+ ukh->ret = MALI_ERROR_OUT_OF_MEMORY;
+ else
+ ukh->ret = MALI_ERROR_NONE;
+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+ /*mutex unlock */
+#endif /* CONFIG_MALI_ERROR_INJECT */
+ break;
+ }
+
+ case KBASE_FUNC_MODEL_CONTROL:
+ {
+#ifdef CONFIG_MALI_NO_MALI
+ unsigned long flags;
+ struct kbase_model_control_params params =
+ ((struct kbase_uk_model_control_params *)args)->params;
+
+ /*mutex lock */
+ spin_lock_irqsave(&kbdev->reg_op_lock, flags);
+ if (gpu_model_control(kbdev->model, &params) != 0)
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ else
+ ukh->ret = MALI_ERROR_NONE;
+ spin_unlock_irqrestore(&kbdev->reg_op_lock, flags);
+ /*mutex unlock */
+#endif /* CONFIG_MALI_NO_MALI */
+ break;
+ }
+
+ case KBASE_FUNC_GET_PROFILING_CONTROLS:
+ {
+ struct kbase_uk_profiling_controls *controls =
+ (struct kbase_uk_profiling_controls *)args;
+ u32 i;
+
+ if (sizeof(*controls) != args_size)
+ goto bad_size;
+
+ for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+ controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i);
+
+ break;
+ }
+
+ /* used only for testing purposes; these controls are to be set by gator through gator API */
+ case KBASE_FUNC_SET_PROFILING_CONTROLS:
+ {
+ struct kbase_uk_profiling_controls *controls =
+ (struct kbase_uk_profiling_controls *)args;
+ u32 i;
+
+ if (sizeof(*controls) != args_size)
+ goto bad_size;
+
+ for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++)
+ _mali_profiling_control(i, controls->profiling_controls[i]);
+
+ break;
+ }
+
+ case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD:
+ {
+ struct kbase_uk_debugfs_mem_profile_add *add_data =
+ (struct kbase_uk_debugfs_mem_profile_add *)args;
+ char *buf;
+ char __user *user_buf;
+
+ if (sizeof(*add_data) != args_size)
+ goto bad_size;
+
+ if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) {
+ dev_err(kbdev->dev, "buffer too big");
+ goto out_bad;
+ }
+
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ user_buf = compat_ptr(add_data->buf.compat_value);
+ else
+#endif
+ user_buf = add_data->buf.value;
+
+ buf = kmalloc(add_data->len, GFP_KERNEL);
+ if (!buf)
+ goto out_bad;
+
+ if (0 != copy_from_user(buf, user_buf, add_data->len)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ kfree(buf);
+ goto out_bad;
+ }
+ kbasep_mem_profile_debugfs_insert(kctx, buf,
+ add_data->len);
+
+ break;
+ }
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ case KBASE_FUNC_TLSTREAM_ACQUIRE:
+ {
+ struct kbase_uk_tlstream_acquire *tlstream_acquire =
+ args;
+
+ if (sizeof(*tlstream_acquire) != args_size)
+ goto bad_size;
+
+ if (0 != kbase_tlstream_acquire(
+ kctx,
+ &tlstream_acquire->fd)) {
+ ukh->ret = MALI_ERROR_FUNCTION_FAILED;
+ } else if (0 <= tlstream_acquire->fd) {
+ /* Summary stream was cleared during acquire.
+ * Create static timeline objects that will be
+ * read by client. */
+ kbase_create_timeline_objects(kctx);
+ }
+ break;
+ }
+ case KBASE_FUNC_TLSTREAM_FLUSH:
+ {
+ struct kbase_uk_tlstream_flush *tlstream_flush =
+ args;
+
+ if (sizeof(*tlstream_flush) != args_size)
+ goto bad_size;
+
+ kbase_tlstream_flush_streams();
+ break;
+ }
+#if MALI_UNIT_TEST
+ case KBASE_FUNC_TLSTREAM_TEST:
+ {
+ struct kbase_uk_tlstream_test *tlstream_test = args;
+
+ if (sizeof(*tlstream_test) != args_size)
+ goto bad_size;
+
+ kbase_tlstream_test(
+ tlstream_test->tpw_count,
+ tlstream_test->msg_delay,
+ tlstream_test->msg_count,
+ tlstream_test->aux_msg);
+ break;
+ }
+ case KBASE_FUNC_TLSTREAM_STATS:
+ {
+ struct kbase_uk_tlstream_stats *tlstream_stats = args;
+
+ if (sizeof(*tlstream_stats) != args_size)
+ goto bad_size;
+
+ kbase_tlstream_stats(
+ &tlstream_stats->bytes_collected,
+ &tlstream_stats->bytes_generated);
+ break;
+ }
+#endif /* MALI_UNIT_TEST */
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+ /* used to signal the job core dump on fault has terminated and release the
+ * refcount of the context to let it be removed. It requires at least
+ * BASE_UK_VERSION_MAJOR to be 8 and BASE_UK_VERSION_MINOR to be 1 in the
+ * UK interface.
+ */
+ case KBASE_FUNC_DUMP_FAULT_TERM:
+ {
+#if 2 == MALI_INSTRUMENTATION_LEVEL
+ if (atomic_read(&kctx->jctx.sched_info.ctx.fault_count) > 0 &&
+ kctx->jctx.sched_info.ctx.is_scheduled)
+
+ kbasep_js_dump_fault_term(kbdev, kctx);
+
+ break;
+#endif /* 2 == MALI_INSTRUMENTATION_LEVEL */
+
+ /* This IOCTL should only be called when instr=2 at compile time. */
+ goto out_bad;
+ }
+
+ case KBASE_FUNC_GET_CONTEXT_ID:
+ {
+ struct kbase_uk_context_id *info = args;
+
+ info->id = kctx->id;
+ break;
+ }
+
+ default:
+ dev_err(kbdev->dev, "unknown ioctl %u", id);
+ goto out_bad;
+ }
+
+ return 0;
+
+ bad_size:
+ dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id);
+ out_bad:
+ return -EINVAL;
+}
+
+static struct kbase_device *to_kbase_device(struct device *dev)
+{
+ return dev_get_drvdata(dev);
+}
+
+/*
+ * API to acquire device list mutex and
+ * return pointer to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void)
+{
+ mutex_lock(&kbase_dev_list_lock);
+ return &kbase_dev_list;
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_get);
+
+/* API to release the device list mutex */
+void kbase_dev_list_put(const struct list_head *dev_list)
+{
+ mutex_unlock(&kbase_dev_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_dev_list_put);
+
+/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */
+struct kbase_device *kbase_find_device(int minor)
+{
+ struct kbase_device *kbdev = NULL;
+ struct list_head *entry;
+ const struct list_head *dev_list = kbase_dev_list_get();
+
+ list_for_each(entry, dev_list) {
+ struct kbase_device *tmp;
+
+ tmp = list_entry(entry, struct kbase_device, entry);
+ if (tmp->mdev.minor == minor || minor == -1) {
+ kbdev = tmp;
+ get_device(kbdev->dev);
+ break;
+ }
+ }
+ kbase_dev_list_put(dev_list);
+
+ return kbdev;
+}
+EXPORT_SYMBOL(kbase_find_device);
+
+void kbase_release_device(struct kbase_device *kbdev)
+{
+ put_device(kbdev->dev);
+}
+EXPORT_SYMBOL(kbase_release_device);
+
+static int kbase_open(struct inode *inode, struct file *filp)
+{
+ struct kbase_device *kbdev = NULL;
+ struct kbase_context *kctx;
+ int ret = 0;
+#ifdef CONFIG_DEBUG_FS
+ char kctx_name[64];
+#endif
+
+ kbdev = kbase_find_device(iminor(inode));
+
+ if (!kbdev)
+ return -ENODEV;
+
+ kctx = kbase_create_context(kbdev, is_compat_task());
+ if (!kctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ init_waitqueue_head(&kctx->event_queue);
+ filp->private_data = kctx;
+
+ kctx->infinite_cache_active = kbdev->infinite_cache_active_default;
+
+#ifdef CONFIG_DEBUG_FS
+ snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id);
+
+ kctx->kctx_dentry = debugfs_create_dir(kctx_name,
+ kbdev->debugfs_ctx_directory);
+
+ if (IS_ERR_OR_NULL(kctx->kctx_dentry)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+#ifdef CONFIG_MALI_CACHE_COHERENT
+ /* if cache is completely coherent at hardware level, then remove the
+ * infinite cache control support from debugfs.
+ */
+#else
+ debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry,
+ &kctx->infinite_cache_active);
+#endif /* CONFIG_MALI_CACHE_COHERENT */
+ kbasep_mem_profile_debugfs_add(kctx);
+
+ kbasep_jd_debugfs_ctx_add(kctx);
+ kbase_debug_mem_view_init(filp);
+#endif
+
+ dev_dbg(kbdev->dev, "created base context\n");
+
+ {
+ struct kbasep_kctx_list_element *element;
+
+ element = kzalloc(sizeof(*element), GFP_KERNEL);
+ if (element) {
+ mutex_lock(&kbdev->kctx_list_lock);
+ element->kctx = kctx;
+ list_add(&element->link, &kbdev->kctx_list);
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ kbase_tlstream_tl_new_ctx(
+ element->kctx,
+ (u32)(element->kctx->id));
+#endif
+ mutex_unlock(&kbdev->kctx_list_lock);
+ } else {
+ /* we don't treat this as a fail - just warn about it */
+ dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n");
+ }
+ }
+ return 0;
+
+ out:
+ kbase_release_device(kbdev);
+ return ret;
+}
+
+static int kbase_release(struct inode *inode, struct file *filp)
+{
+ struct kbase_context *kctx = filp->private_data;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbasep_kctx_list_element *element, *tmp;
+ bool found_element = false;
+
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ kbase_tlstream_tl_del_ctx(kctx);
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+ debugfs_remove_recursive(kctx->kctx_dentry);
+ kbasep_mem_profile_debugfs_remove(kctx);
+#endif
+
+ mutex_lock(&kbdev->kctx_list_lock);
+ list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) {
+ if (element->kctx == kctx) {
+ list_del(&element->link);
+ kfree(element);
+ found_element = true;
+ }
+ }
+ mutex_unlock(&kbdev->kctx_list_lock);
+ if (!found_element)
+ dev_warn(kbdev->dev, "kctx not in kctx_list\n");
+
+ filp->private_data = NULL;
+
+ mutex_lock(&kctx->vinstr_cli_lock);
+ /* If this client was performing hwcnt dumping and did not explicitly
+ * detach itself, remove it from the vinstr core now */
+ kbase_vinstr_detach_client(kctx->kbdev->vinstr_ctx, kctx->vinstr_cli);
+ mutex_unlock(&kctx->vinstr_cli_lock);
+
+ kbase_destroy_context(kctx);
+
+ dev_dbg(kbdev->dev, "deleted base context\n");
+ kbase_release_device(kbdev);
+ return 0;
+}
+
+#define CALL_MAX_SIZE 536
+
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull }; /* alignment fixup */
+ u32 size = _IOC_SIZE(cmd);
+ struct kbase_context *kctx = filp->private_data;
+
+ if (size > CALL_MAX_SIZE)
+ return -ENOTTY;
+
+ if (0 != copy_from_user(&msg, (void __user *)arg, size)) {
+ dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n");
+ return -EFAULT;
+ }
+
+ if (kbase_dispatch(kctx, &msg, size) != 0)
+ return -EFAULT;
+
+ if (0 != copy_to_user((void __user *)arg, &msg, size)) {
+ dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n");
+ return -EFAULT;
+ }
+ return 0;
+}
+
+static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
+{
+ struct kbase_context *kctx = filp->private_data;
+ struct base_jd_event_v2 uevent;
+ int out_count = 0;
+
+ if (count < sizeof(uevent))
+ return -ENOBUFS;
+
+ do {
+ while (kbase_event_dequeue(kctx, &uevent)) {
+ if (out_count > 0)
+ goto out;
+
+ if (filp->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ if (wait_event_interruptible(kctx->event_queue, kbase_event_pending(kctx)))
+ return -ERESTARTSYS;
+ }
+ if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
+ if (out_count == 0)
+ return -EPIPE;
+ goto out;
+ }
+
+ if (copy_to_user(buf, &uevent, sizeof(uevent)))
+ return -EFAULT;
+
+ buf += sizeof(uevent);
+ out_count++;
+ count -= sizeof(uevent);
+ } while (count >= sizeof(uevent));
+
+ out:
+ return out_count * sizeof(uevent);
+}
+
+static unsigned int kbase_poll(struct file *filp, poll_table *wait)
+{
+ struct kbase_context *kctx = filp->private_data;
+
+ poll_wait(filp, &kctx->event_queue, wait);
+ if (kbase_event_pending(kctx))
+ return POLLIN | POLLRDNORM;
+
+ return 0;
+}
+
+void kbase_event_wakeup(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+
+ wake_up_interruptible(&kctx->event_queue);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_wakeup);
+
+static int kbase_check_flags(int flags)
+{
+ /* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always
+ * closes the file descriptor in a child process.
+ */
+ if (0 == (flags & O_CLOEXEC))
+ return -EINVAL;
+
+ return 0;
+}
+
+static unsigned long kbase_get_unmapped_area(struct file *filp,
+ const unsigned long addr, const unsigned long len,
+ const unsigned long pgoff, const unsigned long flags)
+{
+#ifdef CONFIG_64BIT
+ /* based on get_unmapped_area, but simplified slightly due to that some
+ * values are known in advance */
+ struct kbase_context *kctx = filp->private_data;
+
+ if (!kctx->is_compat && !addr &&
+ kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) {
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long low_limit, high_limit, gap_start, gap_end;
+
+ /* Hardware has smaller VA than userspace, ensure the page
+ * comes from a VA which can be used on the GPU */
+
+ gap_end = (1UL<<33);
+ if (gap_end < len)
+ return -ENOMEM;
+ high_limit = gap_end - len;
+ low_limit = PAGE_SIZE + len;
+
+ gap_start = mm->highest_vm_end;
+ if (gap_start <= high_limit)
+ goto found_highest;
+
+ if (RB_EMPTY_ROOT(&mm->mm_rb))
+ return -ENOMEM;
+ vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
+ if (vma->rb_subtree_gap < len)
+ return -ENOMEM;
+
+ while (true) {
+ gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
+ if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+ struct vm_area_struct *right =
+ rb_entry(vma->vm_rb.rb_right,
+ struct vm_area_struct, vm_rb);
+ if (right->rb_subtree_gap >= len) {
+ vma = right;
+ continue;
+ }
+ }
+check_current:
+ gap_end = vma->vm_start;
+ if (gap_end < low_limit)
+ return -ENOMEM;
+ if (gap_start <= high_limit &&
+ gap_end - gap_start >= len)
+ goto found;
+
+ if (vma->vm_rb.rb_left) {
+ struct vm_area_struct *left =
+ rb_entry(vma->vm_rb.rb_left,
+ struct vm_area_struct, vm_rb);
+
+ if (left->rb_subtree_gap >= len) {
+ vma = left;
+ continue;
+ }
+ }
+ while (true) {
+ struct rb_node *prev = &vma->vm_rb;
+
+ if (!rb_parent(prev))
+ return -ENOMEM;
+ vma = rb_entry(rb_parent(prev),
+ struct vm_area_struct, vm_rb);
+ if (prev == vma->vm_rb.rb_right) {
+ gap_start = vma->vm_prev ?
+ vma->vm_prev->vm_end : 0;
+ goto check_current;
+ }
+ }
+ }
+
+found:
+ if (gap_end > (1UL<<33))
+ gap_end = (1UL<<33);
+
+found_highest:
+ gap_end -= len;
+
+ VM_BUG_ON(gap_end < PAGE_SIZE);
+ VM_BUG_ON(gap_end < gap_start);
+ return gap_end;
+ }
+#endif
+ /* No special requirements - fallback to the default version */
+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+}
+
+static const struct file_operations kbase_fops = {
+ .owner = THIS_MODULE,
+ .open = kbase_open,
+ .release = kbase_release,
+ .read = kbase_read,
+ .poll = kbase_poll,
+ .unlocked_ioctl = kbase_ioctl,
+ .compat_ioctl = kbase_ioctl,
+ .mmap = kbase_mmap,
+ .check_flags = kbase_check_flags,
+ .get_unmapped_area = kbase_get_unmapped_area,
+};
+
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value)
+{
+ writel(value, kbdev->reg + offset);
+}
+
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset)
+{
+ return readl(kbdev->reg + offset);
+}
+#endif /* !CONFIG_MALI_NO_MALI */
+
+
+/** Show callback for the @c power_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_policy *current_policy;
+ const struct kbase_pm_policy *const *policy_list;
+ int policy_count;
+ int i;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ current_policy = kbase_pm_get_policy(kbdev);
+
+ policy_count = kbase_pm_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+ if (policy_list[i] == current_policy)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+ else
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+ }
+
+ if (ret < PAGE_SIZE - 1) {
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ } else {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** Store callback for the @c power_policy sysfs file.
+ *
+ * This function is called when the @c power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls @ref kbase_pm_set_policy to change the
+ * policy.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_policy *new_policy = NULL;
+ const struct kbase_pm_policy *const *policy_list;
+ int policy_count;
+ int i;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ policy_count = kbase_pm_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count; i++) {
+ if (sysfs_streq(policy_list[i]->name, buf)) {
+ new_policy = policy_list[i];
+ break;
+ }
+ }
+
+ if (!new_policy) {
+ dev_err(dev, "power_policy: policy not found\n");
+ return -EINVAL;
+ }
+
+ kbase_pm_set_policy(kbdev, new_policy);
+
+ return count;
+}
+
+/** The sysfs file @c power_policy.
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+
+/** Show callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called to get the contents of the @c core_availability_policy
+ * sysfs file. This is a list of the available policies with the currently
+ * active one surrounded by square brackets.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_ca_policy *current_policy;
+ const struct kbase_pm_ca_policy *const *policy_list;
+ int policy_count;
+ int i;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ current_policy = kbase_pm_ca_get_policy(kbdev);
+
+ policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) {
+ if (policy_list[i] == current_policy)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name);
+ else
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name);
+ }
+
+ if (ret < PAGE_SIZE - 1) {
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ } else {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** Store callback for the @c core_availability_policy sysfs file.
+ *
+ * This function is called when the @c core_availability_policy sysfs file is
+ * written to. It matches the requested policy against the available policies
+ * and if a matching policy is found calls @ref kbase_pm_set_policy to change
+ * the policy.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ const struct kbase_pm_ca_policy *new_policy = NULL;
+ const struct kbase_pm_ca_policy *const *policy_list;
+ int policy_count;
+ int i;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ policy_count = kbase_pm_ca_list_policies(&policy_list);
+
+ for (i = 0; i < policy_count; i++) {
+ if (sysfs_streq(policy_list[i]->name, buf)) {
+ new_policy = policy_list[i];
+ break;
+ }
+ }
+
+ if (!new_policy) {
+ dev_err(dev, "core_availability_policy: policy not found\n");
+ return -EINVAL;
+ }
+
+ kbase_pm_ca_set_policy(kbdev, new_policy);
+
+ return count;
+}
+
+/** The sysfs file @c core_availability_policy
+ *
+ * This is used for obtaining information about the available policies,
+ * determining which policy is currently active, and changing the active
+ * policy.
+ */
+static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy);
+
+/** Show callback for the @c core_mask sysfs file.
+ *
+ * This function is called to get the contents of the @c core_mask sysfs
+ * file.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask : 0x%llX\n", kbdev->pm.debug_core_mask);
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Available core mask : 0x%llX\n", kbdev->shader_present_bitmap);
+
+ return ret;
+}
+
+/** Store callback for the @c core_mask sysfs file.
+ *
+ * This function is called when the @c core_mask sysfs file is written to.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ u64 new_core_mask;
+ int rc;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ rc = kstrtoull(buf, 16, &new_core_mask);
+ if (rc)
+ return rc;
+
+ if ((new_core_mask & kbdev->shader_present_bitmap) != new_core_mask ||
+ !(new_core_mask & kbdev->gpu_props.props.coherency_info.group[0].core_mask)) {
+ dev_err(dev, "power_policy: invalid core specification\n");
+ return -EINVAL;
+ }
+
+ if (kbdev->pm.debug_core_mask != new_core_mask) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+ kbase_pm_set_debug_core_mask(kbdev, new_core_mask);
+
+ spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+ }
+
+ return count;
+}
+
+/** The sysfs file @c core_mask.
+ *
+ * This is used to restrict shader core availability for debugging purposes.
+ * Reading it will show the current core mask and the mask of cores available.
+ * Writing to it will set the current core mask.
+ */
+static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+/**
+ * struct sc_split_config
+ * @tag: Short name
+ * @human_readable: Long name
+ * @js0_mask: Mask for job slot 0
+ * @js1_mask: Mask for job slot 1
+ * @js2_mask: Mask for job slot 2
+ *
+ * Structure containing a single shader affinity split configuration.
+ */
+struct sc_split_config {
+ char const *tag;
+ char const *human_readable;
+ u64 js0_mask;
+ u64 js1_mask;
+ u64 js2_mask;
+};
+
+/*
+ * Array of available shader affinity split configurations.
+ */
+static struct sc_split_config const sc_split_configs[] = {
+ /* All must be the first config (default). */
+ {
+ "all", "All cores",
+ 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
+ },
+ {
+ "mp1", "MP1 shader core",
+ 0x1, 0x1, 0x1
+ },
+ {
+ "mp2", "MP2 shader core",
+ 0x3, 0x3, 0x3
+ },
+ {
+ "mp4", "MP4 shader core",
+ 0xF, 0xF, 0xF
+ },
+ {
+ "mp1_vf", "MP1 vertex + MP1 fragment shader core",
+ 0x2, 0x1, 0xFFFFFFFFFFFFFFFFULL
+ },
+ {
+ "mp2_vf", "MP2 vertex + MP2 fragment shader core",
+ 0xA, 0x5, 0xFFFFFFFFFFFFFFFFULL
+ },
+ /* This must be the last config. */
+ {
+ NULL, NULL,
+ 0x0, 0x0, 0x0
+ },
+};
+
+/* Pointer to the currently active shader split configuration. */
+static struct sc_split_config const *current_sc_split_config = &sc_split_configs[0];
+
+/** Show callback for the @c sc_split sysfs file
+ *
+ * Returns the current shader core affinity policy.
+ */
+static ssize_t show_split(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ ssize_t ret;
+ /* We know we are given a buffer which is PAGE_SIZE long. Our strings are all guaranteed
+ * to be shorter than that at this time so no length check needed. */
+ ret = scnprintf(buf, PAGE_SIZE, "Current sc_split: '%s'\n", current_sc_split_config->tag);
+ return ret;
+}
+
+/** Store callback for the @c sc_split sysfs file.
+ *
+ * This function is called when the @c sc_split sysfs file is written to
+ * It modifies the system shader core affinity configuration to allow
+ * system profiling with different hardware configurations.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_split(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct sc_split_config const *config = &sc_split_configs[0];
+
+ /* Try to match: loop until we hit the last "NULL" entry */
+ while (config->tag) {
+ if (sysfs_streq(config->tag, buf)) {
+ current_sc_split_config = config;
+ mali_js0_affinity_mask = config->js0_mask;
+ mali_js1_affinity_mask = config->js1_mask;
+ mali_js2_affinity_mask = config->js2_mask;
+ dev_dbg(dev, "Setting sc_split: '%s'\n", config->tag);
+ return count;
+ }
+ config++;
+ }
+
+ /* No match found in config list */
+ dev_err(dev, "sc_split: invalid value\n");
+ dev_err(dev, " Possible settings: mp[1|2|4], mp[1|2]_vf\n");
+ return -ENOENT;
+}
+
+/** The sysfs file @c sc_split
+ *
+ * This is used for configuring/querying the current shader core work affinity
+ * configuration.
+ */
+static DEVICE_ATTR(sc_split, S_IRUGO|S_IWUSR, show_split, set_split);
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+
+/** Store callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. This file contains five values separated by whitespace. The values
+ * are basically the same as JS_SOFT_STOP_TICKS, JS_HARD_STOP_TICKS_SS,
+ * JS_HARD_STOP_TICKS_DUMPING, JS_RESET_TICKS_SS, JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the js_timeout
+ * values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfile file allows the current values in
+ * use by the job scheduler to get override. Note that a value needs to
+ * be other than 0 for it to override the current job scheduler value.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int items;
+ long js_soft_stop_ms;
+ long js_soft_stop_ms_cl;
+ long js_hard_stop_ms_ss;
+ long js_hard_stop_ms_cl;
+ long js_hard_stop_ms_dumping;
+ long js_reset_ms_ss;
+ long js_reset_ms_cl;
+ long js_reset_ms_dumping;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+ &js_soft_stop_ms, &js_soft_stop_ms_cl,
+ &js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+ &js_hard_stop_ms_dumping, &js_reset_ms_ss,
+ &js_reset_ms_cl, &js_reset_ms_dumping);
+
+ if (items == 8) {
+ u64 ticks;
+
+ if (js_soft_stop_ms >= 0) {
+ ticks = js_soft_stop_ms * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_soft_stop_ticks = ticks;
+ } else {
+ kbdev->js_soft_stop_ticks = -1;
+ }
+
+ if (js_soft_stop_ms_cl >= 0) {
+ ticks = js_soft_stop_ms_cl * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_soft_stop_ticks_cl = ticks;
+ } else {
+ kbdev->js_soft_stop_ticks_cl = -1;
+ }
+
+ if (js_hard_stop_ms_ss >= 0) {
+ ticks = js_hard_stop_ms_ss * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_ss = ticks;
+ } else {
+ kbdev->js_hard_stop_ticks_ss = -1;
+ }
+
+ if (js_hard_stop_ms_cl >= 0) {
+ ticks = js_hard_stop_ms_cl * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_cl = ticks;
+ } else {
+ kbdev->js_hard_stop_ticks_cl = -1;
+ }
+
+ if (js_hard_stop_ms_dumping >= 0) {
+ ticks = js_hard_stop_ms_dumping * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_dumping = ticks;
+ } else {
+ kbdev->js_hard_stop_ticks_dumping = -1;
+ }
+
+ if (js_reset_ms_ss >= 0) {
+ ticks = js_reset_ms_ss * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_reset_ticks_ss = ticks;
+ } else {
+ kbdev->js_reset_ticks_ss = -1;
+ }
+
+ if (js_reset_ms_cl >= 0) {
+ ticks = js_reset_ms_cl * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_reset_ticks_cl = ticks;
+ } else {
+ kbdev->js_reset_ticks_cl = -1;
+ }
+
+ if (js_reset_ms_dumping >= 0) {
+ ticks = js_reset_ms_dumping * 1000000ULL;
+ do_div(ticks, kbdev->js_data.scheduling_period_ns);
+ kbdev->js_reset_ticks_dumping = ticks;
+ } else {
+ kbdev->js_reset_ticks_dumping = -1;
+ }
+
+ kbdev->js_timeouts_updated = true;
+
+ dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS with %lu ticks (%lu ms)\n",
+ (unsigned long)kbdev->js_soft_stop_ticks,
+ js_soft_stop_ms);
+ dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS_CL with %lu ticks (%lu ms)\n",
+ (unsigned long)kbdev->js_soft_stop_ticks_cl,
+ js_soft_stop_ms_cl);
+ dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_SS with %lu ticks (%lu ms)\n",
+ (unsigned long)kbdev->js_hard_stop_ticks_ss,
+ js_hard_stop_ms_ss);
+ dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_CL with %lu ticks (%lu ms)\n",
+ (unsigned long)kbdev->js_hard_stop_ticks_cl,
+ js_hard_stop_ms_cl);
+ dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_DUMPING with %lu ticks (%lu ms)\n",
+ (unsigned long)
+ kbdev->js_hard_stop_ticks_dumping,
+ js_hard_stop_ms_dumping);
+ dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_SS with %lu ticks (%lu ms)\n",
+ (unsigned long)kbdev->js_reset_ticks_ss,
+ js_reset_ms_ss);
+ dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_CL with %lu ticks (%lu ms)\n",
+ (unsigned long)kbdev->js_reset_ticks_cl,
+ js_reset_ms_cl);
+ dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_DUMPING with %lu ticks (%lu ms)\n",
+ (unsigned long)kbdev->js_reset_ticks_dumping,
+ js_reset_ms_dumping);
+
+ return count;
+ }
+
+ dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n"
+ "Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n"
+ "Write 0 for no change, -1 to restore default timeout\n");
+ return -EINVAL;
+}
+
+/** Show callback for the @c js_timeouts sysfs file.
+ *
+ * This function is called to get the contents of the @c js_timeouts sysfs
+ * file. It returns the last set values written to the js_timeouts sysfs file.
+ * If the file didn't get written yet, the values will be current setting in
+ * use.
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+ u64 ms;
+ unsigned long js_soft_stop_ms;
+ unsigned long js_soft_stop_ms_cl;
+ unsigned long js_hard_stop_ms_ss;
+ unsigned long js_hard_stop_ms_cl;
+ unsigned long js_hard_stop_ms_dumping;
+ unsigned long js_reset_ms_ss;
+ unsigned long js_reset_ms_cl;
+ unsigned long js_reset_ms_dumping;
+ unsigned long ticks;
+ u32 scheduling_period_ns;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ /* If no contexts have been scheduled since js_timeouts was last written
+ * to, the new timeouts might not have been latched yet. So check if an
+ * update is pending and use the new values if necessary. */
+ if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
+ scheduling_period_ns = kbdev->js_scheduling_period_ns;
+ else
+ scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
+ ticks = kbdev->js_soft_stop_ticks;
+ else
+ ticks = kbdev->js_data.soft_stop_ticks;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_soft_stop_ms = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
+ ticks = kbdev->js_soft_stop_ticks_cl;
+ else
+ ticks = kbdev->js_data.soft_stop_ticks_cl;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_soft_stop_ms_cl = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
+ ticks = kbdev->js_hard_stop_ticks_ss;
+ else
+ ticks = kbdev->js_data.hard_stop_ticks_ss;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_hard_stop_ms_ss = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
+ ticks = kbdev->js_hard_stop_ticks_cl;
+ else
+ ticks = kbdev->js_data.hard_stop_ticks_cl;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_hard_stop_ms_cl = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
+ ticks = kbdev->js_hard_stop_ticks_dumping;
+ else
+ ticks = kbdev->js_data.hard_stop_ticks_dumping;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_hard_stop_ms_dumping = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
+ ticks = kbdev->js_reset_ticks_ss;
+ else
+ ticks = kbdev->js_data.gpu_reset_ticks_ss;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_reset_ms_ss = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
+ ticks = kbdev->js_reset_ticks_cl;
+ else
+ ticks = kbdev->js_data.gpu_reset_ticks_cl;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_reset_ms_cl = (unsigned long)ms;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
+ ticks = kbdev->js_reset_ticks_dumping;
+ else
+ ticks = kbdev->js_data.gpu_reset_ticks_dumping;
+ ms = (u64)ticks * scheduling_period_ns;
+ do_div(ms, 1000000UL);
+ js_reset_ms_dumping = (unsigned long)ms;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+ js_soft_stop_ms, js_soft_stop_ms_cl,
+ js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+ js_hard_stop_ms_dumping, js_reset_ms_ss,
+ js_reset_ms_cl, js_reset_ms_dumping);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** The sysfs file @c js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_STOP_STOP_TICKS_SS
+ * JS_STOP_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ * file
+ * @dev: The device the sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to. It checks the data written, and if valid updates the js_scheduling_period
+ * value
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_scheduling_period(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int ret;
+ unsigned int js_scheduling_period;
+ u32 new_scheduling_period_ns;
+ u32 old_period;
+ u64 ticks;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = kstrtouint(buf, 0, &js_scheduling_period);
+ if (ret || !js_scheduling_period) {
+ dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n"
+ "Use format <js_scheduling_period_ms>\n");
+ return -EINVAL;
+ }
+
+ new_scheduling_period_ns = js_scheduling_period * 1000000;
+
+ /* Update scheduling timeouts */
+ mutex_lock(&kbdev->js_data.runpool_mutex);
+
+ /* If no contexts have been scheduled since js_timeouts was last written
+ * to, the new timeouts might not have been latched yet. So check if an
+ * update is pending and use the new values if necessary. */
+
+ /* Use previous 'new' scheduling period as a base if present. */
+ if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns)
+ old_period = kbdev->js_scheduling_period_ns;
+ else
+ old_period = kbdev->js_data.scheduling_period_ns;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
+ ticks = (u64)kbdev->js_soft_stop_ticks * old_period;
+ else
+ ticks = (u64)kbdev->js_data.soft_stop_ticks *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_soft_stop_ticks = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
+ ticks = (u64)kbdev->js_soft_stop_ticks_cl * old_period;
+ else
+ ticks = (u64)kbdev->js_data.soft_stop_ticks_cl *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_soft_stop_ticks_cl = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
+ ticks = (u64)kbdev->js_hard_stop_ticks_ss * old_period;
+ else
+ ticks = (u64)kbdev->js_data.hard_stop_ticks_ss *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_ss = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
+ ticks = (u64)kbdev->js_hard_stop_ticks_cl * old_period;
+ else
+ ticks = (u64)kbdev->js_data.hard_stop_ticks_cl *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_cl = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
+ ticks = (u64)kbdev->js_hard_stop_ticks_dumping * old_period;
+ else
+ ticks = (u64)kbdev->js_data.hard_stop_ticks_dumping *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_hard_stop_ticks_dumping = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
+ ticks = (u64)kbdev->js_reset_ticks_ss * old_period;
+ else
+ ticks = (u64)kbdev->js_data.gpu_reset_ticks_ss *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_reset_ticks_ss = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
+ ticks = (u64)kbdev->js_reset_ticks_cl * old_period;
+ else
+ ticks = (u64)kbdev->js_data.gpu_reset_ticks_cl *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_reset_ticks_cl = ticks ? ticks : 1;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
+ ticks = (u64)kbdev->js_reset_ticks_dumping * old_period;
+ else
+ ticks = (u64)kbdev->js_data.gpu_reset_ticks_dumping *
+ kbdev->js_data.scheduling_period_ns;
+ do_div(ticks, new_scheduling_period_ns);
+ kbdev->js_reset_ticks_dumping = ticks ? ticks : 1;
+
+ kbdev->js_scheduling_period_ns = new_scheduling_period_ns;
+ kbdev->js_timeouts_updated = true;
+
+ mutex_unlock(&kbdev->js_data.runpool_mutex);
+
+ dev_dbg(kbdev->dev, "JS scheduling period: %dms\n",
+ js_scheduling_period);
+
+ return count;
+}
+
+/**
+ * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ * entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the JS scheduling
+ * period.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_js_scheduling_period(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ u32 period;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
+ period = kbdev->js_scheduling_period_ns;
+ else
+ period = kbdev->js_data.scheduling_period_ns;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+ period / 1000000);
+
+ return ret;
+}
+
+static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
+ show_js_scheduling_period, set_js_scheduling_period);
+
+#if !MALI_CUSTOMER_RELEASE
+/** Store callback for the @c force_replay sysfs file.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (!strncmp("limit=", buf, MIN(6, count))) {
+ int force_replay_limit;
+ int items = sscanf(buf, "limit=%u", &force_replay_limit);
+
+ if (items == 1) {
+ kbdev->force_replay_random = false;
+ kbdev->force_replay_limit = force_replay_limit;
+ kbdev->force_replay_count = 0;
+
+ return count;
+ }
+ } else if (!strncmp("random_limit", buf, MIN(12, count))) {
+ kbdev->force_replay_random = true;
+ kbdev->force_replay_count = 0;
+
+ return count;
+ } else if (!strncmp("norandom_limit", buf, MIN(14, count))) {
+ kbdev->force_replay_random = false;
+ kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED;
+ kbdev->force_replay_count = 0;
+
+ return count;
+ } else if (!strncmp("core_req=", buf, MIN(9, count))) {
+ unsigned int core_req;
+ int items = sscanf(buf, "core_req=%x", &core_req);
+
+ if (items == 1) {
+ kbdev->force_replay_core_req = (base_jd_core_req)core_req;
+
+ return count;
+ }
+ }
+ dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n");
+ return -EINVAL;
+}
+
+/** Show callback for the @c force_replay sysfs file.
+ *
+ * This function is called to get the contents of the @c force_replay sysfs
+ * file. It returns the last set value written to the force_replay sysfs file.
+ * If the file didn't get written yet, the values will be 0.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_force_replay(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kbdev->force_replay_random)
+ ret = scnprintf(buf, PAGE_SIZE,
+ "limit=0\nrandom_limit\ncore_req=%x\n",
+ kbdev->force_replay_core_req);
+ else
+ ret = scnprintf(buf, PAGE_SIZE,
+ "limit=%u\nnorandom_limit\ncore_req=%x\n",
+ kbdev->force_replay_limit,
+ kbdev->force_replay_core_req);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** The sysfs file @c force_replay.
+ *
+ */
+static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay,
+ set_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG
+static ssize_t set_js_softstop_always(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int ret;
+ int softstop_always;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = kstrtoint(buf, 0, &softstop_always);
+ if (ret || ((softstop_always != 0) && (softstop_always != 1))) {
+ dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n"
+ "Use format <soft_stop_always>\n");
+ return -EINVAL;
+ }
+
+ kbdev->js_data.softstop_always = (bool) softstop_always;
+ dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n",
+ (kbdev->js_data.softstop_always) ?
+ "Enabled" : "Disabled");
+ return count;
+}
+
+static ssize_t show_js_softstop_always(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/*
+ * By default, soft-stops are disabled when only a single context is present. The ability to
+ * enable soft-stop when only a single context is present can be used for debug and unit-testing purposes.
+ * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
+ */
+static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG
+typedef void (kbasep_debug_command_func) (struct kbase_device *);
+
+enum kbasep_debug_command_code {
+ KBASEP_DEBUG_COMMAND_DUMPTRACE,
+
+ /* This must be the last enum */
+ KBASEP_DEBUG_COMMAND_COUNT
+};
+
+struct kbasep_debug_command {
+ char *str;
+ kbasep_debug_command_func *func;
+};
+
+/** Debug commands supported by the driver */
+static const struct kbasep_debug_command debug_commands[] = {
+ {
+ .str = "dumptrace",
+ .func = &kbasep_trace_dump,
+ }
+};
+
+/** Show callback for the @c debug_command sysfs file.
+ *
+ * This function is called to get the contents of the @c debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
+ * @param dev The device this sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The output buffer for the sysfs file contents
+ *
+ * @return The number of bytes output to @c buf.
+ */
+static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ int i;
+ ssize_t ret = 0;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str);
+
+ if (ret >= PAGE_SIZE) {
+ buf[PAGE_SIZE - 2] = '\n';
+ buf[PAGE_SIZE - 1] = '\0';
+ ret = PAGE_SIZE - 1;
+ }
+
+ return ret;
+}
+
+/** Store callback for the @c debug_command sysfs file.
+ *
+ * This function is called when the @c debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @ref debug_commands to issue the command.
+ *
+ * @param dev The device with sysfs file is for
+ * @param attr The attributes of the sysfs file
+ * @param buf The value written to the sysfs file
+ * @param count The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int i;
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) {
+ if (sysfs_streq(debug_commands[i].str, buf)) {
+ debug_commands[i].func(kbdev);
+ return count;
+ }
+ }
+
+ /* Debug Command not found */
+ dev_err(dev, "debug_command: command not known\n");
+ return -EINVAL;
+}
+
+/** The sysfs file @c debug_command.
+ *
+ * This is used to issue general debug commands to the device driver.
+ * Reading it will produce a list of debug commands, separated by newlines.
+ * Writing to it with one of those commands will issue said command.
+ */
+static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+#endif /* CONFIG_MALI_DEBUG */
+
+/**
+ * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get a description of the present Mali
+ * GPU via the gpuinfo sysfs entry. This includes the GPU family, the
+ * number of cores, the hardware version and the raw product id. For
+ * example:
+ *
+ * Mali-T60x MP4 r0p0 0x6956
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t kbase_show_gpuinfo(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ static const struct gpu_product_id_name {
+ unsigned id;
+ char *name;
+ } gpu_product_id_names[] = {
+ { .id = GPU_ID_PI_T60X, .name = "Mali-T60x" },
+ { .id = GPU_ID_PI_T62X, .name = "Mali-T62x" },
+ { .id = GPU_ID_PI_T72X, .name = "Mali-T72x" },
+ { .id = GPU_ID_PI_T76X, .name = "Mali-T76x" },
+ { .id = GPU_ID_PI_T82X, .name = "Mali-T82x" },
+ { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" },
+ { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" },
+ { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" },
+#ifdef MALI_INCLUDE_TMIX
+ { .id = GPU_ID_PI_TMIX, .name = "Mali-TMIx" },
+#endif /* MALI_INCLUDE_TMIX */
+ };
+ const char *product_name = "(Unknown Mali GPU)";
+ struct kbase_device *kbdev;
+ u32 gpu_id;
+ unsigned product_id;
+ unsigned i;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+ product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+ for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
+ if (gpu_product_id_names[i].id == product_id) {
+ product_name = gpu_product_id_names[i].name;
+ break;
+ }
+ }
+
+ return scnprintf(buf, PAGE_SIZE, "%s MP%d r%dp%d 0x%04X\n",
+ product_name, kbdev->gpu_props.num_cores,
+ (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
+ (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
+ product_id);
+}
+static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+
+/**
+ * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * @dev: The device with sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the dvfs_period sysfs file is written to. It
+ * checks the data written, and if valid updates the DVFS period variable,
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_dvfs_period(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int ret;
+ int dvfs_period;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = kstrtoint(buf, 0, &dvfs_period);
+ if (ret || dvfs_period <= 0) {
+ dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n"
+ "Use format <dvfs_period_ms>\n");
+ return -EINVAL;
+ }
+
+ kbdev->pm.dvfs_period = dvfs_period;
+ dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period);
+
+ return count;
+}
+
+/**
+ * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+ return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+ set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev: The device with sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int items;
+ s64 gpu_poweroff_time;
+ int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+ &poweroff_shader_ticks,
+ &poweroff_gpu_ticks);
+ if (items != 3) {
+ dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+ "Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+ return -EINVAL;
+ }
+
+ kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+ kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+ kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+ return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+ ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+ kbdev->pm.poweroff_shader_ticks,
+ kbdev->pm.poweroff_gpu_ticks);
+
+ return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+ set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev: The device with sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to. It
+ * checks the data written, and if valid updates the reset timeout.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_reset_timeout(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ int ret;
+ int reset_timeout;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = kstrtoint(buf, 0, &reset_timeout);
+ if (ret || reset_timeout <= 0) {
+ dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
+ "Use format <reset_timeout_ms>\n");
+ return -EINVAL;
+ }
+
+ kbdev->reset_timeout_ms = reset_timeout;
+ dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+
+ return count;
+}
+
+/**
+ * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current reset timeout.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_reset_timeout(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms);
+
+ return ret;
+}
+
+static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
+ set_reset_timeout);
+
+#ifdef CONFIG_MALI_NO_MALI
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+ return 0;
+}
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+}
+#else /* CONFIG_MALI_NO_MALI */
+static int kbase_common_reg_map(struct kbase_device *kbdev)
+{
+ int err = -ENOMEM;
+
+ if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) {
+ dev_err(kbdev->dev, "Register window unavailable\n");
+ err = -EIO;
+ goto out_region;
+ }
+
+ kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+ if (!kbdev->reg) {
+ dev_err(kbdev->dev, "Can't remap register window\n");
+ err = -EINVAL;
+ goto out_ioremap;
+ }
+
+ return 0;
+
+ out_ioremap:
+ release_mem_region(kbdev->reg_start, kbdev->reg_size);
+ out_region:
+ return err;
+}
+
+static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
+{
+ iounmap(kbdev->reg);
+ release_mem_region(kbdev->reg_start, kbdev->reg_size);
+}
+#endif /* CONFIG_MALI_NO_MALI */
+
+
+#ifdef CONFIG_DEBUG_FS
+
+#if KBASE_GPU_RESET_EN
+#include <mali_kbase_hwaccess_jm.h>
+
+static void trigger_quirks_reload(struct kbase_device *kbdev)
+{
+ kbase_pm_context_active(kbdev);
+ if (kbase_prepare_to_reset_gpu(kbdev))
+ kbase_reset_gpu(kbdev);
+ kbase_pm_context_idle(kbdev);
+}
+
+#define MAKE_QUIRK_ACCESSORS(type) \
+static int type##_quirks_set(void *data, u64 val) \
+{ \
+ struct kbase_device *kbdev; \
+ kbdev = (struct kbase_device *)data; \
+ kbdev->hw_quirks_##type = (u32)val; \
+ trigger_quirks_reload(kbdev); \
+ return 0;\
+} \
+\
+static int type##_quirks_get(void *data, u64 *val) \
+{ \
+ struct kbase_device *kbdev;\
+ kbdev = (struct kbase_device *)data;\
+ *val = kbdev->hw_quirks_##type;\
+ return 0;\
+} \
+DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\
+ type##_quirks_set, "%llu\n")
+
+MAKE_QUIRK_ACCESSORS(sc);
+MAKE_QUIRK_ACCESSORS(tiler);
+MAKE_QUIRK_ACCESSORS(mmu);
+
+#endif /* KBASE_GPU_RESET_EN */
+
+static int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+ struct dentry *debugfs_ctx_defaults_directory;
+ int err;
+
+ kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname,
+ NULL);
+ if (!kbdev->mali_debugfs_directory) {
+ dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx",
+ kbdev->mali_debugfs_directory);
+ if (!kbdev->debugfs_ctx_directory) {
+ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ debugfs_ctx_defaults_directory = debugfs_create_dir("defaults",
+ kbdev->debugfs_ctx_directory);
+ if (!debugfs_ctx_defaults_directory) {
+ dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ kbasep_gpu_memory_debugfs_init(kbdev);
+#if KBASE_GPU_RESET_EN
+ debugfs_create_file("quirks_sc", 0644,
+ kbdev->mali_debugfs_directory, kbdev,
+ &fops_sc_quirks);
+ debugfs_create_file("quirks_tiler", 0644,
+ kbdev->mali_debugfs_directory, kbdev,
+ &fops_tiler_quirks);
+ debugfs_create_file("quirks_mmu", 0644,
+ kbdev->mali_debugfs_directory, kbdev,
+ &fops_mmu_quirks);
+#endif /* KBASE_GPU_RESET_EN */
+
+#ifndef CONFIG_MALI_COH_USER
+ debugfs_create_bool("infinite_cache", 0644,
+ debugfs_ctx_defaults_directory,
+ &kbdev->infinite_cache_active_default);
+#endif /* CONFIG_MALI_COH_USER */
+
+#if KBASE_TRACE_ENABLE
+ kbasep_trace_debugfs_init(kbdev);
+#endif /* KBASE_TRACE_ENABLE */
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ kbasep_trace_timeline_debugfs_init(kbdev);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+ return 0;
+
+out:
+ debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+ return err;
+}
+
+static void kbase_device_debugfs_term(struct kbase_device *kbdev)
+{
+ debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+}
+
+#else /* CONFIG_DEBUG_FS */
+static inline int kbase_device_debugfs_init(struct kbase_device *kbdev)
+{
+ return 0;
+}
+
+static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { }
+#endif /* CONFIG_DEBUG_FS */
+
+
+static int kbase_common_device_init(struct kbase_device *kbdev)
+{
+ int err;
+ struct mali_base_gpu_core_props *core_props;
+ enum {
+ inited_mem = (1u << 0),
+ inited_js = (1u << 1),
+ inited_debug = (1u << 2),
+ inited_js_softstop = (1u << 3),
+ inited_js_timeouts = (1u << 4),
+#if !MALI_CUSTOMER_RELEASE
+ inited_force_replay = (1u << 5),
+#endif /* !MALI_CUSTOMER_RELEASE */
+ inited_pm_runtime_init = (1u << 6),
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ inited_sc_split = (1u << 7),
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ inited_timeline = (1u << 8),
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+#ifdef CONFIG_MALI_DEVFREQ
+ inited_devfreq = (1u << 9),
+#endif /* CONFIG_MALI_DEVFREQ */
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ inited_tlstream = (1u << 10),
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+ inited_backend_early = (1u << 11),
+ inited_backend_late = (1u << 12),
+ inited_device = (1u << 13),
+ inited_gpuinfo = (1u << 14),
+ inited_dvfs_period = (1u << 15),
+ inited_pm_poweroff = (1u << 16),
+ inited_reset_timeout = (1u << 17),
+ inited_js_scheduling_period = (1u << 18),
+ inited_vinstr = (1u << 19)
+ };
+
+ int inited = 0;
+#if defined(CONFIG_MALI_PLATFORM_VEXPRESS)
+ u32 ve_logic_tile = 0;
+#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */
+
+ dev_set_drvdata(kbdev->dev, kbdev);
+
+ err = kbase_backend_early_init(kbdev);
+ if (err)
+ goto out_partial;
+ inited |= inited_backend_early;
+
+ scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
+ kbase_dev_nr++);
+
+ kbase_disjoint_init(kbdev);
+
+ /* obtain min/max configured gpu frequencies */
+ core_props = &(kbdev->gpu_props.props.core_props);
+
+ /* For versatile express platforms, min and max values of GPU frequency
+ * depend on the type of the logic tile; these values may not be known
+ * at the build time so in some cases a platform config file with wrong
+ * GPU freguency values may be included; to ensure the correct value of
+ * min and max GPU frequency is obtained, the type of the logic tile is
+ * read from the corresponding register on the platform and frequency
+ * values assigned accordingly.*/
+#if defined(CONFIG_MALI_PLATFORM_VEXPRESS)
+ ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+ switch (ve_logic_tile) {
+ case 0x217:
+ /* Virtex 6, HBI0217 */
+ core_props->gpu_freq_khz_min = VE_VIRTEX6_GPU_FREQ_MIN;
+ core_props->gpu_freq_khz_max = VE_VIRTEX6_GPU_FREQ_MAX;
+ break;
+ case 0x247:
+ /* Virtex 7, HBI0247 */
+ core_props->gpu_freq_khz_min = VE_VIRTEX7_GPU_FREQ_MIN;
+ core_props->gpu_freq_khz_max = VE_VIRTEX7_GPU_FREQ_MAX;
+ break;
+ default:
+ /* all other logic tiles, i.e., Virtex 5 HBI0192
+ * or unsuccessful reading from the platform -
+ * fall back to the config_platform default */
+ core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+ core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+ break;
+ }
+#else
+ core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+ core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */
+
+ kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+ err = kbase_device_init(kbdev);
+ if (err) {
+ dev_err(kbdev->dev, "Can't initialize device (%d)\n", err);
+ goto out_partial;
+ }
+
+ inited |= inited_device;
+
+ kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+ if (!kbdev->vinstr_ctx) {
+ dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n");
+ goto out_partial;
+ }
+
+ inited |= inited_vinstr;
+
+ if (kbdev->pm.callback_power_runtime_init) {
+ err = kbdev->pm.callback_power_runtime_init(kbdev);
+ if (err)
+ goto out_partial;
+
+ inited |= inited_pm_runtime_init;
+ }
+
+ err = kbase_mem_init(kbdev);
+ if (err)
+ goto out_partial;
+
+ inited |= inited_mem;
+
+ kbdev->system_coherency = COHERENCY_NONE;
+
+
+ err = kbasep_js_devdata_init(kbdev);
+ if (err)
+ goto out_partial;
+
+ inited |= inited_js;
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ err = device_create_file(kbdev->dev, &dev_attr_sc_split);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create sc_split sysfs file\n");
+ goto out_partial;
+ }
+
+ inited |= inited_sc_split;
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+#ifdef CONFIG_MALI_DEBUG
+
+ err = device_create_file(kbdev->dev, &dev_attr_debug_command);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create debug_command sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_debug;
+
+ err = device_create_file(kbdev->dev, &dev_attr_js_softstop_always);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create js_softstop_always sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_js_softstop;
+#endif /* CONFIG_MALI_DEBUG */
+
+ err = device_create_file(kbdev->dev, &dev_attr_js_timeouts);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create js_timeouts sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_js_timeouts;
+
+#if !MALI_CUSTOMER_RELEASE
+ err = device_create_file(kbdev->dev, &dev_attr_force_replay);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create force_replay sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_force_replay;
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+ err = device_create_file(kbdev->dev, &dev_attr_gpuinfo);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create gpuinfo sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_gpuinfo;
+
+ err = device_create_file(kbdev->dev, &dev_attr_dvfs_period);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create dvfs_period sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_dvfs_period;
+
+ err = device_create_file(kbdev->dev, &dev_attr_pm_poweroff);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create pm_poweroff sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_pm_poweroff;
+
+ err = device_create_file(kbdev->dev, &dev_attr_reset_timeout);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create reset_timeout sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_reset_timeout;
+
+ err = device_create_file(kbdev->dev, &dev_attr_js_scheduling_period);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create js_scheduling_period sysfs file\n");
+ goto out_partial;
+ }
+ inited |= inited_js_scheduling_period;
+
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ err = kbase_tlstream_init();
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't initialize timeline stream\n");
+ goto out_partial;
+ }
+ inited |= inited_tlstream;
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+
+ err = kbase_backend_late_init(kbdev);
+ if (err)
+ goto out_partial;
+ inited |= inited_backend_late;
+
+#ifdef CONFIG_MALI_DEVFREQ
+ err = kbase_devfreq_init(kbdev);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't initialize devfreq\n");
+ goto out_partial;
+ }
+ inited |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#ifdef SECURE_CALLBACKS
+ kbdev->secure_ops = SECURE_CALLBACKS;
+#endif
+
+ err = kbase_device_debugfs_init(kbdev);
+ if (err)
+ goto out_partial;
+
+ /* intialise the kctx list */
+ mutex_init(&kbdev->kctx_list_lock);
+ INIT_LIST_HEAD(&kbdev->kctx_list);
+
+ kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+ kbdev->mdev.name = kbdev->devname;
+ kbdev->mdev.fops = &kbase_fops;
+ kbdev->mdev.parent = get_device(kbdev->dev);
+
+ err = misc_register(&kbdev->mdev);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't register misc dev %s\n",
+ kbdev->devname);
+ goto out_misc;
+ }
+
+ err = device_create_file(kbdev->dev, &dev_attr_power_policy);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create power_policy sysfs file\n");
+ goto out_file;
+ }
+
+ err = device_create_file(kbdev->dev,
+ &dev_attr_core_availability_policy);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create core_availability_policy sysfs file\n");
+ goto out_file_core_availability_policy;
+ }
+
+ err = device_create_file(kbdev->dev, &dev_attr_core_mask);
+ if (err) {
+ dev_err(kbdev->dev, "Couldn't create core_mask sysfs file\n");
+ goto out_file_core_mask;
+ }
+
+ {
+ const struct list_head *dev_list = kbase_dev_list_get();
+
+ list_add(&kbdev->entry, &kbase_dev_list);
+ kbase_dev_list_put(dev_list);
+ }
+
+ dev_info(kbdev->dev, "Probed as %s\n",
+ dev_name(kbdev->mdev.this_device));
+
+ return 0;
+
+out_file_core_mask:
+ device_remove_file(kbdev->dev, &dev_attr_core_availability_policy);
+out_file_core_availability_policy:
+ device_remove_file(kbdev->dev, &dev_attr_power_policy);
+out_file:
+ misc_deregister(&kbdev->mdev);
+out_misc:
+ put_device(kbdev->dev);
+ kbase_device_debugfs_term(kbdev);
+out_partial:
+ if (inited & inited_vinstr)
+ kbase_vinstr_term(kbdev->vinstr_ctx);
+#ifdef CONFIG_MALI_DEVFREQ
+ if (inited & inited_devfreq)
+ kbase_devfreq_term(kbdev);
+#endif /* CONFIG_MALI_DEVFREQ */
+ if (inited & inited_backend_late)
+ kbase_backend_late_term(kbdev);
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ if (inited & inited_tlstream)
+ kbase_tlstream_term();
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+
+ if (inited & inited_js_scheduling_period)
+ device_remove_file(kbdev->dev, &dev_attr_js_scheduling_period);
+ if (inited & inited_reset_timeout)
+ device_remove_file(kbdev->dev, &dev_attr_reset_timeout);
+ if (inited & inited_pm_poweroff)
+ device_remove_file(kbdev->dev, &dev_attr_pm_poweroff);
+ if (inited & inited_dvfs_period)
+ device_remove_file(kbdev->dev, &dev_attr_dvfs_period);
+#if !MALI_CUSTOMER_RELEASE
+ if (inited & inited_force_replay)
+ device_remove_file(kbdev->dev, &dev_attr_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+ if (inited & inited_js_timeouts)
+ device_remove_file(kbdev->dev, &dev_attr_js_timeouts);
+#ifdef CONFIG_MALI_DEBUG
+ if (inited & inited_js_softstop)
+ device_remove_file(kbdev->dev, &dev_attr_js_softstop_always);
+
+ if (inited & inited_debug)
+ device_remove_file(kbdev->dev, &dev_attr_debug_command);
+
+#endif /* CONFIG_MALI_DEBUG */
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ if (inited & inited_sc_split)
+ device_remove_file(kbdev->dev, &dev_attr_sc_split);
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+ if (inited & inited_gpuinfo)
+ device_remove_file(kbdev->dev, &dev_attr_gpuinfo);
+
+ if (inited & inited_js)
+ kbasep_js_devdata_halt(kbdev);
+
+ if (inited & inited_mem)
+ kbase_mem_halt(kbdev);
+
+ if (inited & inited_js)
+ kbasep_js_devdata_term(kbdev);
+
+ if (inited & inited_mem)
+ kbase_mem_term(kbdev);
+
+ if (inited & inited_pm_runtime_init) {
+ if (kbdev->pm.callback_power_runtime_term)
+ kbdev->pm.callback_power_runtime_term(kbdev);
+ }
+
+ if (inited & inited_device)
+ kbase_device_term(kbdev);
+
+ if (inited & inited_backend_early)
+ kbase_backend_early_term(kbdev);
+
+ return err;
+}
+
+
+static int kbase_platform_device_probe(struct platform_device *pdev)
+{
+ struct kbase_device *kbdev;
+ struct resource *reg_res;
+ int err;
+ int i;
+
+#ifdef CONFIG_OF
+ err = kbase_platform_early_init();
+ if (err) {
+ dev_err(&pdev->dev, "Early platform initialization failed\n");
+ return err;
+ }
+#endif
+
+ kbdev = kbase_device_alloc();
+ if (!kbdev) {
+ dev_err(&pdev->dev, "Can't allocate device\n");
+ err = -ENOMEM;
+ goto out;
+ }
+#ifdef CONFIG_MALI_NO_MALI
+ err = gpu_device_create(kbdev);
+ if (err) {
+ dev_err(&pdev->dev, "Can't initialize dummy model\n");
+ goto out_midg;
+ }
+#endif /* CONFIG_MALI_NO_MALI */
+
+ kbdev->dev = &pdev->dev;
+
+ /* 3 IRQ resources */
+ for (i = 0; i < 3; i++) {
+ struct resource *irq_res;
+ int irqtag;
+
+ irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+ if (!irq_res) {
+ dev_err(kbdev->dev, "No IRQ resource at index %d\n", i);
+ err = -ENOENT;
+ goto out_platform_irq;
+ }
+
+#ifdef CONFIG_OF
+ if (!strcmp(irq_res->name, "JOB")) {
+ irqtag = JOB_IRQ_TAG;
+ } else if (!strcmp(irq_res->name, "MMU")) {
+ irqtag = MMU_IRQ_TAG;
+ } else if (!strcmp(irq_res->name, "GPU")) {
+ irqtag = GPU_IRQ_TAG;
+ } else {
+ dev_err(&pdev->dev, "Invalid irq res name: '%s'\n",
+ irq_res->name);
+ err = -EINVAL;
+ goto out_irq_name;
+ }
+#else
+ irqtag = i;
+#endif /* CONFIG_OF */
+ kbdev->irqs[irqtag].irq = irq_res->start;
+ kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK);
+ }
+
+ /* the first memory resource is the physical address of the GPU registers */
+ reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!reg_res) {
+ dev_err(kbdev->dev, "Invalid register resource\n");
+ err = -ENOENT;
+ goto out_platform_mem;
+ }
+
+ kbdev->reg_start = reg_res->start;
+ kbdev->reg_size = resource_size(reg_res);
+
+ err = kbase_common_reg_map(kbdev);
+ if (err)
+ goto out_reg_map;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+ && defined(CONFIG_REGULATOR)
+ kbdev->regulator = regulator_get_optional(kbdev->dev, "mali");
+ if (IS_ERR_OR_NULL(kbdev->regulator)) {
+ dev_info(kbdev->dev, "Continuing without Mali regulator control\n");
+ kbdev->regulator = NULL;
+ /* Allow probe to continue without regulator */
+ }
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+ pm_runtime_enable(kbdev->dev);
+#endif
+ kbdev->clock = clk_get(kbdev->dev, "clk_mali");
+ if (IS_ERR_OR_NULL(kbdev->clock)) {
+ dev_info(kbdev->dev, "Continuing without Mali clock control\n");
+ kbdev->clock = NULL;
+ /* Allow probe to continue without clock. */
+ } else {
+ err = clk_prepare_enable(kbdev->clock);
+ if (err) {
+ dev_err(kbdev->dev,
+ "Failed to prepare and enable clock (%d)\n", err);
+ goto out_clock_prepare;
+ }
+ }
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \
+ && defined(CONFIG_PM_OPP)
+ /* Register the OPPs if they are available in device tree */
+ if (of_init_opp_table(kbdev->dev) < 0)
+ dev_dbg(kbdev->dev, "OPP table not found\n");
+#endif
+
+
+ err = kbase_common_device_init(kbdev);
+ if (err) {
+ dev_err(kbdev->dev, "Failed kbase_common_device_init\n");
+ goto out_common_init;
+ }
+ return 0;
+
+out_common_init:
+ clk_disable_unprepare(kbdev->clock);
+out_clock_prepare:
+ clk_put(kbdev->clock);
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+ pm_runtime_disable(kbdev->dev);
+#endif
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+ && defined(CONFIG_REGULATOR)
+ regulator_put(kbdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+ kbase_common_reg_unmap(kbdev);
+out_reg_map:
+out_platform_mem:
+#ifdef CONFIG_OF
+out_irq_name:
+#endif
+out_platform_irq:
+#ifdef CONFIG_MALI_NO_MALI
+ gpu_device_destroy(kbdev);
+out_midg:
+#endif /* CONFIG_MALI_NO_MALI */
+ kbase_device_free(kbdev);
+out:
+ return err;
+}
+
+static int kbase_common_device_remove(struct kbase_device *kbdev)
+{
+ kbase_vinstr_term(kbdev->vinstr_ctx);
+#ifdef CONFIG_DEBUG_FS
+ debugfs_remove_recursive(kbdev->mali_debugfs_directory);
+#endif
+#ifdef CONFIG_MALI_DEVFREQ
+ kbase_devfreq_term(kbdev);
+#endif
+
+ kbase_backend_late_term(kbdev);
+
+ if (kbdev->pm.callback_power_runtime_term)
+ kbdev->pm.callback_power_runtime_term(kbdev);
+#ifdef CONFIG_MALI_PLATFORM_DEVICETREE
+ pm_runtime_disable(kbdev->dev);
+#endif
+ device_remove_file(kbdev->dev, &dev_attr_js_scheduling_period);
+ device_remove_file(kbdev->dev, &dev_attr_reset_timeout);
+ device_remove_file(kbdev->dev, &dev_attr_pm_poweroff);
+ device_remove_file(kbdev->dev, &dev_attr_dvfs_period);
+ device_remove_file(kbdev->dev, &dev_attr_power_policy);
+ device_remove_file(kbdev->dev, &dev_attr_core_availability_policy);
+ device_remove_file(kbdev->dev, &dev_attr_core_mask);
+
+#ifdef CONFIG_MALI_MIPE_ENABLED
+ kbase_tlstream_term();
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+
+#ifdef CONFIG_MALI_DEBUG
+ device_remove_file(kbdev->dev, &dev_attr_js_softstop_always);
+ device_remove_file(kbdev->dev, &dev_attr_debug_command);
+#endif /* CONFIG_MALI_DEBUG */
+ device_remove_file(kbdev->dev, &dev_attr_js_timeouts);
+#if !MALI_CUSTOMER_RELEASE
+ device_remove_file(kbdev->dev, &dev_attr_force_replay);
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+ device_remove_file(kbdev->dev, &dev_attr_sc_split);
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+ device_remove_file(kbdev->dev, &dev_attr_gpuinfo);
+
+ kbasep_js_devdata_halt(kbdev);
+ kbase_mem_halt(kbdev);
+
+ kbasep_js_devdata_term(kbdev);
+ kbase_mem_term(kbdev);
+ kbase_backend_early_term(kbdev);
+
+ {
+ const struct list_head *dev_list = kbase_dev_list_get();
+
+ list_del(&kbdev->entry);
+ kbase_dev_list_put(dev_list);
+ }
+ misc_deregister(&kbdev->mdev);
+ put_device(kbdev->dev);
+ kbase_common_reg_unmap(kbdev);
+ kbase_device_term(kbdev);
+ if (kbdev->clock) {
+ clk_disable_unprepare(kbdev->clock);
+ clk_put(kbdev->clock);
+ kbdev->clock = NULL;
+ }
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \
+ && defined(CONFIG_REGULATOR)
+ regulator_put(kbdev->regulator);
+#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */
+#ifdef CONFIG_MALI_NO_MALI
+ gpu_device_destroy(kbdev);
+#endif /* CONFIG_MALI_NO_MALI */
+ kbase_device_free(kbdev);
+
+ return 0;
+}
+
+static int kbase_platform_device_remove(struct platform_device *pdev)
+{
+ struct kbase_device *kbdev = to_kbase_device(&pdev->dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ return kbase_common_device_remove(kbdev);
+}
+
+/** Suspend callback from the OS.
+ *
+ * This is called by Linux when the device should suspend.
+ *
+ * @param dev The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_suspend(struct device *dev)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+ kbase_pm_suspend(kbdev);
+ return 0;
+}
+
+/** Resume callback from the OS.
+ *
+ * This is called by Linux when the device should resume from suspension.
+ *
+ * @param dev The device to resume
+ *
+ * @return A standard Linux error code
+ */
+static int kbase_device_resume(struct device *dev)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ kbase_pm_resume(kbdev);
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_resume_device(kbdev->devfreq);
+#endif
+ return 0;
+}
+
+/** Runtime suspend callback from the OS.
+ *
+ * This is called by Linux when the device should prepare for a condition in which it will
+ * not be able to communicate with the CPU(s) and RAM due to power management.
+ *
+ * @param dev The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+#ifdef CONFIG_PM_RUNTIME
+static int kbase_device_runtime_suspend(struct device *dev)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_suspend_device(kbdev->devfreq);
+#endif
+
+ if (kbdev->pm.backend.callback_power_runtime_off) {
+ kbdev->pm.backend.callback_power_runtime_off(kbdev);
+ dev_dbg(dev, "runtime suspend\n");
+ }
+ return 0;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** Runtime resume callback from the OS.
+ *
+ * This is called by Linux when the device should go into a fully active state.
+ *
+ * @param dev The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef CONFIG_PM_RUNTIME
+int kbase_device_runtime_resume(struct device *dev)
+{
+ int ret = 0;
+ struct kbase_device *kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kbdev->pm.backend.callback_power_runtime_on) {
+ ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+ dev_dbg(dev, "runtime resume\n");
+ }
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+ (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ devfreq_resume_device(kbdev->devfreq);
+#endif
+
+ return ret;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** Runtime idle callback from the OS.
+ *
+ * This is called by Linux when the device appears to be inactive and it might be
+ * placed into a low power state
+ *
+ * @param dev The device to suspend
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef CONFIG_PM_RUNTIME
+static int kbase_device_runtime_idle(struct device *dev)
+{
+ /* Avoid pm_runtime_suspend being called */
+ return 1;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+ .suspend = kbase_device_suspend,
+ .resume = kbase_device_resume,
+#ifdef CONFIG_PM_RUNTIME
+ .runtime_suspend = kbase_device_runtime_suspend,
+ .runtime_resume = kbase_device_runtime_resume,
+ .runtime_idle = kbase_device_runtime_idle,
+#endif /* CONFIG_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+ { .compatible = "arm,malit6xx" },
+ { .compatible = "arm,mali-midgard" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+ .probe = kbase_platform_device_probe,
+ .remove = kbase_platform_device_remove,
+ .driver = {
+ .name = kbase_drv_name,
+ .owner = THIS_MODULE,
+ .pm = &kbase_pm_ops,
+ .of_match_table = of_match_ptr(kbase_dt_ids),
+ },
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+ int ret;
+
+ ret = kbase_platform_early_init();
+ if (ret)
+ return ret;
+
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ ret = kbase_platform_fake_register();
+ if (ret)
+ return ret;
+#endif
+#endif
+ ret = platform_driver_register(&kbase_platform_driver);
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ if (ret)
+ kbase_platform_fake_unregister();
+#endif
+#endif
+ return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+ platform_driver_unregister(&kbase_platform_driver);
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+ kbase_platform_fake_unregister();
+#endif
+#endif
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+ __stringify(BASE_UK_VERSION_MAJOR) "." \
+ __stringify(BASE_UK_VERSION_MINOR) ")");
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE)
+#define CREATE_TRACE_POINTS
+#endif
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+/* Create the trace points (otherwise we just get code to call a tracepoint) */
+#include "mali_linux_trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counter);
+
+void kbase_trace_mali_pm_status(u32 event, u64 value)
+{
+ trace_mali_pm_status(event, value);
+}
+
+void kbase_trace_mali_pm_power_off(u32 event, u64 value)
+{
+ trace_mali_pm_power_off(event, value);
+}
+
+void kbase_trace_mali_pm_power_on(u32 event, u64 value)
+{
+ trace_mali_pm_power_on(event, value);
+}
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id)
+{
+ trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id);
+}
+
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value)
+{
+ trace_mali_page_fault_insert_pages(event, value);
+}
+
+void kbase_trace_mali_mmu_as_in_use(int event)
+{
+ trace_mali_mmu_as_in_use(event);
+}
+
+void kbase_trace_mali_mmu_as_released(int event)
+{
+ trace_mali_mmu_as_released(event);
+}
+
+void kbase_trace_mali_total_alloc_pages_change(long long int event)
+{
+ trace_mali_total_alloc_pages_change(event);
+}
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+#ifdef CONFIG_MALI_SYSTEM_TRACE
+#include "mali_linux_kbase_trace.h"
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c
new file mode 100644
index 00000000000..5b62539dfba
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c
@@ -0,0 +1,125 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include "mali_kbase.h"
+#ifdef BASE_LEGACY_UK7_SUPPORT
+
+#include "mali_kbase_cpuprops.h"
+#include "mali_kbase_uku.h"
+#include <mali_kbase_config.h>
+#include <mali_kbase_config_defaults.h>
+#include <linux/cache.h>
+#include <linux/cpufreq.h>
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+#include <asm/cputype.h>
+#endif
+
+#define KBASE_DEFAULT_CPU_NUM 0
+
+#define L1_DCACHE_LINE_SIZE_LOG2 L1_CACHE_SHIFT
+
+/*
+ * Macros used to extract cpu id info
+ * see documentation for Main ID register
+ */
+#define KBASE_CPUPROPS_ID_GET_REV(cpuid) ((cpuid) & 0x0F) /* [3:0] Revision */
+#define KBASE_CPUPROPS_ID_GET_PART_NR(cpuid)(((cpuid) >> 4) & 0xFFF) /* [15:4] Part number */
+#define KBASE_CPUPROPS_ID_GET_ARCH(cpuid) (((cpuid) >> 16) & 0x0F) /* [19:16] Architecture */
+#define KBASE_CPUPROPS_ID_GET_VARIANT(cpuid)(((cpuid) >> 20) & 0x0F) /* [23:20] Variant */
+#define KBASE_CPUPROPS_ID_GET_CODE(cpuid) (((cpuid) >> 24) & 0xFF) /* [31:23] ASCII code of implementer trademark */
+
+/*Below value sourced from OSK*/
+#define L1_DCACHE_SIZE ((u32)0x00008000)
+
+/**
+ * kbasep_cpuprops_uk_get_cpu_id_info - Retrieves detailed CPU info from given
+ * cpu_val ( ID reg )
+ * @kbase_props: CPU props to be filled-in with cpu id info
+ *
+ */
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+static void kbasep_cpuprops_uk_get_cpu_id_info(struct kbase_uk_cpuprops * const kbase_props)
+{
+ kbase_props->props.cpu_id.id = read_cpuid_id();
+
+ kbase_props->props.cpu_id.valid = 1;
+ kbase_props->props.cpu_id.rev = KBASE_CPUPROPS_ID_GET_REV(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.part = KBASE_CPUPROPS_ID_GET_PART_NR(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.arch = KBASE_CPUPROPS_ID_GET_ARCH(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.variant = KBASE_CPUPROPS_ID_GET_VARIANT(kbase_props->props.cpu_id.id);
+ kbase_props->props.cpu_id.implementer = KBASE_CPUPROPS_ID_GET_CODE(kbase_props->props.cpu_id.id);
+}
+#else
+static void kbasep_cpuprops_uk_get_cpu_id_info(struct kbase_uk_cpuprops * const kbase_props)
+{
+ kbase_props->props.cpu_id.id = 0;
+ kbase_props->props.cpu_id.valid = 0;
+ kbase_props->props.cpu_id.rev = 0;
+ kbase_props->props.cpu_id.part = 0;
+ kbase_props->props.cpu_id.arch = 0;
+ kbase_props->props.cpu_id.variant = 0;
+ kbase_props->props.cpu_id.implementer = 'N';
+}
+#endif
+
+/*
+ * This function (and file!) is kept for the backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+
+int kbase_cpuprops_uk_get_props(struct kbase_context *kctx,
+ struct kbase_uk_cpuprops * const props)
+{
+ unsigned int max_cpu_freq;
+
+ props->props.cpu_l1_dcache_line_size_log2 = L1_DCACHE_LINE_SIZE_LOG2;
+ props->props.cpu_l1_dcache_size = L1_DCACHE_SIZE;
+ props->props.cpu_flags = BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN;
+
+ props->props.nr_cores = num_possible_cpus();
+ props->props.cpu_page_size_log2 = PAGE_SHIFT;
+ props->props.available_memory_size = totalram_pages << PAGE_SHIFT;
+
+ kbasep_cpuprops_uk_get_cpu_id_info(props);
+
+ /* check if kernel supports dynamic frequency scaling */
+ max_cpu_freq = cpufreq_quick_get_max(KBASE_DEFAULT_CPU_NUM);
+ if (max_cpu_freq != 0) {
+ /* convert from kHz to mHz */
+ props->props.max_cpu_clock_speed_mhz = max_cpu_freq / 1000;
+ } else {
+ /* fallback if CONFIG_CPU_FREQ turned off */
+ int err;
+ kbase_cpu_clk_speed_func get_clock_speed;
+
+ get_clock_speed = (kbase_cpu_clk_speed_func) CPU_SPEED_FUNC;
+ err = get_clock_speed(&props->props.max_cpu_clock_speed_mhz);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+#endif /* BASE_LEGACY_UK7_SUPPORT */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h
new file mode 100644
index 00000000000..daa46e502e2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#ifdef BASE_LEGACY_UK7_SUPPORT
+
+/**
+ * @file mali_kbase_cpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_CPUPROPS_H_
+#define _KBASE_CPUPROPS_H_
+
+/* Forward declarations */
+struct kbase_uk_cpuprops;
+
+/**
+ * This file is kept for the backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+
+/**
+ * @brief Provides CPU properties data.
+ *
+ * Fill the struct kbase_uk_cpuprops with values from CPU configuration.
+ *
+ * @param kctx The kbase context
+ * @param kbase_props A copy of the struct kbase_uk_cpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_cpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_cpuprops * const kbase_props);
+
+#endif /*_KBASE_CPUPROPS_H_*/
+#endif /* BASE_LEGACY_UK7_SUPPORT */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.c b/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100644
index 00000000000..fb57ac2e31a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = {
+ NULL,
+ NULL
+};
+
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param)
+{
+ kbasep_debug_assert_registered_cb.func = func;
+ kbasep_debug_assert_registered_cb.param = param;
+}
+
+void kbasep_debug_assert_call_hook(void)
+{
+ if (kbasep_debug_assert_registered_cb.func != NULL)
+ kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param);
+}
+KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.h b/drivers/gpu/arm/midgard/mali_kbase_debug.h
new file mode 100644
index 00000000000..5fff2892bb5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.h
@@ -0,0 +1,164 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_DEBUG_H
+#define _KBASE_DEBUG_H
+
+#include <linux/bug.h>
+
+/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */
+#define KBASE_DEBUG_SKIP_TRACE 0
+
+/** @brief If different from 0, the trace will only contain the file and line. */
+#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0
+
+/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */
+#ifndef KBASE_DEBUG_DISABLE_ASSERTS
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_DISABLE_ASSERTS 0
+#else
+#define KBASE_DEBUG_DISABLE_ASSERTS 1
+#endif
+#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */
+typedef void (kbase_debug_assert_hook) (void *);
+
+struct kbasep_debug_assert_cb {
+ kbase_debug_assert_hook *func;
+ void *param;
+};
+
+/**
+ * @def KBASEP_DEBUG_PRINT_TRACE
+ * @brief Private macro containing the format of the trace to display before every message
+ * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME
+ */
+#if !KBASE_DEBUG_SKIP_TRACE
+#define KBASEP_DEBUG_PRINT_TRACE \
+ "In file: " __FILE__ " line: " CSTD_STR2(__LINE__)
+#if !KBASE_DEBUG_SKIP_FUNCTION_NAME
+#define KBASEP_DEBUG_PRINT_FUNCTION __func__
+#else
+#define KBASEP_DEBUG_PRINT_FUNCTION ""
+#endif
+#else
+#define KBASEP_DEBUG_PRINT_TRACE ""
+#endif
+
+/**
+ * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)
+ * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event.
+ * @param trace location in the code from where the message is printed
+ * @param function function from where the message is printed
+ * @param ... Format string followed by format arguments.
+ * @note function parameter cannot be concatenated with other strings
+ */
+/* Select the correct system output function*/
+#ifdef CONFIG_MALI_DEBUG
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\
+ do { \
+ pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\
+ pr_err(__VA_ARGS__);\
+ pr_err("\n");\
+ } while (false)
+#else
+#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP()
+#endif
+
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook()
+#else
+#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP()
+#endif
+
+/**
+ * @def KBASE_DEBUG_ASSERT(expr)
+ * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ */
+#define KBASE_DEBUG_ASSERT(expr) \
+ KBASE_DEBUG_ASSERT_MSG(expr, #expr)
+
+#if KBASE_DEBUG_DISABLE_ASSERTS
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP()
+#else
+ /**
+ * @def KBASE_DEBUG_ASSERT_MSG(expr, ...)
+ * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false
+ *
+ * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1
+ *
+ * @param expr Boolean expression
+ * @param ... Message to display when @a expr is false, as a format string followed by format arguments.
+ */
+#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \
+ do { \
+ if (!(expr)) { \
+ KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\
+ KBASE_CALL_ASSERT_HOOK();\
+ BUG();\
+ } \
+ } while (false)
+#endif /* KBASE_DEBUG_DISABLE_ASSERTS */
+
+/**
+ * @def KBASE_DEBUG_CODE( X )
+ * @brief Executes the code inside the macro only in debug mode
+ *
+ * @param X Code to compile only in debug mode.
+ */
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_DEBUG_CODE(X) X
+#else
+#define KBASE_DEBUG_CODE(X) CSTD_NOP()
+#endif /* CONFIG_MALI_DEBUG */
+
+/** @} */
+
+/**
+ * @brief Register a function to call on ASSERT
+ *
+ * Such functions will \b only be called during Debug mode, and for debugging
+ * features \b only. Do not rely on them to be called in general use.
+ *
+ * To disable the hook, supply NULL to \a func.
+ *
+ * @note This function is not thread-safe, and should only be used to
+ * register/deregister once in the module's lifetime.
+ *
+ * @param[in] func the function to call when an assert is triggered.
+ * @param[in] param the parameter to pass to \a func when calling it
+ */
+void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param);
+
+/**
+ * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook()
+ *
+ * @note This function is not thread-safe with respect to multiple threads
+ * registering functions and parameters with
+ * kbase_debug_assert_register_hook(). Otherwise, thread safety is the
+ * responsibility of the registered hook.
+ */
+void kbasep_debug_assert_call_hook(void);
+
+#endif /* _KBASE_DEBUG_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
new file mode 100644
index 00000000000..1a3198e5b53
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c
@@ -0,0 +1,251 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+
+#if CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+ struct list_head node;
+
+ struct kbase_mem_phy_alloc *alloc;
+ unsigned long flags;
+
+ u64 start_pfn;
+ size_t nr_pages;
+};
+
+struct debug_mem_data {
+ struct list_head mapping_list;
+ struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+ struct list_head *lh;
+ size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+ struct debug_mem_data *mem_data = m->private;
+ struct debug_mem_seq_off *data;
+ struct debug_mem_mapping *map;
+ loff_t pos = *_pos;
+
+ list_for_each_entry(map, &mem_data->mapping_list, node) {
+ if (pos >= map->nr_pages) {
+ pos -= map->nr_pages;
+ } else {
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return NULL;
+ data->lh = &map->node;
+ data->offset = pos;
+ return data;
+ }
+ }
+
+ /* Beyond the end */
+ return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+ kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct debug_mem_data *mem_data = m->private;
+ struct debug_mem_seq_off *data = v;
+ struct debug_mem_mapping *map;
+
+ map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+ if (data->offset < map->nr_pages - 1) {
+ data->offset++;
+ ++*pos;
+ return data;
+ }
+
+ if (list_is_last(data->lh, &mem_data->mapping_list))
+ return NULL;
+
+ data->lh = data->lh->next;
+ data->offset = 0;
+ ++*pos;
+
+ return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+ struct debug_mem_data *mem_data = m->private;
+ struct debug_mem_seq_off *data = v;
+ struct debug_mem_mapping *map;
+ int i, j;
+ struct page *page;
+ uint32_t *mapping;
+ pgprot_t prot = PAGE_KERNEL;
+
+ map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+ kbase_gpu_vm_lock(mem_data->kctx);
+
+ if (data->offset >= map->alloc->nents) {
+ seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+ data->offset) << PAGE_SHIFT);
+ goto out;
+ }
+
+ if (!(map->flags & KBASE_REG_CPU_CACHED))
+ prot = pgprot_writecombine(prot);
+
+ page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
+ mapping = vmap(&page, 1, VM_MAP, prot);
+
+ for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+ seq_printf(m, "%016llx:", i + ((map->start_pfn +
+ data->offset) << PAGE_SHIFT));
+
+ for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+ seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+ seq_putc(m, '\n');
+ }
+
+ vunmap(mapping);
+
+ seq_putc(m, '\n');
+
+out:
+ kbase_gpu_vm_unlock(mem_data->kctx);
+ return 0;
+}
+
+static const struct seq_operations ops = {
+ .start = debug_mem_start,
+ .next = debug_mem_next,
+ .stop = debug_mem_stop,
+ .show = debug_mem_show,
+};
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+ struct file *kctx_file = i->i_private;
+ struct kbase_context *kctx = kctx_file->private_data;
+ struct rb_node *p;
+ struct debug_mem_data *mem_data;
+ int ret;
+
+ ret = seq_open(file, &ops);
+
+ if (ret)
+ return ret;
+
+ mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+ mem_data->kctx = kctx;
+
+ INIT_LIST_HEAD(&mem_data->mapping_list);
+
+ get_file(kctx_file);
+
+ kbase_gpu_vm_lock(kctx);
+
+ for (p = rb_first(&kctx->reg_rbtree); p; p = rb_next(p)) {
+ struct kbase_va_region *reg;
+ struct debug_mem_mapping *mapping;
+
+ reg = rb_entry(p, struct kbase_va_region, rblink);
+
+ if (reg->gpu_alloc == NULL)
+ /* Empty region - ignore */
+ continue;
+
+ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+
+ mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+ mapping->start_pfn = reg->start_pfn;
+ mapping->nr_pages = reg->nr_pages;
+ mapping->flags = reg->flags;
+ list_add_tail(&mapping->node, &mem_data->mapping_list);
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+
+ ((struct seq_file *)file->private_data)->private = mem_data;
+
+ return 0;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+ struct file *kctx_file = inode->i_private;
+ struct seq_file *sfile = file->private_data;
+ struct debug_mem_data *mem_data = sfile->private;
+ struct debug_mem_mapping *mapping;
+
+ seq_release(inode, file);
+
+ while (!list_empty(&mem_data->mapping_list)) {
+ mapping = list_first_entry(&mem_data->mapping_list,
+ struct debug_mem_mapping, node);
+ kbase_mem_phy_alloc_put(mapping->alloc);
+ list_del(&mapping->node);
+ kfree(mapping);
+ }
+
+ kfree(mem_data);
+
+ fput(kctx_file);
+
+ return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+ .open = debug_mem_open,
+ .release = debug_mem_release,
+ .read = seq_read,
+ .llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view sysfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+ struct kbase_context *kctx = kctx_file->private_data;
+
+ debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
+ &kbase_debug_mem_view_fops);
+}
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100644
index 00000000000..20ab51a776c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100644
index 00000000000..8b107f20cc4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1126 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Defintions (types, defines, etcs) common to Kbase. They are placed here to
+ * allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mem_alloc.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_mmu_mode.h>
+#include <mali_kbase_instr.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif /* CONFIG_KDS */
+
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#endif /* CONFIG_SYNC */
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif /* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT 1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT 500
+
+/**
+ * Prevent soft-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * Therefore, soft stop may still be disabled due to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+/**
+ * Prevent hard-stops from occuring in scheduling situations
+ *
+ * This is not due to HW issues, but when scheduling is desired to be more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS 3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS 16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL 0
+#else
+#define MIDGARD_MMU_TOPLEVEL 1
+#endif
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8 /* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stoppped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has been previously retried to execute */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom has fail dependency on same-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_PREV (1<<6)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has fail dependency on cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom has been submitted to JSCTX ringbuffers */
+#define KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in secure mode */
+#define KBASE_KATOM_FLAG_SECURE (1<<11)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ * entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+ (JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+struct kbase_jd_atom_dependency {
+ struct kbase_jd_atom *atom;
+ u8 dep_type;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
+ */
+static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep)
+{
+ LOCAL_ASSERT(dep != NULL);
+
+ return (const struct kbase_jd_atom * const)(dep->atom);
+}
+
+/**
+ * @brief The function retrieves a read-only reference to the dependency type field from
+ * the kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return A dependency type value.
+ */
+static inline const u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep)
+{
+ LOCAL_ASSERT(dep != NULL);
+
+ return dep->dep_type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep The kbase jd atom dependency.
+ * @param[in] a The ATOM to be set as a dependency.
+ * @param type The ATOM dependency type to be set.
+ *
+ */
+static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep,
+ struct kbase_jd_atom *a, u8 type)
+{
+ struct kbase_jd_atom_dependency *dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+
+ dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+ dep->atom = a;
+ dep->dep_type = type;
+}
+
+/**
+ * @brief Setter macro for dep_atom array entry in kbase_jd_atom
+ *
+ * @param[in] dep The kbase jd atom dependency to be cleared.
+ *
+ */
+static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep)
+{
+ struct kbase_jd_atom_dependency *dep;
+
+ LOCAL_ASSERT(const_dep != NULL);
+
+ dep = (struct kbase_jd_atom_dependency *)const_dep;
+
+ dep->atom = NULL;
+ dep->dep_type = BASE_JD_DEP_TYPE_INVALID;
+}
+
+enum kbase_atom_gpu_rb_state {
+ /* Atom is not currently present in slot ringbuffer */
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB,
+ /* Atom is in slot ringbuffer but is blocked on a previous atom */
+ KBASE_ATOM_GPU_RB_WAITING_BLOCKED,
+ /* Atom is in slot ringbuffer but is waiting for cores to become
+ * available */
+ KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE,
+ /* Atom is in slot ringbuffer but is blocked on affinity */
+ KBASE_ATOM_GPU_RB_WAITING_AFFINITY,
+ /* Atom is in slot ringbuffer but is waiting for secure mode switch */
+ KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE,
+ /* Atom is in slot ringbuffer and ready to run */
+ KBASE_ATOM_GPU_RB_READY,
+ /* Atom is in slot ringbuffer and has been submitted to the GPU */
+ KBASE_ATOM_GPU_RB_SUBMITTED,
+ /* Atom must be returned to JS as soon as it reaches the head of the
+ * ringbuffer due to a previous failure */
+ KBASE_ATOM_GPU_RB_RETURN_TO_JS
+};
+
+struct kbase_ext_res {
+ u64 gpu_address;
+ struct kbase_mem_phy_alloc *alloc;
+};
+
+struct kbase_jd_atom {
+ struct work_struct work;
+ ktime_t start_timestamp;
+ u64 time_spent_us; /**< Total time spent on the GPU in microseconds */
+
+ struct base_jd_udata udata;
+ struct kbase_context *kctx;
+
+ struct list_head dep_head[2];
+ struct list_head dep_item[2];
+ const struct kbase_jd_atom_dependency dep[2];
+
+ u16 nr_extres;
+ struct kbase_ext_res *extres;
+
+ u32 device_nr;
+ u64 affinity;
+ u64 jc;
+ enum kbase_atom_coreref_state coreref_state;
+#ifdef CONFIG_KDS
+ struct list_head node;
+ struct kds_resource_set *kds_rset;
+ bool kds_dep_satisfied;
+#endif /* CONFIG_KDS */
+#ifdef CONFIG_SYNC
+ struct sync_fence *fence;
+ struct sync_fence_waiter sync_waiter;
+#endif /* CONFIG_SYNC */
+
+ /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */
+ enum base_jd_event_code event_code;
+ base_jd_core_req core_req; /**< core requirements */
+ /** Job Slot to retry submitting to if submission from IRQ handler failed
+ *
+ * NOTE: see if this can be unified into the another member e.g. the event */
+ int retry_submit_on_slot;
+
+ union kbasep_js_policy_job_info sched_info;
+ /* JS atom priority with respect to other atoms on its kctx. */
+ int sched_priority;
+
+ int poking; /* BASE_HW_ISSUE_8316 */
+
+ wait_queue_head_t completed;
+ enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+ int work_id;
+#endif
+ /* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+ int slot_nr;
+
+ u32 atom_flags;
+
+ /* Number of times this atom has been retried. Used by replay soft job.
+ */
+ int retry_count;
+
+ enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+ u64 need_cache_flush_cores_retained;
+
+ atomic_t blocked;
+
+ /* Pointer to atom that this atom has cross-slot dependency on */
+ struct kbase_jd_atom *x_pre_dep;
+ /* Pointer to atom that has cross-slot dependency on this atom */
+ struct kbase_jd_atom *x_post_dep;
+
+
+ struct kbase_jd_atom_backend backend;
+};
+
+static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom)
+{
+ return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_SECURE);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+ struct mutex lock;
+ struct kbasep_js_kctx_info sched_info;
+ struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+ /** Tracks all job-dispatch jobs. This includes those not tracked by
+ * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+ u32 job_nr;
+
+ /** Waitq that reflects whether there are no jobs (including SW-only
+ * dependency jobs). This is set when no jobs are present on the ctx,
+ * and clear when there are jobs.
+ *
+ * @note: Job Dispatcher knows about more jobs than the Job Scheduler:
+ * the Job Scheduler is unaware of jobs that are blocked on dependencies,
+ * and SW-only dependency jobs.
+ *
+ * This waitq can be waited upon to find out when the context jobs are all
+ * done/cancelled (including those that might've been blocked on
+ * dependencies) - and so, whether it can be terminated. However, it should
+ * only be terminated once it is neither present in the policy-queue (see
+ * kbasep_js_policy_try_evict_ctx() ) nor the run-pool (see
+ * kbasep_js_kctx_info::ctx::is_scheduled).
+ *
+ * Since the waitq is only set under kbase_jd_context::lock,
+ * the waiter should also briefly obtain and drop kbase_jd_context::lock to
+ * guarentee that the setter has completed its work on the kbase_context
+ *
+ * This must be updated atomically with:
+ * - kbase_jd_context::job_nr */
+ wait_queue_head_t zero_jobs_wait;
+
+ /** Job Done workqueue. */
+ struct workqueue_struct *job_done_wq;
+
+ spinlock_t tb_lock;
+ u32 *tb;
+ size_t tb_wrap_offset;
+
+#ifdef CONFIG_KDS
+ struct kds_callback kds_cb;
+#endif /* CONFIG_KDS */
+#ifdef CONFIG_GPU_TRACEPOINTS
+ atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+ u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316 */
+enum {
+ KBASE_AS_POKE_STATE_IN_FLIGHT = 1<<0,
+ KBASE_AS_POKE_STATE_KILLING_POKE = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316 */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+ u64 transtab;
+ u64 memattr;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+ int number;
+
+ struct workqueue_struct *pf_wq;
+ struct work_struct work_pagefault;
+ struct work_struct work_busfault;
+ enum kbase_mmu_fault_type fault_type;
+ u32 fault_status;
+ u64 fault_addr;
+ struct mutex transaction_mutex;
+
+ struct kbase_mmu_setup current_setup;
+
+ /* BASE_HW_ISSUE_8316 */
+ struct workqueue_struct *poke_wq;
+ struct work_struct poke_work;
+ /** Protected by kbasep_js_device_data::runpool_irq::lock */
+ int poke_refcount;
+ /** Protected by kbasep_js_device_data::runpool_irq::lock */
+ kbase_as_poke_state poke_state;
+ struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+ return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+ return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+ atomic_t used_pages; /* Tracks usage of OS shared memory. Updated
+ when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+ /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+ * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef KBASE_TRACE_CODE_MAKE_CODE
+ /* Comma on its own, to extend the list */
+ ,
+ /* Must be the last in the enum */
+ KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT (((u8)1) << 1)
+
+struct kbase_trace {
+ struct timespec timestamp;
+ u32 thread_id;
+ u32 cpu;
+ void *ctx;
+ bool katom;
+ int atom_number;
+ u64 atom_udata[2];
+ u64 gpu_addr;
+ unsigned long info_val;
+ u8 code;
+ u8 jobslot;
+ u8 refcount;
+ u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statisical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+ /* helper for tests */
+ KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+ /** Event reserved for backwards compatibility with 'init' events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+ /** The power state of the device has changed.
+ *
+ * Specifically, the device has reached a desired or available state.
+ */
+ KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+ /** The GPU is becoming active.
+ *
+ * This event is sent when the first context is about to use the GPU.
+ */
+ KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE,
+
+ /** The GPU is becoming idle.
+ *
+ * This event is sent when the last context has finished using the GPU.
+ */
+ KBASE_TIMELINE_PM_EVENT_GPU_IDLE,
+
+ /** Event reserved for backwards compatibility with 'policy_change'
+ * events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_4,
+
+ /** Event reserved for backwards compatibility with 'system_suspend'
+ * events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_5,
+
+ /** Event reserved for backwards compatibility with 'system_resume'
+ * events */
+ KBASE_TIMELINE_PM_EVENT_RESERVED_6,
+
+ /** The job scheduler is requesting to power up/down cores.
+ *
+ * This event is sent when:
+ * - powered down cores are needed to complete a job
+ * - powered up cores are not needed anymore
+ */
+ KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+
+ KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE,
+};
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+struct kbase_trace_kctx_timeline {
+ atomic_t jd_atoms_in_flight;
+ u32 owner_tgid;
+};
+
+struct kbase_trace_kbdev_timeline {
+ /* Note: strictly speaking, not needed, because it's in sync with
+ * kbase_device::jm_slots[]::submitted_nr
+ *
+ * But it's kept as an example of how to add global timeline tracking
+ * information
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock when
+ * accessing this */
+ u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS];
+
+ /* Last UID for each PM event */
+ atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1];
+ /* Counter for generating PM event UIDs */
+ atomic_t pm_event_uid_counter;
+ /*
+ * L2 transition state - true indicates that the transition is ongoing
+ * Expected to be protected by pm.power_change_lock */
+ bool l2_transitioning;
+};
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+
+struct kbasep_kctx_list_element {
+ struct list_head link;
+ struct kbase_context *kctx;
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is one
+ * instance of this structure per device in the system.
+ */
+struct kbase_pm_device_data {
+ /**
+ * The lock protecting Power Management structures accessed outside of
+ * IRQ.
+ *
+ * This lock must also be held whenever the GPU is being powered on or
+ * off.
+ */
+ struct mutex lock;
+
+ /** The reference count of active contexts on this device. */
+ int active_count;
+ /** Flag indicating suspending/suspended */
+ bool suspending;
+ /* Wait queue set when active_count == 0 */
+ wait_queue_head_t zero_active_count_wait;
+
+ /**
+ * A bit mask identifying the available shader cores that are specified
+ * via sysfs
+ */
+ u64 debug_core_mask;
+
+ /**
+ * Lock protecting the power state of the device.
+ *
+ * This lock must be held when accessing the shader_available_bitmap,
+ * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and
+ * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition
+ * and shader_poweroff_pending fields of kbase_pm_device_data. It is
+ * also held when the hardware power registers are being written to, to
+ * ensure that two threads do not conflict over the power transitions
+ * that the hardware should make.
+ */
+ spinlock_t power_change_lock;
+
+ /**
+ * Callback for initializing the runtime power management.
+ *
+ * @param kbdev The kbase device
+ *
+ * @return 0 on success, else error code
+ */
+ int (*callback_power_runtime_init)(struct kbase_device *kbdev);
+
+ /**
+ * Callback for terminating the runtime power management.
+ *
+ * @param kbdev The kbase device
+ */
+ void (*callback_power_runtime_term)(struct kbase_device *kbdev);
+
+ /* Time in milliseconds between each dvfs sample */
+ u32 dvfs_period;
+
+ /* Period of GPU poweroff timer */
+ ktime_t gpu_poweroff_time;
+
+ /* Number of ticks of GPU poweroff timer before shader is powered off */
+ int poweroff_shader_ticks;
+
+ /* Number of ticks of GPU poweroff timer before GPU is powered off */
+ int poweroff_gpu_ticks;
+
+ struct kbase_pm_backend_data backend;
+};
+
+/**
+ * struct kbase_secure_ops - Platform specific functions for GPU secure mode
+ * operations
+ * @secure_mode_enable: Callback to enable secure mode on the GPU
+ * @secure_mode_disable: Callback to disable secure mode on the GPU
+ */
+struct kbase_secure_ops {
+ /**
+ * secure_mode_enable() - Enable secure mode on the GPU
+ * @kbdev: The kbase device
+ *
+ * Return: 0 on success, non-zero on error
+ */
+ int (*secure_mode_enable)(struct kbase_device *kbdev);
+
+ /**
+ * secure_mode_disable() - Disable secure mode on the GPU
+ * @kbdev: The kbase device
+ *
+ * Return: 0 on success, non-zero on error
+ */
+ int (*secure_mode_disable)(struct kbase_device *kbdev);
+};
+
+
+#define DEVNAME_SIZE 16
+
+struct kbase_device {
+ s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS];
+
+ u32 hw_quirks_sc;
+ u32 hw_quirks_tiler;
+ u32 hw_quirks_mmu;
+
+ struct list_head entry;
+ struct device *dev;
+ struct miscdevice mdev;
+ u64 reg_start;
+ size_t reg_size;
+ void __iomem *reg;
+ struct {
+ int irq;
+ int flags;
+ } irqs[3];
+#ifdef CONFIG_HAVE_CLK
+ struct clk *clock;
+#endif
+#ifdef CONFIG_REGULATOR
+ struct regulator *regulator;
+#endif
+ char devname[DEVNAME_SIZE];
+
+#ifdef CONFIG_MALI_NO_MALI
+ void *model;
+ struct kmem_cache *irq_slab;
+ struct workqueue_struct *irq_workq;
+ atomic_t serving_job_irq;
+ atomic_t serving_gpu_irq;
+ atomic_t serving_mmu_irq;
+ spinlock_t reg_op_lock;
+#endif /* CONFIG_MALI_NO_MALI */
+
+ struct kbase_pm_device_data pm;
+ struct kbasep_js_device_data js_data;
+ struct kbasep_mem_device memdev;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ struct kbase_as as[BASE_MAX_NR_AS];
+
+ spinlock_t mmu_mask_change;
+
+ struct kbase_gpu_props gpu_props;
+
+ /** List of SW workarounds for HW issues */
+ unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+ /** List of features available */
+ unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
+
+ /* Cached present bitmaps - these are the same as the corresponding hardware registers */
+ u64 shader_present_bitmap;
+ u64 tiler_present_bitmap;
+ u64 l2_present_bitmap;
+
+ /* Bitmaps of cores that are currently in use (running jobs).
+ * These should be kept up to date by the job scheduler.
+ *
+ * pm.power_change_lock should be held when accessing these members.
+ *
+ * kbase_pm_check_transitions_nolock() should be called when bits are
+ * cleared to update the power management system and allow transitions to
+ * occur. */
+ u64 shader_inuse_bitmap;
+
+ /* Refcount for cores in use */
+ u32 shader_inuse_cnt[64];
+
+ /* Bitmaps of cores the JS needs for jobs ready to run */
+ u64 shader_needed_bitmap;
+
+ /* Refcount for cores needed */
+ u32 shader_needed_cnt[64];
+
+ u32 tiler_inuse_cnt;
+
+ u32 tiler_needed_cnt;
+
+ /* struct for keeping track of the disjoint information
+ *
+ * The state is > 0 if the GPU is in a disjoint state. Otherwise 0
+ * The count is the number of disjoint events that have occurred on the GPU
+ */
+ struct {
+ atomic_t count;
+ atomic_t state;
+ } disjoint_event;
+
+ /* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */
+ u32 l2_users_count;
+
+ /* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be
+ * submitted to these cores. These are updated by the power management code. The job scheduler should avoid
+ * submitting new jobs to any cores that are not marked as available.
+ *
+ * pm.power_change_lock should be held when accessing these members.
+ */
+ u64 shader_available_bitmap;
+ u64 tiler_available_bitmap;
+ u64 l2_available_bitmap;
+
+ u64 shader_ready_bitmap;
+ u64 shader_transitioning_bitmap;
+
+ s8 nr_hw_address_spaces; /**< Number of address spaces in the GPU (constant after driver initialisation) */
+ s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */
+
+ /* Structure used for instrumentation and HW counters dumping */
+ struct {
+ /* The lock should be used when accessing any of the following members */
+ spinlock_t lock;
+
+ struct kbase_context *kctx;
+ u64 addr;
+
+ struct kbase_context *suspended_kctx;
+ struct kbase_uk_hwcnt_setup suspended_state;
+
+ struct kbase_instr_backend backend;
+ } hwcnt;
+
+ struct kbase_vinstr_context *vinstr_ctx;
+
+ /*value to be written to the irq_throttle register each time an irq is served */
+ atomic_t irq_throttle_cycles;
+
+#if KBASE_TRACE_ENABLE
+ spinlock_t trace_lock;
+ u16 trace_first_out;
+ u16 trace_next_in;
+ struct kbase_trace *trace_rbuf;
+#endif
+
+ /* This is used to override the current job scheduler values for
+ * JS_SCHEDULING_PERIOD_NS
+ * JS_SOFT_STOP_TICKS
+ * JS_SOFT_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ *
+ * These values are set via the js_timeouts sysfs file.
+ */
+ u32 js_scheduling_period_ns;
+ int js_soft_stop_ticks;
+ int js_soft_stop_ticks_cl;
+ int js_hard_stop_ticks_ss;
+ int js_hard_stop_ticks_cl;
+ int js_hard_stop_ticks_dumping;
+ int js_reset_ticks_ss;
+ int js_reset_ticks_cl;
+ int js_reset_ticks_dumping;
+ bool js_timeouts_updated;
+
+ u32 reset_timeout_ms;
+
+ struct mutex cacheclean_lock;
+
+ /* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */
+ void *platform_context;
+
+ /* List of kbase_contexts created */
+ struct list_head kctx_list;
+ struct mutex kctx_list_lock;
+
+#ifdef CONFIG_MALI_MIDGARD_RT_PM
+ struct delayed_work runtime_pm_workqueue;
+#endif
+
+#ifdef CONFIG_PM_DEVFREQ
+ struct devfreq_dev_profile devfreq_profile;
+ struct devfreq *devfreq;
+ unsigned long current_freq;
+ unsigned long current_voltage;
+#ifdef CONFIG_DEVFREQ_THERMAL
+ struct devfreq_cooling_device *devfreq_cooling;
+#endif
+#endif
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ struct kbase_trace_kbdev_timeline timeline;
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+ /* directory for debugfs entries */
+ struct dentry *mali_debugfs_directory;
+ /* Root directory for per context entry */
+ struct dentry *debugfs_ctx_directory;
+#endif /* CONFIG_DEBUG_FS */
+
+ /* fbdump profiling controls set by gator */
+ u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX];
+
+
+#if MALI_CUSTOMER_RELEASE == 0
+ /* Number of jobs that are run before a job is forced to fail and
+ * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced
+ * failures. */
+ int force_replay_limit;
+ /* Count of jobs between forced failures. Incremented on each job. A
+ * job is forced to fail once this is greater than or equal to
+ * force_replay_limit. */
+ int force_replay_count;
+ /* Core requirement for jobs to be failed and replayed. May be zero. */
+ base_jd_core_req force_replay_core_req;
+ /* true if force_replay_limit should be randomized. The random
+ * value will be in the range of 1 - KBASEP_FORCE_REPLAY_RANDOM_LIMIT.
+ */
+ bool force_replay_random;
+#endif
+
+
+ /* Total number of created contexts */
+ atomic_t ctx_num;
+
+ struct kbase_hwaccess_data hwaccess;
+
+ /* Count of page/bus faults waiting for workqueues to process */
+ atomic_t faults_pending;
+
+ /* true if GPU is powered off or power off operation is in progress */
+ bool poweroff_pending;
+
+
+ /* defaults for new context created for this device */
+ u32 infinite_cache_active_default;
+
+ /* system coherency mode */
+ u32 system_coherency;
+
+ /* Secure operations */
+ struct kbase_secure_ops *secure_ops;
+};
+
+/* JSCTX ringbuffer size must always be a power of 2 */
+#define JSCTX_RB_SIZE 256
+#define JSCTX_RB_MASK (JSCTX_RB_SIZE-1)
+
+/**
+ * struct jsctx_rb_entry - Entry in &struct jsctx_rb ring buffer
+ * @atom_id: Atom ID
+ */
+struct jsctx_rb_entry {
+ u16 atom_id;
+};
+
+/**
+ * struct jsctx_rb - JS context atom ring buffer
+ * @entries: Array of size %JSCTX_RB_SIZE which holds the &struct
+ * kbase_jd_atom pointers which make up the contents of the ring
+ * buffer.
+ * @read_idx: Index into @entries. Indicates the next entry in @entries to
+ * read, and is incremented when pulling an atom, and decremented
+ * when unpulling.
+ * HW access lock must be held when accessing.
+ * @write_idx: Index into @entries. Indicates the next entry to use when
+ * adding atoms into the ring buffer, and is incremented when
+ * adding a new atom.
+ * jctx->lock must be held when accessing.
+ * @running_idx: Index into @entries. Indicates the last valid entry, and is
+ * incremented when remving atoms from the ring buffer.
+ * HW access lock must be held when accessing.
+ *
+ * &struct jsctx_rb is a ring buffer of &struct kbase_jd_atom.
+ */
+struct jsctx_rb {
+ struct jsctx_rb_entry entries[JSCTX_RB_SIZE];
+
+ u16 read_idx; /* HW access lock must be held when accessing */
+ u16 write_idx; /* jctx->lock must be held when accessing */
+ u16 running_idx; /* HW access lock must be held when accessing */
+};
+
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \
+ (((minor) & 0xFFF) << 8) | \
+ ((0 & 0xFF) << 0))
+
+struct kbase_context {
+ struct kbase_device *kbdev;
+ int id; /* System wide unique id */
+ unsigned long api_version;
+ phys_addr_t pgd;
+ struct list_head event_list;
+ struct mutex event_mutex;
+ bool event_closed;
+ struct workqueue_struct *event_workq;
+
+ bool is_compat;
+
+ atomic_t setup_complete;
+ atomic_t setup_in_progress;
+
+ u64 *mmu_teardown_pages;
+
+ phys_addr_t aliasing_sink_page;
+
+ struct mutex reg_lock; /* To be converted to a rwlock? */
+ struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */
+
+ unsigned long cookies;
+ struct kbase_va_region *pending_regions[BITS_PER_LONG];
+
+ wait_queue_head_t event_queue;
+ pid_t tgid;
+ pid_t pid;
+
+ struct kbase_jd_context jctx;
+ atomic_t used_pages;
+ atomic_t nonmapped_pages;
+
+ struct kbase_mem_allocator osalloc;
+ struct kbase_mem_allocator *pgd_allocator;
+
+ struct list_head waiting_soft_jobs;
+#ifdef CONFIG_KDS
+ struct list_head waiting_kds_resource;
+#endif
+ /** This is effectively part of the Run Pool, because it only has a valid
+ * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in
+ *
+ * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing
+ * this.
+ *
+ * If the context relating to this as_nr is required, you must use
+ * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear
+ * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock
+ * to ensure the context doesn't disappear (but this has restrictions on what other locks
+ * you can take whilst doing this) */
+ int as_nr;
+
+ /* NOTE:
+ *
+ * Flags are in jctx.sched_info.ctx.flags
+ * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex
+ *
+ * All other flags must be added there */
+ spinlock_t mm_update_lock;
+ struct mm_struct *process_mm;
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ struct kbase_trace_kctx_timeline timeline;
+#endif
+#ifdef CONFIG_DEBUG_FS
+ /* Content of mem_profile file */
+ char *mem_profile_data;
+ /* Size of @c mem_profile_data */
+ size_t mem_profile_size;
+ /* Spinlock guarding data */
+ spinlock_t mem_profile_lock;
+ struct dentry *kctx_dentry;
+#endif /* CONFIG_DEBUG_FS */
+
+ struct jsctx_rb jsctx_rb
+ [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
+
+ /* Number of atoms currently pulled from this context */
+ atomic_t atoms_pulled;
+ /* Number of atoms currently pulled from this context, per slot */
+ atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS];
+ /* true if last kick() caused atoms to be pulled from this context */
+ bool pulled;
+ /* true if infinite cache is to be enabled for new allocations. Existing
+ * allocations will not change. bool stored as a u32 per Linux API */
+ u32 infinite_cache_active;
+ /* Bitmask of slots that can be pulled from */
+ u32 slots_pullable;
+
+ /* true if address space assignment is pending */
+ bool as_pending;
+
+ /* Backend specific data */
+ struct kbase_context_backend backend;
+
+ /* Work structure used for deferred ASID assignment */
+ struct work_struct work;
+
+ /* Only one userspace vinstr client per kbase context */
+ struct kbase_vinstr_client *vinstr_cli;
+ struct mutex vinstr_cli_lock;
+};
+
+enum kbase_reg_access_type {
+ REG_READ,
+ REG_WRITE
+};
+
+enum kbase_share_attr_bits {
+ /* (1ULL << 8) bit is reserved */
+ SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */
+ SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */
+};
+
+/* Conversion helpers for setting up high resolution timers */
+#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U))
+#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
+
+/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
+#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000
+/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
+#define KBASE_AS_INACTIVE_MAX_LOOPS 100000
+
+/* Maximum number of times a job can be replayed */
+#define BASEP_JD_REPLAY_LIMIT 15
+
+#endif /* _KBASE_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c
new file mode 100644
index 00000000000..01e41462f17
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_device.c
@@ -0,0 +1,659 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel device APIs
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_config_defaults.h>
+
+#include <mali_kbase_profiling_gator_api.h>
+
+/* NOTE: Magic - 0x45435254 (TRCE in ASCII).
+ * Supports tracing feature provided in the base module.
+ * Please keep it in sync with the value of base module.
+ */
+#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254
+
+#if KBASE_TRACE_ENABLE
+static const char *kbasep_trace_code_string[] = {
+ /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+ * THIS MUST BE USED AT THE START OF THE ARRAY */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) # X
+#include "mali_kbase_trace_defs.h"
+#undef KBASE_TRACE_CODE_MAKE_CODE
+};
+#endif
+
+#define DEBUG_MESSAGE_SIZE 256
+
+static int kbasep_trace_init(struct kbase_device *kbdev);
+static void kbasep_trace_term(struct kbase_device *kbdev);
+static void kbasep_trace_hook_wrapper(void *param);
+
+struct kbase_device *kbase_device_alloc(void)
+{
+ return kzalloc(sizeof(struct kbase_device), GFP_KERNEL);
+}
+
+static int kbase_device_as_init(struct kbase_device *kbdev, int i)
+{
+ const char format[] = "mali_mmu%d";
+ char name[sizeof(format)];
+ const char poke_format[] = "mali_mmu%d_poker";
+ char poke_name[sizeof(poke_format)];
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ snprintf(poke_name, sizeof(poke_name), poke_format, i);
+
+ snprintf(name, sizeof(name), format, i);
+
+ kbdev->as[i].number = i;
+ kbdev->as[i].fault_addr = 0ULL;
+
+ kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1);
+ if (!kbdev->as[i].pf_wq)
+ return -EINVAL;
+
+ mutex_init(&kbdev->as[i].transaction_mutex);
+ INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker);
+ INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) {
+ struct hrtimer *poke_timer = &kbdev->as[i].poke_timer;
+ struct work_struct *poke_work = &kbdev->as[i].poke_work;
+
+ kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1);
+ if (!kbdev->as[i].poke_wq) {
+ destroy_workqueue(kbdev->as[i].pf_wq);
+ return -EINVAL;
+ }
+ KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work));
+ INIT_WORK(poke_work, kbasep_as_do_poke);
+
+ hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+ poke_timer->function = kbasep_as_poke_timer_callback;
+
+ kbdev->as[i].poke_refcount = 0;
+ kbdev->as[i].poke_state = 0u;
+ }
+
+ return 0;
+}
+
+static void kbase_device_as_term(struct kbase_device *kbdev, int i)
+{
+ destroy_workqueue(kbdev->as[i].pf_wq);
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ destroy_workqueue(kbdev->as[i].poke_wq);
+}
+
+static int kbase_device_all_as_init(struct kbase_device *kbdev)
+{
+ int i, err;
+
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ err = kbase_device_as_init(kbdev, i);
+ if (err)
+ goto free_workqs;
+ }
+
+ return 0;
+
+free_workqs:
+ for (; i > 0; i--)
+ kbase_device_as_term(kbdev, i);
+
+ return err;
+}
+
+static void kbase_device_all_as_term(struct kbase_device *kbdev)
+{
+ int i;
+
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++)
+ kbase_device_as_term(kbdev, i);
+}
+
+int kbase_device_init(struct kbase_device * const kbdev)
+{
+ int i, err;
+
+ spin_lock_init(&kbdev->mmu_mask_change);
+ /* Get the list of workarounds for issues on the current HW
+ * (identified by the GPU_ID register)
+ */
+ err = kbase_hw_set_issues_mask(kbdev);
+ if (err)
+ goto fail;
+ /* Set the list of features available on the current HW
+ * (identified by the GPU_ID register)
+ */
+ kbase_hw_set_features_mask(kbdev);
+
+#if defined(CONFIG_ARM64)
+ set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops);
+#endif /* CONFIG_ARM64 */
+
+ /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our
+ * device structure was created by device-tree
+ */
+ if (!kbdev->dev->dma_mask)
+ kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask;
+
+ err = dma_set_mask(kbdev->dev,
+ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+ if (err)
+ goto dma_set_mask_failed;
+
+ err = dma_set_coherent_mask(kbdev->dev,
+ DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits));
+ if (err)
+ goto dma_set_mask_failed;
+
+ err = kbase_mem_lowlevel_init(kbdev);
+ if (err)
+ goto mem_lowlevel_init_failed;
+
+ kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces;
+
+ err = kbase_device_all_as_init(kbdev);
+ if (err)
+ goto term_lowlevel_mem;
+
+ spin_lock_init(&kbdev->hwcnt.lock);
+
+ err = kbasep_trace_init(kbdev);
+ if (err)
+ goto term_as;
+
+ mutex_init(&kbdev->cacheclean_lock);
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+ kbdev->timeline.slot_atoms_submitted[i] = 0;
+
+ for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i)
+ atomic_set(&kbdev->timeline.pm_event_uid[i], 0);
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+ /* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */
+ for (i = 0; i < FBDUMP_CONTROL_MAX; i++)
+ kbdev->kbase_profiling_controls[i] = 0;
+
+ kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev);
+
+ atomic_set(&kbdev->ctx_num, 0);
+
+ err = kbase_instr_backend_init(kbdev);
+ if (err)
+ goto term_trace;
+
+ kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
+
+ kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS;
+
+ kbdev->mmu_mode = kbase_mmu_mode_get_lpae();
+
+ return 0;
+term_trace:
+ kbasep_trace_term(kbdev);
+term_as:
+ kbase_device_all_as_term(kbdev);
+term_lowlevel_mem:
+ kbase_mem_lowlevel_term(kbdev);
+mem_lowlevel_init_failed:
+dma_set_mask_failed:
+fail:
+ return err;
+}
+
+void kbase_device_term(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+#if KBASE_TRACE_ENABLE
+ kbase_debug_assert_register_hook(NULL, NULL);
+#endif
+
+ kbase_instr_backend_term(kbdev);
+
+ kbasep_trace_term(kbdev);
+
+ kbase_device_all_as_term(kbdev);
+
+ kbase_mem_lowlevel_term(kbdev);
+}
+
+void kbase_device_free(struct kbase_device *kbdev)
+{
+ kfree(kbdev);
+}
+
+void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(tb);
+
+ /* set up the header */
+ /* magic number in the first 4 bytes */
+ tb[0] = TRACE_BUFFER_HEADER_SPECIAL;
+ /* Store (write offset = 0, wrap counter = 0, transaction active = no)
+ * write offset 0 means never written.
+ * Offsets 1 to (wrap_offset - 1) used to store values when trace started
+ */
+ tb[1] = 0;
+
+ /* install trace buffer */
+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+ kctx->jctx.tb_wrap_offset = size / 8;
+ kctx->jctx.tb = tb;
+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx)
+{
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+ kctx->jctx.tb = NULL;
+ kctx->jctx.tb_wrap_offset = 0;
+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->jctx.tb_lock, flags);
+ if (kctx->jctx.tb) {
+ u16 wrap_count;
+ u16 write_offset;
+ u32 *tb = kctx->jctx.tb;
+ u32 header_word;
+
+ header_word = tb[1];
+ KBASE_DEBUG_ASSERT(0 == (header_word & 0x1));
+
+ wrap_count = (header_word >> 1) & 0x7FFF;
+ write_offset = (header_word >> 16) & 0xFFFF;
+
+ /* mark as transaction in progress */
+ tb[1] |= 0x1;
+ mb();
+
+ /* calculate new offset */
+ write_offset++;
+ if (write_offset == kctx->jctx.tb_wrap_offset) {
+ /* wrap */
+ write_offset = 1;
+ wrap_count++;
+ wrap_count &= 0x7FFF; /* 15bit wrap counter */
+ }
+
+ /* store the trace entry at the selected offset */
+ tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0);
+ tb[write_offset * 2 + 1] = reg_value;
+ mb();
+
+ /* new header word */
+ header_word = (write_offset << 16) | (wrap_count << 1) | 0x0; /* transaction complete */
+ tb[1] = header_word;
+ }
+ spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags);
+}
+
+/*
+ * Device trace functions
+ */
+#if KBASE_TRACE_ENABLE
+
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+ struct kbase_trace *rbuf;
+
+ rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL);
+
+ if (!rbuf)
+ return -EINVAL;
+
+ kbdev->trace_rbuf = rbuf;
+ spin_lock_init(&kbdev->trace_lock);
+ return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+ kfree(kbdev->trace_rbuf);
+}
+
+static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len)
+{
+ s32 written = 0;
+
+ /* Initial part of message */
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0);
+
+ if (trace_msg->katom)
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0);
+
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0);
+
+ /* NOTE: Could add function callbacks to handle different message types */
+ /* Jobslot present */
+ if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT)
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0);
+
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+ /* Refcount present */
+ if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT)
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0);
+
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0);
+
+ /* Rest of message */
+ written += MAX(snprintf(buffer + written, MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0);
+}
+
+static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg)
+{
+ char buffer[DEBUG_MESSAGE_SIZE];
+
+ kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+ dev_dbg(kbdev->dev, "%s", buffer);
+}
+
+void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val)
+{
+ unsigned long irqflags;
+ struct kbase_trace *trace_msg;
+
+ spin_lock_irqsave(&kbdev->trace_lock, irqflags);
+
+ trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in];
+
+ /* Fill the message */
+ trace_msg->thread_id = task_pid_nr(current);
+ trace_msg->cpu = task_cpu(current);
+
+ getnstimeofday(&trace_msg->timestamp);
+
+ trace_msg->code = code;
+ trace_msg->ctx = ctx;
+
+ if (NULL == katom) {
+ trace_msg->katom = false;
+ } else {
+ trace_msg->katom = true;
+ trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom);
+ trace_msg->atom_udata[0] = katom->udata.blob[0];
+ trace_msg->atom_udata[1] = katom->udata.blob[1];
+ }
+
+ trace_msg->gpu_addr = gpu_addr;
+ trace_msg->jobslot = jobslot;
+ trace_msg->refcount = MIN((unsigned int)refcount, 0xFF);
+ trace_msg->info_val = info_val;
+ trace_msg->flags = flags;
+
+ /* Update the ringbuffer indices */
+ kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK;
+ if (kbdev->trace_next_in == kbdev->trace_first_out)
+ kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK;
+
+ /* Done */
+
+ spin_unlock_irqrestore(&kbdev->trace_lock, irqflags);
+}
+
+void kbasep_trace_clear(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->trace_lock, flags);
+ kbdev->trace_first_out = kbdev->trace_next_in;
+ spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ u32 start;
+ u32 end;
+
+ dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val");
+ spin_lock_irqsave(&kbdev->trace_lock, flags);
+ start = kbdev->trace_first_out;
+ end = kbdev->trace_next_in;
+
+ while (start != end) {
+ struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start];
+
+ kbasep_trace_dump_msg(kbdev, trace_msg);
+
+ start = (start + 1) & KBASE_TRACE_MASK;
+ }
+ dev_dbg(kbdev->dev, "TRACE_END");
+
+ spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+ KBASE_TRACE_CLEAR(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+ struct kbase_device *kbdev = (struct kbase_device *)param;
+
+ kbasep_trace_dump(kbdev);
+}
+
+#ifdef CONFIG_DEBUG_FS
+struct trace_seq_state {
+ struct kbase_trace trace_buf[KBASE_TRACE_SIZE];
+ u32 start;
+ u32 end;
+};
+
+static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct trace_seq_state *state = s->private;
+ int i;
+
+ if (*pos > KBASE_TRACE_SIZE)
+ return NULL;
+ i = state->start + *pos;
+ if ((state->end >= state->start && i >= state->end) ||
+ i >= state->end + KBASE_TRACE_SIZE)
+ return NULL;
+
+ i &= KBASE_TRACE_MASK;
+
+ return &state->trace_buf[i];
+}
+
+static void kbasep_trace_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+ struct trace_seq_state *state = s->private;
+ int i;
+
+ (*pos)++;
+
+ i = (state->start + *pos) & KBASE_TRACE_MASK;
+ if (i == state->end)
+ return NULL;
+
+ return &state->trace_buf[i];
+}
+
+static int kbasep_trace_seq_show(struct seq_file *s, void *data)
+{
+ struct kbase_trace *trace_msg = data;
+ char buffer[DEBUG_MESSAGE_SIZE];
+
+ kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE);
+ seq_printf(s, "%s\n", buffer);
+ return 0;
+}
+
+static const struct seq_operations kbasep_trace_seq_ops = {
+ .start = kbasep_trace_seq_start,
+ .next = kbasep_trace_seq_next,
+ .stop = kbasep_trace_seq_stop,
+ .show = kbasep_trace_seq_show,
+};
+
+static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file)
+{
+ struct kbase_device *kbdev = inode->i_private;
+ unsigned long flags;
+
+ struct trace_seq_state *state;
+
+ state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state));
+ if (!state)
+ return -ENOMEM;
+
+ spin_lock_irqsave(&kbdev->trace_lock, flags);
+ state->start = kbdev->trace_first_out;
+ state->end = kbdev->trace_next_in;
+ memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf));
+ spin_unlock_irqrestore(&kbdev->trace_lock, flags);
+
+ return 0;
+}
+
+static const struct file_operations kbasep_trace_debugfs_fops = {
+ .open = kbasep_trace_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+ debugfs_create_file("mali_trace", S_IRUGO,
+ kbdev->mali_debugfs_directory, kbdev,
+ &kbasep_trace_debugfs_fops);
+}
+
+#else
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+#else /* KBASE_TRACE_ENABLE */
+static int kbasep_trace_init(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+ return 0;
+}
+
+static void kbasep_trace_term(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+static void kbasep_trace_hook_wrapper(void *param)
+{
+ CSTD_UNUSED(param);
+}
+
+void kbasep_trace_dump(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value)
+{
+ switch (control) {
+ case FBDUMP_CONTROL_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RATE:
+ /* fall through */
+ case SW_COUNTER_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RESIZE_FACTOR:
+ kbdev->kbase_profiling_controls[control] = value;
+ break;
+ default:
+ dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+ break;
+ }
+}
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control)
+{
+ u32 ret_value = 0;
+
+ switch (control) {
+ case FBDUMP_CONTROL_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RATE:
+ /* fall through */
+ case SW_COUNTER_ENABLE:
+ /* fall through */
+ case FBDUMP_CONTROL_RESIZE_FACTOR:
+ ret_value = kbdev->kbase_profiling_controls[control];
+ break;
+ default:
+ dev_err(kbdev->dev, "Profiling control %d not found\n", control);
+ break;
+ }
+
+ return ret_value;
+}
+
+/*
+ * Called by gator to control the production of
+ * profiling information at runtime
+ * */
+
+void _mali_profiling_control(u32 action, u32 value)
+{
+ struct kbase_device *kbdev = NULL;
+
+ /* find the first i.e. call with -1 */
+ kbdev = kbase_find_device(-1);
+
+ if (NULL != kbdev)
+ kbase_set_profiling_control(kbdev, action, value);
+}
+KBASE_EXPORT_SYMBOL(_mali_profiling_control);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
new file mode 100644
index 00000000000..f70bcccf405
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Base kernel disjoint events helper functions
+ */
+
+#include <mali_kbase.h>
+
+void kbase_disjoint_init(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ atomic_set(&kbdev->disjoint_event.count, 0);
+ atomic_set(&kbdev->disjoint_event.state, 0);
+}
+
+/* increment the disjoint event count */
+void kbase_disjoint_event(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ atomic_inc(&kbdev->disjoint_event.count);
+}
+
+/* increment the state and the event counter */
+void kbase_disjoint_state_up(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ atomic_inc(&kbdev->disjoint_event.state);
+
+ kbase_disjoint_event(kbdev);
+}
+
+/* decrement the state */
+void kbase_disjoint_state_down(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0);
+
+ kbase_disjoint_event(kbdev);
+
+ atomic_dec(&kbdev->disjoint_event.state);
+}
+
+/* increments the count only if the state is > 0 */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ if (atomic_read(&kbdev->disjoint_event.state))
+ kbase_disjoint_event(kbdev);
+}
+
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ return atomic_read(&kbdev->disjoint_event.count);
+}
+KBASE_EXPORT_TEST_API(kbase_disjoint_event_get);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c
new file mode 100644
index 00000000000..784acecae17
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_event.c
@@ -0,0 +1,195 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_debug.h>
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+
+static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct base_jd_udata data;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+ data = katom->udata;
+
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight));
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_nret_atom_ctx(katom, kctx);
+ kbase_tlstream_tl_del_atom(katom);
+#endif
+
+ mutex_lock(&kctx->jctx.lock);
+ katom->status = KBASE_JD_ATOM_STATE_UNUSED;
+ mutex_unlock(&kctx->jctx.lock);
+
+ wake_up(&katom->completed);
+
+ return data;
+}
+
+int kbase_event_pending(struct kbase_context *ctx)
+{
+ int ret;
+
+ KBASE_DEBUG_ASSERT(ctx);
+
+ mutex_lock(&ctx->event_mutex);
+ ret = (!list_empty(&ctx->event_list)) || (true == ctx->event_closed);
+ mutex_unlock(&ctx->event_mutex);
+
+ return ret;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_pending);
+
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent)
+{
+ struct kbase_jd_atom *atom;
+
+ KBASE_DEBUG_ASSERT(ctx);
+
+ mutex_lock(&ctx->event_mutex);
+
+ if (list_empty(&ctx->event_list)) {
+ if (!ctx->event_closed) {
+ mutex_unlock(&ctx->event_mutex);
+ return -1;
+ }
+
+ /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */
+ mutex_unlock(&ctx->event_mutex);
+ uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED;
+ memset(&uevent->udata, 0, sizeof(uevent->udata));
+ dev_dbg(ctx->kbdev->dev,
+ "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n",
+ BASE_JD_EVENT_DRV_TERMINATED);
+ return 0;
+ }
+
+ /* normal event processing */
+ atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]);
+ list_del(ctx->event_list.next);
+
+ mutex_unlock(&ctx->event_mutex);
+
+ dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom);
+ uevent->event_code = atom->event_code;
+ uevent->atom_number = (atom - ctx->jctx.atoms);
+ uevent->udata = kbase_event_process(ctx, atom);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_dequeue);
+
+static void kbase_event_post_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *atom = container_of(data, struct kbase_jd_atom, work);
+ struct kbase_context *ctx = atom->kctx;
+
+ if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+ kbase_jd_free_external_resources(atom);
+
+ if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) {
+ if (atom->event_code == BASE_JD_EVENT_DONE) {
+ /* Don't report the event */
+ kbase_event_process(ctx, atom);
+ return;
+ }
+ }
+
+ if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) {
+ /* Don't report the event */
+ kbase_event_process(ctx, atom);
+ return;
+ }
+
+ mutex_lock(&ctx->event_mutex);
+ list_add_tail(&atom->dep_item[0], &ctx->event_list);
+ mutex_unlock(&ctx->event_mutex);
+
+ kbase_event_wakeup(ctx);
+}
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom)
+{
+ KBASE_DEBUG_ASSERT(ctx);
+ KBASE_DEBUG_ASSERT(ctx->event_workq);
+ KBASE_DEBUG_ASSERT(atom);
+
+ INIT_WORK(&atom->work, kbase_event_post_worker);
+ queue_work(ctx->event_workq, &atom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_post);
+
+void kbase_event_close(struct kbase_context *kctx)
+{
+ mutex_lock(&kctx->event_mutex);
+ kctx->event_closed = true;
+ mutex_unlock(&kctx->event_mutex);
+ kbase_event_wakeup(kctx);
+}
+
+int kbase_event_init(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+
+ INIT_LIST_HEAD(&kctx->event_list);
+ mutex_init(&kctx->event_mutex);
+ kctx->event_closed = false;
+ kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1);
+
+ if (NULL == kctx->event_workq)
+ return -EINVAL;
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_init);
+
+void kbase_event_cleanup(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(kctx->event_workq);
+
+ flush_workqueue(kctx->event_workq);
+ destroy_workqueue(kctx->event_workq);
+
+ /* We use kbase_event_dequeue to remove the remaining events as that
+ * deals with all the cleanup needed for the atoms.
+ *
+ * Note: use of kctx->event_list without a lock is safe because this must be the last
+ * thread using it (because we're about to terminate the lock)
+ */
+ while (!list_empty(&kctx->event_list)) {
+ struct base_jd_event_v2 event;
+
+ kbase_event_dequeue(kctx, &event);
+ }
+}
+
+KBASE_EXPORT_TEST_API(kbase_event_cleanup);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator.h b/drivers/gpu/arm/midgard/mali_kbase_gator.h
new file mode 100644
index 00000000000..ce65b5562a2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/* NB taken from gator */
+/*
+ * List of possible actions to be controlled by DS-5 Streamline.
+ * The following numbers are used by gator to control the frame buffer dumping
+ * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because
+ * they are unknown inside gator.
+ */
+#ifndef _KBASE_GATOR_H_
+#define _KBASE_GATOR_H_
+
+#ifdef CONFIG_MALI_GATOR_SUPPORT
+#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16))
+#define GATOR_JOB_SLOT_START 1
+#define GATOR_JOB_SLOT_STOP 2
+#define GATOR_JOB_SLOT_SOFT_STOPPED 3
+
+void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id);
+void kbase_trace_mali_pm_status(u32 event, u64 value);
+void kbase_trace_mali_pm_power_off(u32 event, u64 value);
+void kbase_trace_mali_pm_power_on(u32 event, u64 value);
+void kbase_trace_mali_page_fault_insert_pages(int event, u32 value);
+void kbase_trace_mali_mmu_as_in_use(int event);
+void kbase_trace_mali_mmu_as_released(int event);
+void kbase_trace_mali_total_alloc_pages_change(long long int event);
+
+#endif /* CONFIG_MALI_GATOR_SUPPORT */
+
+#endif /* _KBASE_GATOR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
new file mode 100644
index 00000000000..6ef4e39c5cc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c
@@ -0,0 +1,322 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#include "mali_kbase_mem_linux.h"
+#include "mali_kbase_gator_api.h"
+#include "mali_kbase_gator_hwcnt_names.h"
+#include "mali_kbase_instr.h"
+
+#define MALI_MAX_CORES_PER_GROUP 4
+#define MALI_MAX_NUM_BLOCKS_PER_GROUP 8
+#define MALI_COUNTERS_PER_BLOCK 64
+#define MALI_BYTES_PER_COUNTER 4
+
+struct kbase_gator_hwcnt_handles {
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx;
+ u64 hwcnt_gpu_va;
+ void *hwcnt_cpu_va;
+ struct kbase_vmap_struct hwcnt_map;
+};
+
+const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters)
+{
+ uint32_t gpu_id;
+ const char * const *hardware_counters;
+ struct kbase_device *kbdev;
+
+ if (!total_counters)
+ return NULL;
+
+ /* Get the first device - it doesn't matter in this case */
+ kbdev = kbase_find_device(-1);
+ if (!kbdev)
+ return NULL;
+
+ gpu_id = kbdev->gpu_props.props.core_props.product_id;
+
+ switch (gpu_id) {
+ /* If we are using a Mali-T60x device */
+ case GPU_ID_PI_T60X:
+ hardware_counters = hardware_counters_mali_t60x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t60x);
+ break;
+ /* If we are using a Mali-T62x device */
+ case GPU_ID_PI_T62X:
+ hardware_counters = hardware_counters_mali_t62x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t62x);
+ break;
+ /* If we are using a Mali-T72x device */
+ case GPU_ID_PI_T72X:
+ hardware_counters = hardware_counters_mali_t72x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t72x);
+ break;
+ /* If we are using a Mali-T76x device */
+ case GPU_ID_PI_T76X:
+ hardware_counters = hardware_counters_mali_t76x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t76x);
+ break;
+ /* If we are using a Mali-T82x device */
+ case GPU_ID_PI_T82X:
+ hardware_counters = hardware_counters_mali_t82x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t82x);
+ break;
+ /* If we are using a Mali-T83x device */
+ case GPU_ID_PI_T83X:
+ hardware_counters = hardware_counters_mali_t83x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t83x);
+ break;
+ /* If we are using a Mali-T86x device */
+ case GPU_ID_PI_T86X:
+ hardware_counters = hardware_counters_mali_t86x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t86x);
+ break;
+ /* If we are using a Mali-T88x device */
+ case GPU_ID_PI_TFRX:
+ hardware_counters = hardware_counters_mali_t88x;
+ *total_counters = ARRAY_SIZE(hardware_counters_mali_t88x);
+ break;
+ default:
+ hardware_counters = NULL;
+ *total_counters = 0;
+ dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", gpu_id);
+ break;
+ }
+
+ /* Release the kbdev reference. */
+ kbase_release_device(kbdev);
+
+ /* If we return a string array take a reference on the module (or fail). */
+ if (hardware_counters && !try_module_get(THIS_MODULE))
+ return NULL;
+
+ return hardware_counters;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names);
+
+void kbase_gator_hwcnt_term_names(void)
+{
+ /* Release the module reference. */
+ module_put(THIS_MODULE);
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names);
+
+struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info)
+{
+ struct kbase_gator_hwcnt_handles *hand;
+ struct kbase_uk_hwcnt_setup setup;
+ int err;
+ uint32_t dump_size = 0, i = 0;
+ struct kbase_va_region *reg;
+ u64 flags;
+ u64 nr_pages;
+ u16 va_alignment = 0;
+
+ if (!in_out_info)
+ return NULL;
+
+ hand = kzalloc(sizeof(*hand), GFP_KERNEL);
+ if (!hand)
+ return NULL;
+
+ /* Get the first device */
+ hand->kbdev = kbase_find_device(-1);
+ if (!hand->kbdev)
+ goto free_hand;
+
+ /* Create a kbase_context */
+ hand->kctx = kbase_create_context(hand->kbdev, true);
+ if (!hand->kctx)
+ goto release_device;
+
+ in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores;
+ in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups;
+ in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id;
+
+ /* If we are using a Mali-T6xx or Mali-T72x device */
+ if (in_out_info->gpu_id == GPU_ID_PI_T60X ||
+ in_out_info->gpu_id == GPU_ID_PI_T62X ||
+ in_out_info->gpu_id == GPU_ID_PI_T72X) {
+ uint32_t cg, j;
+ uint64_t core_mask;
+
+ /* There are 8 hardware counters blocks per core group */
+ in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) *
+ MALI_MAX_NUM_BLOCKS_PER_GROUP *
+ in_out_info->nr_core_groups, GFP_KERNEL);
+
+ if (!in_out_info->hwc_layout)
+ goto destroy_context;
+
+ dump_size = in_out_info->nr_core_groups *
+ MALI_MAX_NUM_BLOCKS_PER_GROUP *
+ MALI_COUNTERS_PER_BLOCK *
+ MALI_BYTES_PER_COUNTER;
+
+ for (cg = 0; cg < in_out_info->nr_core_groups; cg++) {
+ core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask;
+
+ for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) {
+ if (core_mask & (1u << j))
+ in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+ else
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+ }
+
+ in_out_info->hwc_layout[i++] = TILER_BLOCK;
+ in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+
+ if (0 == cg)
+ in_out_info->hwc_layout[i++] = JM_BLOCK;
+ else
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+ }
+ /* If we are using any other device */
+ } else {
+ uint32_t nr_l2, nr_sc, j;
+ uint64_t core_mask;
+
+ nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices;
+
+ core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask;
+
+ nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores;
+
+ /* The job manager and tiler sets of counters
+ * are always present */
+ in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL);
+
+ if (!in_out_info->hwc_layout)
+ goto destroy_context;
+
+ dump_size = (2 + nr_sc + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER;
+
+ in_out_info->hwc_layout[i++] = JM_BLOCK;
+ in_out_info->hwc_layout[i++] = TILER_BLOCK;
+
+ for (j = 0; j < nr_l2; j++)
+ in_out_info->hwc_layout[i++] = MMU_L2_BLOCK;
+
+ while (core_mask != 0ull) {
+ if ((core_mask & 1ull) != 0ull)
+ in_out_info->hwc_layout[i++] = SHADER_BLOCK;
+ else
+ in_out_info->hwc_layout[i++] = RESERVED_BLOCK;
+ core_mask >>= 1;
+ }
+ }
+
+ in_out_info->nr_hwc_blocks = i;
+
+ in_out_info->size = dump_size;
+
+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+ nr_pages = PFN_UP(dump_size);
+ reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0,
+ &flags, &hand->hwcnt_gpu_va, &va_alignment);
+ if (!reg)
+ goto free_layout;
+
+ hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va,
+ dump_size, &hand->hwcnt_map);
+
+ if (!hand->hwcnt_cpu_va)
+ goto free_buffer;
+
+ in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va;
+
+ /*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/
+ setup.dump_buffer = hand->hwcnt_gpu_va;
+ setup.jm_bm = in_out_info->bitmask[0];
+ setup.tiler_bm = in_out_info->bitmask[1];
+ setup.shader_bm = in_out_info->bitmask[2];
+ setup.mmu_l2_bm = in_out_info->bitmask[3];
+
+ err = kbase_instr_hwcnt_enable(hand->kctx, &setup);
+ if (err)
+ goto free_unmap;
+
+ kbase_instr_hwcnt_clear(hand->kctx);
+
+ return hand;
+
+free_unmap:
+ kbase_vunmap(hand->kctx, &hand->hwcnt_map);
+
+free_buffer:
+ kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va);
+
+free_layout:
+ kfree(in_out_info->hwc_layout);
+
+destroy_context:
+ kbase_destroy_context(hand->kctx);
+
+release_device:
+ kbase_release_device(hand->kbdev);
+
+free_hand:
+ kfree(hand);
+
+ return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init);
+
+void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+ if (in_out_info)
+ kfree(in_out_info->hwc_layout);
+
+ if (opaque_handles) {
+ kbase_instr_hwcnt_disable(opaque_handles->kctx);
+ kbase_vunmap(opaque_handles->kctx, &opaque_handles->hwcnt_map);
+ kbase_mem_free(opaque_handles->kctx, opaque_handles->hwcnt_gpu_va);
+ kbase_destroy_context(opaque_handles->kctx);
+ kbase_release_device(opaque_handles->kbdev);
+ kfree(opaque_handles);
+ }
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term);
+
+uint32_t kbase_gator_instr_hwcnt_dump_complete(
+ struct kbase_gator_hwcnt_handles *opaque_handles,
+ uint32_t * const success)
+{
+ bool ret_res, success_res;
+
+ if (opaque_handles && success) {
+ ret_res = kbase_instr_hwcnt_dump_complete(opaque_handles->kctx,
+ &success_res);
+ *success = (uint32_t)success_res;
+ return (uint32_t)(ret_res != 0);
+ }
+ return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete);
+
+uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles)
+{
+ if (opaque_handles)
+ return (kbase_instr_hwcnt_request_dump(
+ opaque_handles->kctx) == 0);
+
+ return 0;
+}
+KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
new file mode 100644
index 00000000000..ef9ac0f7b63
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h
@@ -0,0 +1,219 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_API_H_
+#define _KBASE_GATOR_API_H_
+
+/**
+ * @brief This file describes the API used by Gator to fetch hardware counters.
+ */
+
+/* This define is used by the gator kernel module compile to select which DDK
+ * API calling convention to use. If not defined (legacy DDK) gator assumes
+ * version 1. The version to DDK release mapping is:
+ * Version 1 API: DDK versions r1px, r2px
+ * Version 2 API: DDK versions r3px, r4px
+ * Version 3 API: DDK version r5p0 and newer
+ *
+ * API Usage
+ * =========
+ *
+ * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter
+ * names for the GPU present in this device.
+ *
+ * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for
+ * the counters you want enabled. The enables can all be set for simplicity in
+ * most use cases, but disabling some will let you minimize bandwidth impact.
+ *
+ * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a
+ * counter context. On successful return the DDK will have populated the
+ * structure with a variety of useful information.
+ *
+ * 4] Call kbase_gator_hwcnt_dump_irq() to queue a non-blocking request for a
+ * counter dump. If this returns a non-zero value the request has been queued,
+ * otherwise the driver has been unable to do so (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 5] Call kbase_gator_hwcnt_dump_complete() to test whether the previously
+ * requested dump has been succesful. If this returns non-zero the counter dump
+ * has resolved, but the value of *success must also be tested as the dump
+ * may have not been successful. If it returns zero the counter dump was
+ * abandoned due to the device being busy (typically because of another
+ * user of the instrumentation exists concurrently).
+ *
+ * 6] Process the counters stored in the buffer pointed to by ...
+ *
+ * kbase_gator_hwcnt_info->kernel_dump_buffer
+ *
+ * In pseudo code you can find all of the counters via this approach:
+ *
+ *
+ * hwcnt_info # pointer to kbase_gator_hwcnt_info structure
+ * hwcnt_name # pointer to name list
+ *
+ * u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer
+ *
+ * # Iterate over each 64-counter block in this GPU configuration
+ * for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) {
+ * hwc_type type = hwcnt_info->hwc_layout[i];
+ *
+ * # Skip reserved type blocks - they contain no counters at all
+ * if( type == RESERVED_BLOCK ) {
+ * continue;
+ * }
+ *
+ * size_t name_offset = type * 64;
+ * size_t data_offset = i * 64;
+ *
+ * # Iterate over the names of the counters in this block type
+ * for( j = 0; j < 64; j++) {
+ * const char * name = hwcnt_name[name_offset+j];
+ *
+ * # Skip empty name strings - there is no counter here
+ * if( name[0] == '\0' ) {
+ * continue;
+ * }
+ *
+ * u32 data = hwcnt_data[data_offset+j];
+ *
+ * printk( "COUNTER: %s DATA: %u\n", name, data );
+ * }
+ * }
+ *
+ *
+ * Note that in most implementations you typically want to either SUM or
+ * AVERAGE multiple instances of the same counter if, for example, you have
+ * multiple shader cores or multiple L2 caches. The most sensible view for
+ * analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU
+ * counters.
+ *
+ * 7] Goto 4, repeating until you want to stop collecting counters.
+ *
+ * 8] Release the dump resources by calling kbase_gator_hwcnt_term().
+ *
+ * 9] Release the name table resources by calling
+ * kbase_gator_hwcnt_term_names(). This function must only be called if
+ * init_names() returned a non-NULL value.
+ **/
+
+#define MALI_DDK_GATOR_API_VERSION 3
+
+enum hwc_type {
+ JM_BLOCK = 0,
+ TILER_BLOCK,
+ SHADER_BLOCK,
+ MMU_L2_BLOCK,
+ RESERVED_BLOCK
+};
+
+struct kbase_gator_hwcnt_info {
+ /* Passed from Gator to kbase */
+
+ /* the bitmask of enabled hardware counters for each counter block */
+ uint16_t bitmask[4];
+
+ /* Passed from kbase to Gator */
+
+ /* ptr to counter dump memory */
+ void *kernel_dump_buffer;
+
+ /* size of counter dump memory */
+ uint32_t size;
+
+ /* the ID of the Mali device */
+ uint32_t gpu_id;
+
+ /* the number of shader cores in the GPU */
+ uint32_t nr_cores;
+
+ /* the number of core groups */
+ uint32_t nr_core_groups;
+
+ /* the memory layout of the performance counters */
+ enum hwc_type *hwc_layout;
+
+ /* the total number of hardware couter blocks */
+ uint32_t nr_hwc_blocks;
+};
+
+/**
+ * @brief Opaque block of Mali data which Gator needs to return to the API later.
+ */
+struct kbase_gator_hwcnt_handles;
+
+/**
+ * @brief Initialize the resources Gator needs for performance profiling.
+ *
+ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali
+ * specific information that will be returned to Gator. On entry Gator must have populated the
+ * 'bitmask' field with the counters it wishes to enable for each class of counter block.
+ * Each entry in the array corresponds to a single counter class based on the "hwc_type"
+ * enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables
+ * the first 4 counters in the block, and so on). See the GPU counter array as returned by
+ * kbase_gator_hwcnt_get_names() for the index values of each counter for the curernt GPU.
+ *
+ * @return Pointer to an opaque handle block on success, NULL on error.
+ */
+extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info);
+
+/**
+ * @brief Free all resources once Gator has finished using performance counters.
+ *
+ * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the
+ * Mali specific information that will be returned to Gator.
+ * @param opaque_handles A wrapper structure for kbase structures.
+ */
+extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief Poll whether a counter dump is successful.
+ *
+ * @param opaque_handles A wrapper structure for kbase structures.
+ * @param[out] success Non-zero on success, zero on failure.
+ *
+ * @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a
+ * completed dump may not have dumped succesfully, so the caller must test for both
+ * a completed and successful dump before processing counters.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success);
+
+/**
+ * @brief Request the generation of a new counter dump.
+ *
+ * @param opaque_handles A wrapper structure for kbase structures.
+ *
+ * @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise.
+ */
+extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles);
+
+/**
+ * @brief This function is used to fetch the names table based on the Mali device in use.
+ *
+ * @param[out] total_counters The total number of counters short names in the Mali devices' list.
+ *
+ * @return Pointer to an array of strings of length *total_counters.
+ */
+extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters);
+
+/**
+ * @brief This function is used to terminate the use of the names table.
+ *
+ * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value.
+ */
+extern void kbase_gator_hwcnt_term_names(void);
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
new file mode 100644
index 00000000000..d124e82edd0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h
@@ -0,0 +1,2159 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_GATOR_HWCNT_NAMES_H_
+#define _KBASE_GATOR_HWCNT_NAMES_H_
+
+/*
+ * "Short names" for hardware counters used by Streamline. Counters names are
+ * stored in accordance with their memory layout in the binary counter block
+ * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block
+ * of 64 counters, and each GPU implements the same set of "masters" although
+ * the counters each master exposes within its block of 64 may vary.
+ *
+ * Counters which are an empty string are simply "holes" in the counter memory
+ * where no counter exists.
+ */
+
+static const char * const hardware_counters_mali_t60x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T60x_MESSAGES_SENT",
+ "T60x_MESSAGES_RECEIVED",
+ "T60x_GPU_ACTIVE",
+ "T60x_IRQ_ACTIVE",
+ "T60x_JS0_JOBS",
+ "T60x_JS0_TASKS",
+ "T60x_JS0_ACTIVE",
+ "",
+ "T60x_JS0_WAIT_READ",
+ "T60x_JS0_WAIT_ISSUE",
+ "T60x_JS0_WAIT_DEPEND",
+ "T60x_JS0_WAIT_FINISH",
+ "T60x_JS1_JOBS",
+ "T60x_JS1_TASKS",
+ "T60x_JS1_ACTIVE",
+ "",
+ "T60x_JS1_WAIT_READ",
+ "T60x_JS1_WAIT_ISSUE",
+ "T60x_JS1_WAIT_DEPEND",
+ "T60x_JS1_WAIT_FINISH",
+ "T60x_JS2_JOBS",
+ "T60x_JS2_TASKS",
+ "T60x_JS2_ACTIVE",
+ "",
+ "T60x_JS2_WAIT_READ",
+ "T60x_JS2_WAIT_ISSUE",
+ "T60x_JS2_WAIT_DEPEND",
+ "T60x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T60x_TI_JOBS_PROCESSED",
+ "T60x_TI_TRIANGLES",
+ "T60x_TI_QUADS",
+ "T60x_TI_POLYGONS",
+ "T60x_TI_POINTS",
+ "T60x_TI_LINES",
+ "T60x_TI_VCACHE_HIT",
+ "T60x_TI_VCACHE_MISS",
+ "T60x_TI_FRONT_FACING",
+ "T60x_TI_BACK_FACING",
+ "T60x_TI_PRIM_VISIBLE",
+ "T60x_TI_PRIM_CULLED",
+ "T60x_TI_PRIM_CLIPPED",
+ "T60x_TI_LEVEL0",
+ "T60x_TI_LEVEL1",
+ "T60x_TI_LEVEL2",
+ "T60x_TI_LEVEL3",
+ "T60x_TI_LEVEL4",
+ "T60x_TI_LEVEL5",
+ "T60x_TI_LEVEL6",
+ "T60x_TI_LEVEL7",
+ "T60x_TI_COMMAND_1",
+ "T60x_TI_COMMAND_2",
+ "T60x_TI_COMMAND_3",
+ "T60x_TI_COMMAND_4",
+ "T60x_TI_COMMAND_4_7",
+ "T60x_TI_COMMAND_8_15",
+ "T60x_TI_COMMAND_16_63",
+ "T60x_TI_COMMAND_64",
+ "T60x_TI_COMPRESS_IN",
+ "T60x_TI_COMPRESS_OUT",
+ "T60x_TI_COMPRESS_FLUSH",
+ "T60x_TI_TIMESTAMPS",
+ "T60x_TI_PCACHE_HIT",
+ "T60x_TI_PCACHE_MISS",
+ "T60x_TI_PCACHE_LINE",
+ "T60x_TI_PCACHE_STALL",
+ "T60x_TI_WRBUF_HIT",
+ "T60x_TI_WRBUF_MISS",
+ "T60x_TI_WRBUF_LINE",
+ "T60x_TI_WRBUF_PARTIAL",
+ "T60x_TI_WRBUF_STALL",
+ "T60x_TI_ACTIVE",
+ "T60x_TI_LOADING_DESC",
+ "T60x_TI_INDEX_WAIT",
+ "T60x_TI_INDEX_RANGE_WAIT",
+ "T60x_TI_VERTEX_WAIT",
+ "T60x_TI_PCACHE_WAIT",
+ "T60x_TI_WRBUF_WAIT",
+ "T60x_TI_BUS_READ",
+ "T60x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T60x_TI_UTLB_STALL",
+ "T60x_TI_UTLB_REPLAY_MISS",
+ "T60x_TI_UTLB_REPLAY_FULL",
+ "T60x_TI_UTLB_NEW_MISS",
+ "T60x_TI_UTLB_HIT",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T60x_FRAG_ACTIVE",
+ "T60x_FRAG_PRIMITIVES",
+ "T60x_FRAG_PRIMITIVES_DROPPED",
+ "T60x_FRAG_CYCLES_DESC",
+ "T60x_FRAG_CYCLES_PLR",
+ "T60x_FRAG_CYCLES_VERT",
+ "T60x_FRAG_CYCLES_TRISETUP",
+ "T60x_FRAG_CYCLES_RAST",
+ "T60x_FRAG_THREADS",
+ "T60x_FRAG_DUMMY_THREADS",
+ "T60x_FRAG_QUADS_RAST",
+ "T60x_FRAG_QUADS_EZS_TEST",
+ "T60x_FRAG_QUADS_EZS_KILLED",
+ "T60x_FRAG_THREADS_LZS_TEST",
+ "T60x_FRAG_THREADS_LZS_KILLED",
+ "T60x_FRAG_CYCLES_NO_TILE",
+ "T60x_FRAG_NUM_TILES",
+ "T60x_FRAG_TRANS_ELIM",
+ "T60x_COMPUTE_ACTIVE",
+ "T60x_COMPUTE_TASKS",
+ "T60x_COMPUTE_THREADS",
+ "T60x_COMPUTE_CYCLES_DESC",
+ "T60x_TRIPIPE_ACTIVE",
+ "T60x_ARITH_WORDS",
+ "T60x_ARITH_CYCLES_REG",
+ "T60x_ARITH_CYCLES_L0",
+ "T60x_ARITH_FRAG_DEPEND",
+ "T60x_LS_WORDS",
+ "T60x_LS_ISSUES",
+ "T60x_LS_RESTARTS",
+ "T60x_LS_REISSUES_MISS",
+ "T60x_LS_REISSUES_VD",
+ "T60x_LS_REISSUE_ATTRIB_MISS",
+ "T60x_LS_NO_WB",
+ "T60x_TEX_WORDS",
+ "T60x_TEX_BUBBLES",
+ "T60x_TEX_WORDS_L0",
+ "T60x_TEX_WORDS_DESC",
+ "T60x_TEX_ISSUES",
+ "T60x_TEX_RECIRC_FMISS",
+ "T60x_TEX_RECIRC_DESC",
+ "T60x_TEX_RECIRC_MULTI",
+ "T60x_TEX_RECIRC_PMISS",
+ "T60x_TEX_RECIRC_CONF",
+ "T60x_LSC_READ_HITS",
+ "T60x_LSC_READ_MISSES",
+ "T60x_LSC_WRITE_HITS",
+ "T60x_LSC_WRITE_MISSES",
+ "T60x_LSC_ATOMIC_HITS",
+ "T60x_LSC_ATOMIC_MISSES",
+ "T60x_LSC_LINE_FETCHES",
+ "T60x_LSC_DIRTY_LINE",
+ "T60x_LSC_SNOOPS",
+ "T60x_AXI_TLB_STALL",
+ "T60x_AXI_TLB_MIESS",
+ "T60x_AXI_TLB_TRANSACTION",
+ "T60x_LS_TLB_MISS",
+ "T60x_LS_TLB_HIT",
+ "T60x_AXI_BEATS_READ",
+ "T60x_AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T60x_MMU_HIT",
+ "T60x_MMU_NEW_MISS",
+ "T60x_MMU_REPLAY_FULL",
+ "T60x_MMU_REPLAY_MISS",
+ "T60x_MMU_TABLE_WALK",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T60x_UTLB_HIT",
+ "T60x_UTLB_NEW_MISS",
+ "T60x_UTLB_REPLAY_FULL",
+ "T60x_UTLB_REPLAY_MISS",
+ "T60x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T60x_L2_EXT_WRITE_BEATS",
+ "T60x_L2_EXT_READ_BEATS",
+ "T60x_L2_ANY_LOOKUP",
+ "T60x_L2_READ_LOOKUP",
+ "T60x_L2_SREAD_LOOKUP",
+ "T60x_L2_READ_REPLAY",
+ "T60x_L2_READ_SNOOP",
+ "T60x_L2_READ_HIT",
+ "T60x_L2_CLEAN_MISS",
+ "T60x_L2_WRITE_LOOKUP",
+ "T60x_L2_SWRITE_LOOKUP",
+ "T60x_L2_WRITE_REPLAY",
+ "T60x_L2_WRITE_SNOOP",
+ "T60x_L2_WRITE_HIT",
+ "T60x_L2_EXT_READ_FULL",
+ "T60x_L2_EXT_READ_HALF",
+ "T60x_L2_EXT_WRITE_FULL",
+ "T60x_L2_EXT_WRITE_HALF",
+ "T60x_L2_EXT_READ",
+ "T60x_L2_EXT_READ_LINE",
+ "T60x_L2_EXT_WRITE",
+ "T60x_L2_EXT_WRITE_LINE",
+ "T60x_L2_EXT_WRITE_SMALL",
+ "T60x_L2_EXT_BARRIER",
+ "T60x_L2_EXT_AR_STALL",
+ "T60x_L2_EXT_R_BUF_FULL",
+ "T60x_L2_EXT_RD_BUF_FULL",
+ "T60x_L2_EXT_R_RAW",
+ "T60x_L2_EXT_W_STALL",
+ "T60x_L2_EXT_W_BUF_FULL",
+ "T60x_L2_EXT_R_W_HAZARD",
+ "T60x_L2_TAG_HAZARD",
+ "T60x_L2_SNOOP_FULL",
+ "T60x_L2_REPLAY_FULL"
+};
+static const char * const hardware_counters_mali_t62x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T62x_MESSAGES_SENT",
+ "T62x_MESSAGES_RECEIVED",
+ "T62x_GPU_ACTIVE",
+ "T62x_IRQ_ACTIVE",
+ "T62x_JS0_JOBS",
+ "T62x_JS0_TASKS",
+ "T62x_JS0_ACTIVE",
+ "",
+ "T62x_JS0_WAIT_READ",
+ "T62x_JS0_WAIT_ISSUE",
+ "T62x_JS0_WAIT_DEPEND",
+ "T62x_JS0_WAIT_FINISH",
+ "T62x_JS1_JOBS",
+ "T62x_JS1_TASKS",
+ "T62x_JS1_ACTIVE",
+ "",
+ "T62x_JS1_WAIT_READ",
+ "T62x_JS1_WAIT_ISSUE",
+ "T62x_JS1_WAIT_DEPEND",
+ "T62x_JS1_WAIT_FINISH",
+ "T62x_JS2_JOBS",
+ "T62x_JS2_TASKS",
+ "T62x_JS2_ACTIVE",
+ "",
+ "T62x_JS2_WAIT_READ",
+ "T62x_JS2_WAIT_ISSUE",
+ "T62x_JS2_WAIT_DEPEND",
+ "T62x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T62x_TI_JOBS_PROCESSED",
+ "T62x_TI_TRIANGLES",
+ "T62x_TI_QUADS",
+ "T62x_TI_POLYGONS",
+ "T62x_TI_POINTS",
+ "T62x_TI_LINES",
+ "T62x_TI_VCACHE_HIT",
+ "T62x_TI_VCACHE_MISS",
+ "T62x_TI_FRONT_FACING",
+ "T62x_TI_BACK_FACING",
+ "T62x_TI_PRIM_VISIBLE",
+ "T62x_TI_PRIM_CULLED",
+ "T62x_TI_PRIM_CLIPPED",
+ "T62x_TI_LEVEL0",
+ "T62x_TI_LEVEL1",
+ "T62x_TI_LEVEL2",
+ "T62x_TI_LEVEL3",
+ "T62x_TI_LEVEL4",
+ "T62x_TI_LEVEL5",
+ "T62x_TI_LEVEL6",
+ "T62x_TI_LEVEL7",
+ "T62x_TI_COMMAND_1",
+ "T62x_TI_COMMAND_2",
+ "T62x_TI_COMMAND_3",
+ "T62x_TI_COMMAND_4",
+ "T62x_TI_COMMAND_5_7",
+ "T62x_TI_COMMAND_8_15",
+ "T62x_TI_COMMAND_16_63",
+ "T62x_TI_COMMAND_64",
+ "T62x_TI_COMPRESS_IN",
+ "T62x_TI_COMPRESS_OUT",
+ "T62x_TI_COMPRESS_FLUSH",
+ "T62x_TI_TIMESTAMPS",
+ "T62x_TI_PCACHE_HIT",
+ "T62x_TI_PCACHE_MISS",
+ "T62x_TI_PCACHE_LINE",
+ "T62x_TI_PCACHE_STALL",
+ "T62x_TI_WRBUF_HIT",
+ "T62x_TI_WRBUF_MISS",
+ "T62x_TI_WRBUF_LINE",
+ "T62x_TI_WRBUF_PARTIAL",
+ "T62x_TI_WRBUF_STALL",
+ "T62x_TI_ACTIVE",
+ "T62x_TI_LOADING_DESC",
+ "T62x_TI_INDEX_WAIT",
+ "T62x_TI_INDEX_RANGE_WAIT",
+ "T62x_TI_VERTEX_WAIT",
+ "T62x_TI_PCACHE_WAIT",
+ "T62x_TI_WRBUF_WAIT",
+ "T62x_TI_BUS_READ",
+ "T62x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T62x_TI_UTLB_STALL",
+ "T62x_TI_UTLB_REPLAY_MISS",
+ "T62x_TI_UTLB_REPLAY_FULL",
+ "T62x_TI_UTLB_NEW_MISS",
+ "T62x_TI_UTLB_HIT",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "T62x_SHADER_CORE_ACTIVE",
+ "T62x_FRAG_ACTIVE",
+ "T62x_FRAG_PRIMITIVES",
+ "T62x_FRAG_PRIMITIVES_DROPPED",
+ "T62x_FRAG_CYCLES_DESC",
+ "T62x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T62x_FRAG_CYCLES_VERT",
+ "T62x_FRAG_CYCLES_TRISETUP",
+ "T62x_FRAG_CYCLES_EZS_ACTIVE",
+ "T62x_FRAG_THREADS",
+ "T62x_FRAG_DUMMY_THREADS",
+ "T62x_FRAG_QUADS_RAST",
+ "T62x_FRAG_QUADS_EZS_TEST",
+ "T62x_FRAG_QUADS_EZS_KILLED",
+ "T62x_FRAG_THREADS_LZS_TEST",
+ "T62x_FRAG_THREADS_LZS_KILLED",
+ "T62x_FRAG_CYCLES_NO_TILE",
+ "T62x_FRAG_NUM_TILES",
+ "T62x_FRAG_TRANS_ELIM",
+ "T62x_COMPUTE_ACTIVE",
+ "T62x_COMPUTE_TASKS",
+ "T62x_COMPUTE_THREADS",
+ "T62x_COMPUTE_CYCLES_DESC",
+ "T62x_TRIPIPE_ACTIVE",
+ "T62x_ARITH_WORDS",
+ "T62x_ARITH_CYCLES_REG",
+ "T62x_ARITH_CYCLES_L0",
+ "T62x_ARITH_FRAG_DEPEND",
+ "T62x_LS_WORDS",
+ "T62x_LS_ISSUES",
+ "T62x_LS_RESTARTS",
+ "T62x_LS_REISSUES_MISS",
+ "T62x_LS_REISSUES_VD",
+ "T62x_LS_REISSUE_ATTRIB_MISS",
+ "T62x_LS_NO_WB",
+ "T62x_TEX_WORDS",
+ "T62x_TEX_BUBBLES",
+ "T62x_TEX_WORDS_L0",
+ "T62x_TEX_WORDS_DESC",
+ "T62x_TEX_ISSUES",
+ "T62x_TEX_RECIRC_FMISS",
+ "T62x_TEX_RECIRC_DESC",
+ "T62x_TEX_RECIRC_MULTI",
+ "T62x_TEX_RECIRC_PMISS",
+ "T62x_TEX_RECIRC_CONF",
+ "T62x_LSC_READ_HITS",
+ "T62x_LSC_READ_MISSES",
+ "T62x_LSC_WRITE_HITS",
+ "T62x_LSC_WRITE_MISSES",
+ "T62x_LSC_ATOMIC_HITS",
+ "T62x_LSC_ATOMIC_MISSES",
+ "T62x_LSC_LINE_FETCHES",
+ "T62x_LSC_DIRTY_LINE",
+ "T62x_LSC_SNOOPS",
+ "T62x_AXI_TLB_STALL",
+ "T62x_AXI_TLB_MIESS",
+ "T62x_AXI_TLB_TRANSACTION",
+ "T62x_LS_TLB_MISS",
+ "T62x_LS_TLB_HIT",
+ "T62x_AXI_BEATS_READ",
+ "T62x_AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T62x_MMU_HIT",
+ "T62x_MMU_NEW_MISS",
+ "T62x_MMU_REPLAY_FULL",
+ "T62x_MMU_REPLAY_MISS",
+ "T62x_MMU_TABLE_WALK",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T62x_UTLB_HIT",
+ "T62x_UTLB_NEW_MISS",
+ "T62x_UTLB_REPLAY_FULL",
+ "T62x_UTLB_REPLAY_MISS",
+ "T62x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T62x_L2_EXT_WRITE_BEATS",
+ "T62x_L2_EXT_READ_BEATS",
+ "T62x_L2_ANY_LOOKUP",
+ "T62x_L2_READ_LOOKUP",
+ "T62x_L2_SREAD_LOOKUP",
+ "T62x_L2_READ_REPLAY",
+ "T62x_L2_READ_SNOOP",
+ "T62x_L2_READ_HIT",
+ "T62x_L2_CLEAN_MISS",
+ "T62x_L2_WRITE_LOOKUP",
+ "T62x_L2_SWRITE_LOOKUP",
+ "T62x_L2_WRITE_REPLAY",
+ "T62x_L2_WRITE_SNOOP",
+ "T62x_L2_WRITE_HIT",
+ "T62x_L2_EXT_READ_FULL",
+ "T62x_L2_EXT_READ_HALF",
+ "T62x_L2_EXT_WRITE_FULL",
+ "T62x_L2_EXT_WRITE_HALF",
+ "T62x_L2_EXT_READ",
+ "T62x_L2_EXT_READ_LINE",
+ "T62x_L2_EXT_WRITE",
+ "T62x_L2_EXT_WRITE_LINE",
+ "T62x_L2_EXT_WRITE_SMALL",
+ "T62x_L2_EXT_BARRIER",
+ "T62x_L2_EXT_AR_STALL",
+ "T62x_L2_EXT_R_BUF_FULL",
+ "T62x_L2_EXT_RD_BUF_FULL",
+ "T62x_L2_EXT_R_RAW",
+ "T62x_L2_EXT_W_STALL",
+ "T62x_L2_EXT_W_BUF_FULL",
+ "T62x_L2_EXT_R_W_HAZARD",
+ "T62x_L2_TAG_HAZARD",
+ "T62x_L2_SNOOP_FULL",
+ "T62x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t72x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T72x_GPU_ACTIVE",
+ "T72x_IRQ_ACTIVE",
+ "T72x_JS0_JOBS",
+ "T72x_JS0_TASKS",
+ "T72x_JS0_ACTIVE",
+ "T72x_JS1_JOBS",
+ "T72x_JS1_TASKS",
+ "T72x_JS1_ACTIVE",
+ "T72x_JS2_JOBS",
+ "T72x_JS2_TASKS",
+ "T72x_JS2_ACTIVE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T72x_TI_JOBS_PROCESSED",
+ "T72x_TI_TRIANGLES",
+ "T72x_TI_QUADS",
+ "T72x_TI_POLYGONS",
+ "T72x_TI_POINTS",
+ "T72x_TI_LINES",
+ "T72x_TI_FRONT_FACING",
+ "T72x_TI_BACK_FACING",
+ "T72x_TI_PRIM_VISIBLE",
+ "T72x_TI_PRIM_CULLED",
+ "T72x_TI_PRIM_CLIPPED",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T72x_TI_ACTIVE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T72x_FRAG_ACTIVE",
+ "T72x_FRAG_PRIMITIVES",
+ "T72x_FRAG_PRIMITIVES_DROPPED",
+ "T72x_FRAG_THREADS",
+ "T72x_FRAG_DUMMY_THREADS",
+ "T72x_FRAG_QUADS_RAST",
+ "T72x_FRAG_QUADS_EZS_TEST",
+ "T72x_FRAG_QUADS_EZS_KILLED",
+ "T72x_FRAG_THREADS_LZS_TEST",
+ "T72x_FRAG_THREADS_LZS_KILLED",
+ "T72x_FRAG_CYCLES_NO_TILE",
+ "T72x_FRAG_NUM_TILES",
+ "T72x_FRAG_TRANS_ELIM",
+ "T72x_COMPUTE_ACTIVE",
+ "T72x_COMPUTE_TASKS",
+ "T72x_COMPUTE_THREADS",
+ "T72x_TRIPIPE_ACTIVE",
+ "T72x_ARITH_WORDS",
+ "T72x_ARITH_CYCLES_REG",
+ "T72x_LS_WORDS",
+ "T72x_LS_ISSUES",
+ "T72x_LS_RESTARTS",
+ "T72x_LS_REISSUES_MISS",
+ "T72x_TEX_WORDS",
+ "T72x_TEX_BUBBLES",
+ "T72x_TEX_ISSUES",
+ "T72x_LSC_READ_HITS",
+ "T72x_LSC_READ_MISSES",
+ "T72x_LSC_WRITE_HITS",
+ "T72x_LSC_WRITE_MISSES",
+ "T72x_LSC_ATOMIC_HITS",
+ "T72x_LSC_ATOMIC_MISSES",
+ "T72x_LSC_LINE_FETCHES",
+ "T72x_LSC_DIRTY_LINE",
+ "T72x_LSC_SNOOPS",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T72x_L2_EXT_WRITE_BEAT",
+ "T72x_L2_EXT_READ_BEAT",
+ "T72x_L2_READ_SNOOP",
+ "T72x_L2_READ_HIT",
+ "T72x_L2_WRITE_SNOOP",
+ "T72x_L2_WRITE_HIT",
+ "T72x_L2_EXT_WRITE_SMALL",
+ "T72x_L2_EXT_BARRIER",
+ "T72x_L2_EXT_AR_STALL",
+ "T72x_L2_EXT_W_STALL",
+ "T72x_L2_SNOOP_FULL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ ""
+};
+
+static const char * const hardware_counters_mali_t76x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T76x_MESSAGES_SENT",
+ "T76x_MESSAGES_RECEIVED",
+ "T76x_GPU_ACTIVE",
+ "T76x_IRQ_ACTIVE",
+ "T76x_JS0_JOBS",
+ "T76x_JS0_TASKS",
+ "T76x_JS0_ACTIVE",
+ "",
+ "T76x_JS0_WAIT_READ",
+ "T76x_JS0_WAIT_ISSUE",
+ "T76x_JS0_WAIT_DEPEND",
+ "T76x_JS0_WAIT_FINISH",
+ "T76x_JS1_JOBS",
+ "T76x_JS1_TASKS",
+ "T76x_JS1_ACTIVE",
+ "",
+ "T76x_JS1_WAIT_READ",
+ "T76x_JS1_WAIT_ISSUE",
+ "T76x_JS1_WAIT_DEPEND",
+ "T76x_JS1_WAIT_FINISH",
+ "T76x_JS2_JOBS",
+ "T76x_JS2_TASKS",
+ "T76x_JS2_ACTIVE",
+ "",
+ "T76x_JS2_WAIT_READ",
+ "T76x_JS2_WAIT_ISSUE",
+ "T76x_JS2_WAIT_DEPEND",
+ "T76x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T76x_TI_JOBS_PROCESSED",
+ "T76x_TI_TRIANGLES",
+ "T76x_TI_QUADS",
+ "T76x_TI_POLYGONS",
+ "T76x_TI_POINTS",
+ "T76x_TI_LINES",
+ "T76x_TI_VCACHE_HIT",
+ "T76x_TI_VCACHE_MISS",
+ "T76x_TI_FRONT_FACING",
+ "T76x_TI_BACK_FACING",
+ "T76x_TI_PRIM_VISIBLE",
+ "T76x_TI_PRIM_CULLED",
+ "T76x_TI_PRIM_CLIPPED",
+ "T76x_TI_LEVEL0",
+ "T76x_TI_LEVEL1",
+ "T76x_TI_LEVEL2",
+ "T76x_TI_LEVEL3",
+ "T76x_TI_LEVEL4",
+ "T76x_TI_LEVEL5",
+ "T76x_TI_LEVEL6",
+ "T76x_TI_LEVEL7",
+ "T76x_TI_COMMAND_1",
+ "T76x_TI_COMMAND_2",
+ "T76x_TI_COMMAND_3",
+ "T76x_TI_COMMAND_4",
+ "T76x_TI_COMMAND_5_7",
+ "T76x_TI_COMMAND_8_15",
+ "T76x_TI_COMMAND_16_63",
+ "T76x_TI_COMMAND_64",
+ "T76x_TI_COMPRESS_IN",
+ "T76x_TI_COMPRESS_OUT",
+ "T76x_TI_COMPRESS_FLUSH",
+ "T76x_TI_TIMESTAMPS",
+ "T76x_TI_PCACHE_HIT",
+ "T76x_TI_PCACHE_MISS",
+ "T76x_TI_PCACHE_LINE",
+ "T76x_TI_PCACHE_STALL",
+ "T76x_TI_WRBUF_HIT",
+ "T76x_TI_WRBUF_MISS",
+ "T76x_TI_WRBUF_LINE",
+ "T76x_TI_WRBUF_PARTIAL",
+ "T76x_TI_WRBUF_STALL",
+ "T76x_TI_ACTIVE",
+ "T76x_TI_LOADING_DESC",
+ "T76x_TI_INDEX_WAIT",
+ "T76x_TI_INDEX_RANGE_WAIT",
+ "T76x_TI_VERTEX_WAIT",
+ "T76x_TI_PCACHE_WAIT",
+ "T76x_TI_WRBUF_WAIT",
+ "T76x_TI_BUS_READ",
+ "T76x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T76x_TI_UTLB_HIT",
+ "T76x_TI_UTLB_NEW_MISS",
+ "T76x_TI_UTLB_REPLAY_FULL",
+ "T76x_TI_UTLB_REPLAY_MISS",
+ "T76x_TI_UTLB_STALL",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T76x_FRAG_ACTIVE",
+ "T76x_FRAG_PRIMITIVES",
+ "T76x_FRAG_PRIMITIVES_DROPPED",
+ "T76x_FRAG_CYCLES_DESC",
+ "T76x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T76x_FRAG_CYCLES_VERT",
+ "T76x_FRAG_CYCLES_TRISETUP",
+ "T76x_FRAG_CYCLES_EZS_ACTIVE",
+ "T76x_FRAG_THREADS",
+ "T76x_FRAG_DUMMY_THREADS",
+ "T76x_FRAG_QUADS_RAST",
+ "T76x_FRAG_QUADS_EZS_TEST",
+ "T76x_FRAG_QUADS_EZS_KILLED",
+ "T76x_FRAG_THREADS_LZS_TEST",
+ "T76x_FRAG_THREADS_LZS_KILLED",
+ "T76x_FRAG_CYCLES_NO_TILE",
+ "T76x_FRAG_NUM_TILES",
+ "T76x_FRAG_TRANS_ELIM",
+ "T76x_COMPUTE_ACTIVE",
+ "T76x_COMPUTE_TASKS",
+ "T76x_COMPUTE_THREADS",
+ "T76x_COMPUTE_CYCLES_DESC",
+ "T76x_TRIPIPE_ACTIVE",
+ "T76x_ARITH_WORDS",
+ "T76x_ARITH_CYCLES_REG",
+ "T76x_ARITH_CYCLES_L0",
+ "T76x_ARITH_FRAG_DEPEND",
+ "T76x_LS_WORDS",
+ "T76x_LS_ISSUES",
+ "T76x_LS_REISSUE_ATTR",
+ "T76x_LS_REISSUES_VARY",
+ "T76x_LS_VARY_RV_MISS",
+ "T76x_LS_VARY_RV_HIT",
+ "T76x_LS_NO_UNPARK",
+ "T76x_TEX_WORDS",
+ "T76x_TEX_BUBBLES",
+ "T76x_TEX_WORDS_L0",
+ "T76x_TEX_WORDS_DESC",
+ "T76x_TEX_ISSUES",
+ "T76x_TEX_RECIRC_FMISS",
+ "T76x_TEX_RECIRC_DESC",
+ "T76x_TEX_RECIRC_MULTI",
+ "T76x_TEX_RECIRC_PMISS",
+ "T76x_TEX_RECIRC_CONF",
+ "T76x_LSC_READ_HITS",
+ "T76x_LSC_READ_OP",
+ "T76x_LSC_WRITE_HITS",
+ "T76x_LSC_WRITE_OP",
+ "T76x_LSC_ATOMIC_HITS",
+ "T76x_LSC_ATOMIC_OP",
+ "T76x_LSC_LINE_FETCHES",
+ "T76x_LSC_DIRTY_LINE",
+ "T76x_LSC_SNOOPS",
+ "T76x_AXI_TLB_STALL",
+ "T76x_AXI_TLB_MIESS",
+ "T76x_AXI_TLB_TRANSACTION",
+ "T76x_LS_TLB_MISS",
+ "T76x_LS_TLB_HIT",
+ "T76x_AXI_BEATS_READ",
+ "T76x_AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T76x_MMU_HIT",
+ "T76x_MMU_NEW_MISS",
+ "T76x_MMU_REPLAY_FULL",
+ "T76x_MMU_REPLAY_MISS",
+ "T76x_MMU_TABLE_WALK",
+ "T76x_MMU_REQUESTS",
+ "",
+ "",
+ "T76x_UTLB_HIT",
+ "T76x_UTLB_NEW_MISS",
+ "T76x_UTLB_REPLAY_FULL",
+ "T76x_UTLB_REPLAY_MISS",
+ "T76x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T76x_L2_EXT_WRITE_BEATS",
+ "T76x_L2_EXT_READ_BEATS",
+ "T76x_L2_ANY_LOOKUP",
+ "T76x_L2_READ_LOOKUP",
+ "T76x_L2_SREAD_LOOKUP",
+ "T76x_L2_READ_REPLAY",
+ "T76x_L2_READ_SNOOP",
+ "T76x_L2_READ_HIT",
+ "T76x_L2_CLEAN_MISS",
+ "T76x_L2_WRITE_LOOKUP",
+ "T76x_L2_SWRITE_LOOKUP",
+ "T76x_L2_WRITE_REPLAY",
+ "T76x_L2_WRITE_SNOOP",
+ "T76x_L2_WRITE_HIT",
+ "T76x_L2_EXT_READ_FULL",
+ "",
+ "T76x_L2_EXT_WRITE_FULL",
+ "T76x_L2_EXT_R_W_HAZARD",
+ "T76x_L2_EXT_READ",
+ "T76x_L2_EXT_READ_LINE",
+ "T76x_L2_EXT_WRITE",
+ "T76x_L2_EXT_WRITE_LINE",
+ "T76x_L2_EXT_WRITE_SMALL",
+ "T76x_L2_EXT_BARRIER",
+ "T76x_L2_EXT_AR_STALL",
+ "T76x_L2_EXT_R_BUF_FULL",
+ "T76x_L2_EXT_RD_BUF_FULL",
+ "T76x_L2_EXT_R_RAW",
+ "T76x_L2_EXT_W_STALL",
+ "T76x_L2_EXT_W_BUF_FULL",
+ "T76x_L2_EXT_R_BUF_FULL",
+ "T76x_L2_TAG_HAZARD",
+ "T76x_L2_SNOOP_FULL",
+ "T76x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t82x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T82x_MESSAGES_SENT",
+ "T82x_MESSAGES_RECEIVED",
+ "T82x_GPU_ACTIVE",
+ "T82x_IRQ_ACTIVE",
+ "T82x_JS0_JOBS",
+ "T82x_JS0_TASKS",
+ "T82x_JS0_ACTIVE",
+ "",
+ "T82x_JS0_WAIT_READ",
+ "T82x_JS0_WAIT_ISSUE",
+ "T82x_JS0_WAIT_DEPEND",
+ "T82x_JS0_WAIT_FINISH",
+ "T82x_JS1_JOBS",
+ "T82x_JS1_TASKS",
+ "T82x_JS1_ACTIVE",
+ "",
+ "T82x_JS1_WAIT_READ",
+ "T82x_JS1_WAIT_ISSUE",
+ "T82x_JS1_WAIT_DEPEND",
+ "T82x_JS1_WAIT_FINISH",
+ "T82x_JS2_JOBS",
+ "T82x_JS2_TASKS",
+ "T82x_JS2_ACTIVE",
+ "",
+ "T82x_JS2_WAIT_READ",
+ "T82x_JS2_WAIT_ISSUE",
+ "T82x_JS2_WAIT_DEPEND",
+ "T82x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T82x_TI_JOBS_PROCESSED",
+ "T82x_TI_TRIANGLES",
+ "T82x_TI_QUADS",
+ "T82x_TI_POLYGONS",
+ "T82x_TI_POINTS",
+ "T82x_TI_LINES",
+ "T82x_TI_FRONT_FACING",
+ "T82x_TI_BACK_FACING",
+ "T82x_TI_PRIM_VISIBLE",
+ "T82x_TI_PRIM_CULLED",
+ "T82x_TI_PRIM_CLIPPED",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T82x_TI_ACTIVE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T82x_FRAG_ACTIVE",
+ "T82x_FRAG_PRIMITIVES",
+ "T82x_FRAG_PRIMITIVES_DROPPED",
+ "T82x_FRAG_CYCLES_DESC",
+ "T82x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T82x_FRAG_CYCLES_VERT",
+ "T82x_FRAG_CYCLES_TRISETUP",
+ "T82x_FRAG_CYCLES_EZS_ACTIVE",
+ "T82x_FRAG_THREADS",
+ "T82x_FRAG_DUMMY_THREADS",
+ "T82x_FRAG_QUADS_RAST",
+ "T82x_FRAG_QUADS_EZS_TEST",
+ "T82x_FRAG_QUADS_EZS_KILLED",
+ "T82x_FRAG_THREADS_LZS_TEST",
+ "T82x_FRAG_THREADS_LZS_KILLED",
+ "T82x_FRAG_CYCLES_NO_TILE",
+ "T82x_FRAG_NUM_TILES",
+ "T82x_FRAG_TRANS_ELIM",
+ "T82x_COMPUTE_ACTIVE",
+ "T82x_COMPUTE_TASKS",
+ "T82x_COMPUTE_THREADS",
+ "T82x_COMPUTE_CYCLES_DESC",
+ "T82x_TRIPIPE_ACTIVE",
+ "T82x_ARITH_WORDS",
+ "T82x_ARITH_CYCLES_REG",
+ "T82x_ARITH_CYCLES_L0",
+ "T82x_ARITH_FRAG_DEPEND",
+ "T82x_LS_WORDS",
+ "T82x_LS_ISSUES",
+ "T82x_LS_REISSUE_ATTR",
+ "T82x_LS_REISSUES_VARY",
+ "T82x_LS_VARY_RV_MISS",
+ "T82x_LS_VARY_RV_HIT",
+ "T82x_LS_NO_UNPARK",
+ "T82x_TEX_WORDS",
+ "T82x_TEX_BUBBLES",
+ "T82x_TEX_WORDS_L0",
+ "T82x_TEX_WORDS_DESC",
+ "T82x_TEX_ISSUES",
+ "T82x_TEX_RECIRC_FMISS",
+ "T82x_TEX_RECIRC_DESC",
+ "T82x_TEX_RECIRC_MULTI",
+ "T82x_TEX_RECIRC_PMISS",
+ "T82x_TEX_RECIRC_CONF",
+ "T82x_LSC_READ_HITS",
+ "T82x_LSC_READ_OP",
+ "T82x_LSC_WRITE_HITS",
+ "T82x_LSC_WRITE_OP",
+ "T82x_LSC_ATOMIC_HITS",
+ "T82x_LSC_ATOMIC_OP",
+ "T82x_LSC_LINE_FETCHES",
+ "T82x_LSC_DIRTY_LINE",
+ "T82x_LSC_SNOOPS",
+ "T82x_AXI_TLB_STALL",
+ "T82x_AXI_TLB_MIESS",
+ "T82x_AXI_TLB_TRANSACTION",
+ "T82x_LS_TLB_MISS",
+ "T82x_LS_TLB_HIT",
+ "T82x_AXI_BEATS_READ",
+ "T82x_AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T82x_MMU_HIT",
+ "T82x_MMU_NEW_MISS",
+ "T82x_MMU_REPLAY_FULL",
+ "T82x_MMU_REPLAY_MISS",
+ "T82x_MMU_TABLE_WALK",
+ "T82x_MMU_REQUESTS",
+ "",
+ "",
+ "T82x_UTLB_HIT",
+ "T82x_UTLB_NEW_MISS",
+ "T82x_UTLB_REPLAY_FULL",
+ "T82x_UTLB_REPLAY_MISS",
+ "T82x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T82x_L2_EXT_WRITE_BEATS",
+ "T82x_L2_EXT_READ_BEATS",
+ "T82x_L2_ANY_LOOKUP",
+ "T82x_L2_READ_LOOKUP",
+ "T82x_L2_SREAD_LOOKUP",
+ "T82x_L2_READ_REPLAY",
+ "T82x_L2_READ_SNOOP",
+ "T82x_L2_READ_HIT",
+ "T82x_L2_CLEAN_MISS",
+ "T82x_L2_WRITE_LOOKUP",
+ "T82x_L2_SWRITE_LOOKUP",
+ "T82x_L2_WRITE_REPLAY",
+ "T82x_L2_WRITE_SNOOP",
+ "T82x_L2_WRITE_HIT",
+ "T82x_L2_EXT_READ_FULL",
+ "",
+ "T82x_L2_EXT_WRITE_FULL",
+ "T82x_L2_EXT_R_W_HAZARD",
+ "T82x_L2_EXT_READ",
+ "T82x_L2_EXT_READ_LINE",
+ "T82x_L2_EXT_WRITE",
+ "T82x_L2_EXT_WRITE_LINE",
+ "T82x_L2_EXT_WRITE_SMALL",
+ "T82x_L2_EXT_BARRIER",
+ "T82x_L2_EXT_AR_STALL",
+ "T82x_L2_EXT_R_BUF_FULL",
+ "T82x_L2_EXT_RD_BUF_FULL",
+ "T82x_L2_EXT_R_RAW",
+ "T82x_L2_EXT_W_STALL",
+ "T82x_L2_EXT_W_BUF_FULL",
+ "T82x_L2_EXT_R_BUF_FULL",
+ "T82x_L2_TAG_HAZARD",
+ "T82x_L2_SNOOP_FULL",
+ "T82x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t83x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T83x_MESSAGES_SENT",
+ "T83x_MESSAGES_RECEIVED",
+ "T83x_GPU_ACTIVE",
+ "T83x_IRQ_ACTIVE",
+ "T83x_JS0_JOBS",
+ "T83x_JS0_TASKS",
+ "T83x_JS0_ACTIVE",
+ "",
+ "T83x_JS0_WAIT_READ",
+ "T83x_JS0_WAIT_ISSUE",
+ "T83x_JS0_WAIT_DEPEND",
+ "T83x_JS0_WAIT_FINISH",
+ "T83x_JS1_JOBS",
+ "T83x_JS1_TASKS",
+ "T83x_JS1_ACTIVE",
+ "",
+ "T83x_JS1_WAIT_READ",
+ "T83x_JS1_WAIT_ISSUE",
+ "T83x_JS1_WAIT_DEPEND",
+ "T83x_JS1_WAIT_FINISH",
+ "T83x_JS2_JOBS",
+ "T83x_JS2_TASKS",
+ "T83x_JS2_ACTIVE",
+ "",
+ "T83x_JS2_WAIT_READ",
+ "T83x_JS2_WAIT_ISSUE",
+ "T83x_JS2_WAIT_DEPEND",
+ "T83x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T83x_TI_JOBS_PROCESSED",
+ "T83x_TI_TRIANGLES",
+ "T83x_TI_QUADS",
+ "T83x_TI_POLYGONS",
+ "T83x_TI_POINTS",
+ "T83x_TI_LINES",
+ "T83x_TI_FRONT_FACING",
+ "T83x_TI_BACK_FACING",
+ "T83x_TI_PRIM_VISIBLE",
+ "T83x_TI_PRIM_CULLED",
+ "T83x_TI_PRIM_CLIPPED",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T83x_TI_ACTIVE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T83x_FRAG_ACTIVE",
+ "T83x_FRAG_PRIMITIVES",
+ "T83x_FRAG_PRIMITIVES_DROPPED",
+ "T83x_FRAG_CYCLES_DESC",
+ "T83x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T83x_FRAG_CYCLES_VERT",
+ "T83x_FRAG_CYCLES_TRISETUP",
+ "T83x_FRAG_CYCLES_EZS_ACTIVE",
+ "T83x_FRAG_THREADS",
+ "T83x_FRAG_DUMMY_THREADS",
+ "T83x_FRAG_QUADS_RAST",
+ "T83x_FRAG_QUADS_EZS_TEST",
+ "T83x_FRAG_QUADS_EZS_KILLED",
+ "T83x_FRAG_THREADS_LZS_TEST",
+ "T83x_FRAG_THREADS_LZS_KILLED",
+ "T83x_FRAG_CYCLES_NO_TILE",
+ "T83x_FRAG_NUM_TILES",
+ "T83x_FRAG_TRANS_ELIM",
+ "T83x_COMPUTE_ACTIVE",
+ "T83x_COMPUTE_TASKS",
+ "T83x_COMPUTE_THREADS",
+ "T83x_COMPUTE_CYCLES_DESC",
+ "T83x_TRIPIPE_ACTIVE",
+ "T83x_ARITH_WORDS",
+ "T83x_ARITH_CYCLES_REG",
+ "T83x_ARITH_CYCLES_L0",
+ "T83x_ARITH_FRAG_DEPEND",
+ "T83x_LS_WORDS",
+ "T83x_LS_ISSUES",
+ "T83x_LS_REISSUE_ATTR",
+ "T83x_LS_REISSUES_VARY",
+ "T83x_LS_VARY_RV_MISS",
+ "T83x_LS_VARY_RV_HIT",
+ "T83x_LS_NO_UNPARK",
+ "T83x_TEX_WORDS",
+ "T83x_TEX_BUBBLES",
+ "T83x_TEX_WORDS_L0",
+ "T83x_TEX_WORDS_DESC",
+ "T83x_TEX_ISSUES",
+ "T83x_TEX_RECIRC_FMISS",
+ "T83x_TEX_RECIRC_DESC",
+ "T83x_TEX_RECIRC_MULTI",
+ "T83x_TEX_RECIRC_PMISS",
+ "T83x_TEX_RECIRC_CONF",
+ "T83x_LSC_READ_HITS",
+ "T83x_LSC_READ_OP",
+ "T83x_LSC_WRITE_HITS",
+ "T83x_LSC_WRITE_OP",
+ "T83x_LSC_ATOMIC_HITS",
+ "T83x_LSC_ATOMIC_OP",
+ "T83x_LSC_LINE_FETCHES",
+ "T83x_LSC_DIRTY_LINE",
+ "T83x_LSC_SNOOPS",
+ "T83x_AXI_TLB_STALL",
+ "T83x_AXI_TLB_MIESS",
+ "T83x_AXI_TLB_TRANSACTION",
+ "T83x_LS_TLB_MISS",
+ "T83x_LS_TLB_HIT",
+ "T83x_AXI_BEATS_READ",
+ "T83x_AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T83x_MMU_HIT",
+ "T83x_MMU_NEW_MISS",
+ "T83x_MMU_REPLAY_FULL",
+ "T83x_MMU_REPLAY_MISS",
+ "T83x_MMU_TABLE_WALK",
+ "T83x_MMU_REQUESTS",
+ "",
+ "",
+ "T83x_UTLB_HIT",
+ "T83x_UTLB_NEW_MISS",
+ "T83x_UTLB_REPLAY_FULL",
+ "T83x_UTLB_REPLAY_MISS",
+ "T83x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T83x_L2_EXT_WRITE_BEATS",
+ "T83x_L2_EXT_READ_BEATS",
+ "T83x_L2_ANY_LOOKUP",
+ "T83x_L2_READ_LOOKUP",
+ "T83x_L2_SREAD_LOOKUP",
+ "T83x_L2_READ_REPLAY",
+ "T83x_L2_READ_SNOOP",
+ "T83x_L2_READ_HIT",
+ "T83x_L2_CLEAN_MISS",
+ "T83x_L2_WRITE_LOOKUP",
+ "T83x_L2_SWRITE_LOOKUP",
+ "T83x_L2_WRITE_REPLAY",
+ "T83x_L2_WRITE_SNOOP",
+ "T83x_L2_WRITE_HIT",
+ "T83x_L2_EXT_READ_FULL",
+ "",
+ "T83x_L2_EXT_WRITE_FULL",
+ "T83x_L2_EXT_R_W_HAZARD",
+ "T83x_L2_EXT_READ",
+ "T83x_L2_EXT_READ_LINE",
+ "T83x_L2_EXT_WRITE",
+ "T83x_L2_EXT_WRITE_LINE",
+ "T83x_L2_EXT_WRITE_SMALL",
+ "T83x_L2_EXT_BARRIER",
+ "T83x_L2_EXT_AR_STALL",
+ "T83x_L2_EXT_R_BUF_FULL",
+ "T83x_L2_EXT_RD_BUF_FULL",
+ "T83x_L2_EXT_R_RAW",
+ "T83x_L2_EXT_W_STALL",
+ "T83x_L2_EXT_W_BUF_FULL",
+ "T83x_L2_EXT_R_BUF_FULL",
+ "T83x_L2_TAG_HAZARD",
+ "T83x_L2_SNOOP_FULL",
+ "T83x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t86x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T86x_MESSAGES_SENT",
+ "T86x_MESSAGES_RECEIVED",
+ "T86x_GPU_ACTIVE",
+ "T86x_IRQ_ACTIVE",
+ "T86x_JS0_JOBS",
+ "T86x_JS0_TASKS",
+ "T86x_JS0_ACTIVE",
+ "",
+ "T86x_JS0_WAIT_READ",
+ "T86x_JS0_WAIT_ISSUE",
+ "T86x_JS0_WAIT_DEPEND",
+ "T86x_JS0_WAIT_FINISH",
+ "T86x_JS1_JOBS",
+ "T86x_JS1_TASKS",
+ "T86x_JS1_ACTIVE",
+ "",
+ "T86x_JS1_WAIT_READ",
+ "T86x_JS1_WAIT_ISSUE",
+ "T86x_JS1_WAIT_DEPEND",
+ "T86x_JS1_WAIT_FINISH",
+ "T86x_JS2_JOBS",
+ "T86x_JS2_TASKS",
+ "T86x_JS2_ACTIVE",
+ "",
+ "T86x_JS2_WAIT_READ",
+ "T86x_JS2_WAIT_ISSUE",
+ "T86x_JS2_WAIT_DEPEND",
+ "T86x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T86x_TI_JOBS_PROCESSED",
+ "T86x_TI_TRIANGLES",
+ "T86x_TI_QUADS",
+ "T86x_TI_POLYGONS",
+ "T86x_TI_POINTS",
+ "T86x_TI_LINES",
+ "T86x_TI_VCACHE_HIT",
+ "T86x_TI_VCACHE_MISS",
+ "T86x_TI_FRONT_FACING",
+ "T86x_TI_BACK_FACING",
+ "T86x_TI_PRIM_VISIBLE",
+ "T86x_TI_PRIM_CULLED",
+ "T86x_TI_PRIM_CLIPPED",
+ "T86x_TI_LEVEL0",
+ "T86x_TI_LEVEL1",
+ "T86x_TI_LEVEL2",
+ "T86x_TI_LEVEL3",
+ "T86x_TI_LEVEL4",
+ "T86x_TI_LEVEL5",
+ "T86x_TI_LEVEL6",
+ "T86x_TI_LEVEL7",
+ "T86x_TI_COMMAND_1",
+ "T86x_TI_COMMAND_2",
+ "T86x_TI_COMMAND_3",
+ "T86x_TI_COMMAND_4",
+ "T86x_TI_COMMAND_5_7",
+ "T86x_TI_COMMAND_8_15",
+ "T86x_TI_COMMAND_16_63",
+ "T86x_TI_COMMAND_64",
+ "T86x_TI_COMPRESS_IN",
+ "T86x_TI_COMPRESS_OUT",
+ "T86x_TI_COMPRESS_FLUSH",
+ "T86x_TI_TIMESTAMPS",
+ "T86x_TI_PCACHE_HIT",
+ "T86x_TI_PCACHE_MISS",
+ "T86x_TI_PCACHE_LINE",
+ "T86x_TI_PCACHE_STALL",
+ "T86x_TI_WRBUF_HIT",
+ "T86x_TI_WRBUF_MISS",
+ "T86x_TI_WRBUF_LINE",
+ "T86x_TI_WRBUF_PARTIAL",
+ "T86x_TI_WRBUF_STALL",
+ "T86x_TI_ACTIVE",
+ "T86x_TI_LOADING_DESC",
+ "T86x_TI_INDEX_WAIT",
+ "T86x_TI_INDEX_RANGE_WAIT",
+ "T86x_TI_VERTEX_WAIT",
+ "T86x_TI_PCACHE_WAIT",
+ "T86x_TI_WRBUF_WAIT",
+ "T86x_TI_BUS_READ",
+ "T86x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T86x_TI_UTLB_HIT",
+ "T86x_TI_UTLB_NEW_MISS",
+ "T86x_TI_UTLB_REPLAY_FULL",
+ "T86x_TI_UTLB_REPLAY_MISS",
+ "T86x_TI_UTLB_STALL",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T86x_FRAG_ACTIVE",
+ "T86x_FRAG_PRIMITIVES",
+ "T86x_FRAG_PRIMITIVES_DROPPED",
+ "T86x_FRAG_CYCLES_DESC",
+ "T86x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T86x_FRAG_CYCLES_VERT",
+ "T86x_FRAG_CYCLES_TRISETUP",
+ "T86x_FRAG_CYCLES_EZS_ACTIVE",
+ "T86x_FRAG_THREADS",
+ "T86x_FRAG_DUMMY_THREADS",
+ "T86x_FRAG_QUADS_RAST",
+ "T86x_FRAG_QUADS_EZS_TEST",
+ "T86x_FRAG_QUADS_EZS_KILLED",
+ "T86x_FRAG_THREADS_LZS_TEST",
+ "T86x_FRAG_THREADS_LZS_KILLED",
+ "T86x_FRAG_CYCLES_NO_TILE",
+ "T86x_FRAG_NUM_TILES",
+ "T86x_FRAG_TRANS_ELIM",
+ "T86x_COMPUTE_ACTIVE",
+ "T86x_COMPUTE_TASKS",
+ "T86x_COMPUTE_THREADS",
+ "T86x_COMPUTE_CYCLES_DESC",
+ "T86x_TRIPIPE_ACTIVE",
+ "T86x_ARITH_WORDS",
+ "T86x_ARITH_CYCLES_REG",
+ "T86x_ARITH_CYCLES_L0",
+ "T86x_ARITH_FRAG_DEPEND",
+ "T86x_LS_WORDS",
+ "T86x_LS_ISSUES",
+ "T86x_LS_REISSUE_ATTR",
+ "T86x_LS_REISSUES_VARY",
+ "T86x_LS_VARY_RV_MISS",
+ "T86x_LS_VARY_RV_HIT",
+ "T86x_LS_NO_UNPARK",
+ "T86x_TEX_WORDS",
+ "T86x_TEX_BUBBLES",
+ "T86x_TEX_WORDS_L0",
+ "T86x_TEX_WORDS_DESC",
+ "T86x_TEX_ISSUES",
+ "T86x_TEX_RECIRC_FMISS",
+ "T86x_TEX_RECIRC_DESC",
+ "T86x_TEX_RECIRC_MULTI",
+ "T86x_TEX_RECIRC_PMISS",
+ "T86x_TEX_RECIRC_CONF",
+ "T86x_LSC_READ_HITS",
+ "T86x_LSC_READ_OP",
+ "T86x_LSC_WRITE_HITS",
+ "T86x_LSC_WRITE_OP",
+ "T86x_LSC_ATOMIC_HITS",
+ "T86x_LSC_ATOMIC_OP",
+ "T86x_LSC_LINE_FETCHES",
+ "T86x_LSC_DIRTY_LINE",
+ "T86x_LSC_SNOOPS",
+ "T86x_AXI_TLB_STALL",
+ "T86x_AXI_TLB_MIESS",
+ "T86x_AXI_TLB_TRANSACTION",
+ "T86x_LS_TLB_MISS",
+ "T86x_LS_TLB_HIT",
+ "T86x_AXI_BEATS_READ",
+ "T86x_AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T86x_MMU_HIT",
+ "T86x_MMU_NEW_MISS",
+ "T86x_MMU_REPLAY_FULL",
+ "T86x_MMU_REPLAY_MISS",
+ "T86x_MMU_TABLE_WALK",
+ "T86x_MMU_REQUESTS",
+ "",
+ "",
+ "T86x_UTLB_HIT",
+ "T86x_UTLB_NEW_MISS",
+ "T86x_UTLB_REPLAY_FULL",
+ "T86x_UTLB_REPLAY_MISS",
+ "T86x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T86x_L2_EXT_WRITE_BEATS",
+ "T86x_L2_EXT_READ_BEATS",
+ "T86x_L2_ANY_LOOKUP",
+ "T86x_L2_READ_LOOKUP",
+ "T86x_L2_SREAD_LOOKUP",
+ "T86x_L2_READ_REPLAY",
+ "T86x_L2_READ_SNOOP",
+ "T86x_L2_READ_HIT",
+ "T86x_L2_CLEAN_MISS",
+ "T86x_L2_WRITE_LOOKUP",
+ "T86x_L2_SWRITE_LOOKUP",
+ "T86x_L2_WRITE_REPLAY",
+ "T86x_L2_WRITE_SNOOP",
+ "T86x_L2_WRITE_HIT",
+ "T86x_L2_EXT_READ_FULL",
+ "",
+ "T86x_L2_EXT_WRITE_FULL",
+ "T86x_L2_EXT_R_W_HAZARD",
+ "T86x_L2_EXT_READ",
+ "T86x_L2_EXT_READ_LINE",
+ "T86x_L2_EXT_WRITE",
+ "T86x_L2_EXT_WRITE_LINE",
+ "T86x_L2_EXT_WRITE_SMALL",
+ "T86x_L2_EXT_BARRIER",
+ "T86x_L2_EXT_AR_STALL",
+ "T86x_L2_EXT_R_BUF_FULL",
+ "T86x_L2_EXT_RD_BUF_FULL",
+ "T86x_L2_EXT_R_RAW",
+ "T86x_L2_EXT_W_STALL",
+ "T86x_L2_EXT_W_BUF_FULL",
+ "T86x_L2_EXT_R_BUF_FULL",
+ "T86x_L2_TAG_HAZARD",
+ "T86x_L2_SNOOP_FULL",
+ "T86x_L2_REPLAY_FULL"
+};
+
+static const char * const hardware_counters_mali_t88x[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "T88x_MESSAGES_SENT",
+ "T88x_MESSAGES_RECEIVED",
+ "T88x_GPU_ACTIVE",
+ "T88x_IRQ_ACTIVE",
+ "T88x_JS0_JOBS",
+ "T88x_JS0_TASKS",
+ "T88x_JS0_ACTIVE",
+ "",
+ "T88x_JS0_WAIT_READ",
+ "T88x_JS0_WAIT_ISSUE",
+ "T88x_JS0_WAIT_DEPEND",
+ "T88x_JS0_WAIT_FINISH",
+ "T88x_JS1_JOBS",
+ "T88x_JS1_TASKS",
+ "T88x_JS1_ACTIVE",
+ "",
+ "T88x_JS1_WAIT_READ",
+ "T88x_JS1_WAIT_ISSUE",
+ "T88x_JS1_WAIT_DEPEND",
+ "T88x_JS1_WAIT_FINISH",
+ "T88x_JS2_JOBS",
+ "T88x_JS2_TASKS",
+ "T88x_JS2_ACTIVE",
+ "",
+ "T88x_JS2_WAIT_READ",
+ "T88x_JS2_WAIT_ISSUE",
+ "T88x_JS2_WAIT_DEPEND",
+ "T88x_JS2_WAIT_FINISH",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "T88x_TI_JOBS_PROCESSED",
+ "T88x_TI_TRIANGLES",
+ "T88x_TI_QUADS",
+ "T88x_TI_POLYGONS",
+ "T88x_TI_POINTS",
+ "T88x_TI_LINES",
+ "T88x_TI_VCACHE_HIT",
+ "T88x_TI_VCACHE_MISS",
+ "T88x_TI_FRONT_FACING",
+ "T88x_TI_BACK_FACING",
+ "T88x_TI_PRIM_VISIBLE",
+ "T88x_TI_PRIM_CULLED",
+ "T88x_TI_PRIM_CLIPPED",
+ "T88x_TI_LEVEL0",
+ "T88x_TI_LEVEL1",
+ "T88x_TI_LEVEL2",
+ "T88x_TI_LEVEL3",
+ "T88x_TI_LEVEL4",
+ "T88x_TI_LEVEL5",
+ "T88x_TI_LEVEL6",
+ "T88x_TI_LEVEL7",
+ "T88x_TI_COMMAND_1",
+ "T88x_TI_COMMAND_2",
+ "T88x_TI_COMMAND_3",
+ "T88x_TI_COMMAND_4",
+ "T88x_TI_COMMAND_5_7",
+ "T88x_TI_COMMAND_8_15",
+ "T88x_TI_COMMAND_16_63",
+ "T88x_TI_COMMAND_64",
+ "T88x_TI_COMPRESS_IN",
+ "T88x_TI_COMPRESS_OUT",
+ "T88x_TI_COMPRESS_FLUSH",
+ "T88x_TI_TIMESTAMPS",
+ "T88x_TI_PCACHE_HIT",
+ "T88x_TI_PCACHE_MISS",
+ "T88x_TI_PCACHE_LINE",
+ "T88x_TI_PCACHE_STALL",
+ "T88x_TI_WRBUF_HIT",
+ "T88x_TI_WRBUF_MISS",
+ "T88x_TI_WRBUF_LINE",
+ "T88x_TI_WRBUF_PARTIAL",
+ "T88x_TI_WRBUF_STALL",
+ "T88x_TI_ACTIVE",
+ "T88x_TI_LOADING_DESC",
+ "T88x_TI_INDEX_WAIT",
+ "T88x_TI_INDEX_RANGE_WAIT",
+ "T88x_TI_VERTEX_WAIT",
+ "T88x_TI_PCACHE_WAIT",
+ "T88x_TI_WRBUF_WAIT",
+ "T88x_TI_BUS_READ",
+ "T88x_TI_BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T88x_TI_UTLB_HIT",
+ "T88x_TI_UTLB_NEW_MISS",
+ "T88x_TI_UTLB_REPLAY_FULL",
+ "T88x_TI_UTLB_REPLAY_MISS",
+ "T88x_TI_UTLB_STALL",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "",
+ "T88x_FRAG_ACTIVE",
+ "T88x_FRAG_PRIMITIVES",
+ "T88x_FRAG_PRIMITIVES_DROPPED",
+ "T88x_FRAG_CYCLES_DESC",
+ "T88x_FRAG_CYCLES_FPKQ_ACTIVE",
+ "T88x_FRAG_CYCLES_VERT",
+ "T88x_FRAG_CYCLES_TRISETUP",
+ "T88x_FRAG_CYCLES_EZS_ACTIVE",
+ "T88x_FRAG_THREADS",
+ "T88x_FRAG_DUMMY_THREADS",
+ "T88x_FRAG_QUADS_RAST",
+ "T88x_FRAG_QUADS_EZS_TEST",
+ "T88x_FRAG_QUADS_EZS_KILLED",
+ "T88x_FRAG_THREADS_LZS_TEST",
+ "T88x_FRAG_THREADS_LZS_KILLED",
+ "T88x_FRAG_CYCLES_NO_TILE",
+ "T88x_FRAG_NUM_TILES",
+ "T88x_FRAG_TRANS_ELIM",
+ "T88x_COMPUTE_ACTIVE",
+ "T88x_COMPUTE_TASKS",
+ "T88x_COMPUTE_THREADS",
+ "T88x_COMPUTE_CYCLES_DESC",
+ "T88x_TRIPIPE_ACTIVE",
+ "T88x_ARITH_WORDS",
+ "T88x_ARITH_CYCLES_REG",
+ "T88x_ARITH_CYCLES_L0",
+ "T88x_ARITH_FRAG_DEPEND",
+ "T88x_LS_WORDS",
+ "T88x_LS_ISSUES",
+ "T88x_LS_REISSUE_ATTR",
+ "T88x_LS_REISSUES_VARY",
+ "T88x_LS_VARY_RV_MISS",
+ "T88x_LS_VARY_RV_HIT",
+ "T88x_LS_NO_UNPARK",
+ "T88x_TEX_WORDS",
+ "T88x_TEX_BUBBLES",
+ "T88x_TEX_WORDS_L0",
+ "T88x_TEX_WORDS_DESC",
+ "T88x_TEX_ISSUES",
+ "T88x_TEX_RECIRC_FMISS",
+ "T88x_TEX_RECIRC_DESC",
+ "T88x_TEX_RECIRC_MULTI",
+ "T88x_TEX_RECIRC_PMISS",
+ "T88x_TEX_RECIRC_CONF",
+ "T88x_LSC_READ_HITS",
+ "T88x_LSC_READ_OP",
+ "T88x_LSC_WRITE_HITS",
+ "T88x_LSC_WRITE_OP",
+ "T88x_LSC_ATOMIC_HITS",
+ "T88x_LSC_ATOMIC_OP",
+ "T88x_LSC_LINE_FETCHES",
+ "T88x_LSC_DIRTY_LINE",
+ "T88x_LSC_SNOOPS",
+ "T88x_AXI_TLB_STALL",
+ "T88x_AXI_TLB_MIESS",
+ "T88x_AXI_TLB_TRANSACTION",
+ "T88x_LS_TLB_MISS",
+ "T88x_LS_TLB_HIT",
+ "T88x_AXI_BEATS_READ",
+ "T88x_AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "T88x_MMU_HIT",
+ "T88x_MMU_NEW_MISS",
+ "T88x_MMU_REPLAY_FULL",
+ "T88x_MMU_REPLAY_MISS",
+ "T88x_MMU_TABLE_WALK",
+ "T88x_MMU_REQUESTS",
+ "",
+ "",
+ "T88x_UTLB_HIT",
+ "T88x_UTLB_NEW_MISS",
+ "T88x_UTLB_REPLAY_FULL",
+ "T88x_UTLB_REPLAY_MISS",
+ "T88x_UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "T88x_L2_EXT_WRITE_BEATS",
+ "T88x_L2_EXT_READ_BEATS",
+ "T88x_L2_ANY_LOOKUP",
+ "T88x_L2_READ_LOOKUP",
+ "T88x_L2_SREAD_LOOKUP",
+ "T88x_L2_READ_REPLAY",
+ "T88x_L2_READ_SNOOP",
+ "T88x_L2_READ_HIT",
+ "T88x_L2_CLEAN_MISS",
+ "T88x_L2_WRITE_LOOKUP",
+ "T88x_L2_SWRITE_LOOKUP",
+ "T88x_L2_WRITE_REPLAY",
+ "T88x_L2_WRITE_SNOOP",
+ "T88x_L2_WRITE_HIT",
+ "T88x_L2_EXT_READ_FULL",
+ "",
+ "T88x_L2_EXT_WRITE_FULL",
+ "T88x_L2_EXT_R_W_HAZARD",
+ "T88x_L2_EXT_READ",
+ "T88x_L2_EXT_READ_LINE",
+ "T88x_L2_EXT_WRITE",
+ "T88x_L2_EXT_WRITE_LINE",
+ "T88x_L2_EXT_WRITE_SMALL",
+ "T88x_L2_EXT_BARRIER",
+ "T88x_L2_EXT_AR_STALL",
+ "T88x_L2_EXT_R_BUF_FULL",
+ "T88x_L2_EXT_RD_BUF_FULL",
+ "T88x_L2_EXT_R_RAW",
+ "T88x_L2_EXT_W_STALL",
+ "T88x_L2_EXT_W_BUF_FULL",
+ "T88x_L2_EXT_R_BUF_FULL",
+ "T88x_L2_TAG_HAZARD",
+ "T88x_L2_SNOOP_FULL",
+ "T88x_L2_REPLAY_FULL"
+};
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
new file mode 100644
index 00000000000..ca264049653
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c
@@ -0,0 +1,98 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_gpu_memory_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+/** Show callback for the @c gpu_memory debugfs file.
+ *
+ * This function is called to get the contents of the @c gpu_memory debugfs
+ * file. This is a report of current gpu memory usage.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ * -1 if it encountered an error
+ */
+
+static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
+{
+ ssize_t ret = 0;
+ struct list_head *entry;
+ const struct list_head *kbdev_list;
+
+ kbdev_list = kbase_dev_list_get();
+ list_for_each(entry, kbdev_list) {
+ struct kbase_device *kbdev = NULL;
+ struct kbasep_kctx_list_element *element;
+
+ kbdev = list_entry(entry, struct kbase_device, entry);
+ /* output the total memory usage and cap for this device */
+ ret = seq_printf(sfile, "%-16s %10u\n",
+ kbdev->devname,
+ atomic_read(&(kbdev->memdev.used_pages)));
+ mutex_lock(&kbdev->kctx_list_lock);
+ list_for_each_entry(element, &kbdev->kctx_list, link) {
+ /* output the memory usage and cap for each kctx
+ * opened on this device */
+ ret = seq_printf(sfile, " %s-0x%p %10u\n",
+ "kctx",
+ element->kctx,
+ atomic_read(&(element->kctx->used_pages)));
+ }
+ mutex_unlock(&kbdev->kctx_list_lock);
+ }
+ kbase_dev_list_put(kbdev_list);
+ return ret;
+}
+
+/*
+ * File operations related to debugfs entry for gpu_memory
+ */
+static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_gpu_memory_seq_show , NULL);
+}
+
+static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
+ .open = kbasep_gpu_memory_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/*
+ * Initialize debugfs entry for gpu_memory
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+ debugfs_create_file("gpu_memory", S_IRUGO,
+ kbdev->mali_debugfs_directory, NULL,
+ &kbasep_gpu_memory_debugfs_fops);
+ return;
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
+{
+ return;
+}
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
new file mode 100644
index 00000000000..3cf30a4e767
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpu_memory_debugfs.h
+ * Header file for gpu_memory entry in debugfs
+ *
+ */
+
+#ifndef _KBASE_GPU_MEMORY_H
+#define _KBASE_GPU_MEMORY_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Initialize gpu_memory debugfs entry
+ */
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev);
+
+#endif /*_KBASE_GPU_MEMORY_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
new file mode 100644
index 00000000000..4276665fbc9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c
@@ -0,0 +1,296 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gpuprops.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_gpuprops.h>
+#include <linux/clk.h>
+
+/**
+ * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield.
+ * @value: The value from which to extract bits.
+ * @offset: The first bit to extract (0 being the LSB).
+ * @size: The number of bits to extract.
+ *
+ * Context: @offset + @size <= 32.
+ *
+ * Return: Bits [@offset, @offset + @size) from @value.
+ */
+/* from mali_cdsb.h */
+#define KBASE_UBFX32(value, offset, size) \
+ (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1))
+
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
+{
+ kbase_gpu_clk_speed_func get_gpu_speed_mhz;
+ u32 gpu_speed_mhz;
+ int rc = 1;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != kbase_props);
+
+ /* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
+ * If that function fails, or the function is not provided by the system integrator, we report the maximum
+ * GPU speed as specified by GPU_FREQ_KHZ_MAX.
+ */
+ get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
+ if (get_gpu_speed_mhz != NULL) {
+ rc = get_gpu_speed_mhz(&gpu_speed_mhz);
+#ifdef CONFIG_MALI_DEBUG
+ /* Issue a warning message when the reported GPU speed falls outside the min/max range */
+ if (rc == 0) {
+ u32 gpu_speed_khz = gpu_speed_mhz * 1000;
+
+ if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
+ gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
+ dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
+ (unsigned long)gpu_speed_khz,
+ (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
+ (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+ }
+#endif /* CONFIG_MALI_DEBUG */
+ }
+ if (kctx->kbdev->clock) {
+ gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
+ rc = 0;
+ }
+ if (rc != 0)
+ gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
+
+ kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
+
+ memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
+
+ /* Before API 8.2 they expect L3 cache info here, which was always 0 */
+ if (kctx->api_version < KBASE_API_VERSION(8, 2))
+ kbase_props->props.raw_props.suspend_size = 0;
+
+
+ return 0;
+}
+
+static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
+{
+ struct mali_base_gpu_coherent_group *current_group;
+ u64 group_present;
+ u64 group_mask;
+ u64 first_set, first_set_prev;
+ u32 num_groups = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != props);
+
+ props->coherency_info.coherency = props->raw_props.mem_features;
+ props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
+
+ if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+ /* Group is l2 coherent */
+ group_present = props->raw_props.l2_present;
+ } else {
+ /* Group is l1 coherent */
+ group_present = props->raw_props.shader_present;
+ }
+
+ /*
+ * The coherent group mask can be computed from the l2 present
+ * register.
+ *
+ * For the coherent group n:
+ * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
+ * where first_set is group_present with only its nth set-bit kept
+ * (i.e. the position from where a new group starts).
+ *
+ * For instance if the groups are l2 coherent and l2_present=0x0..01111:
+ * The first mask is:
+ * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
+ * = (0x0..010 - 1) & ~(0x0..01 - 1)
+ * = 0x0..00f
+ * The second mask is:
+ * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
+ * = (0x0..100 - 1) & ~(0x0..010 - 1)
+ * = 0x0..0f0
+ * And so on until all the bits from group_present have been cleared
+ * (i.e. there is no group left).
+ */
+
+ current_group = props->coherency_info.group;
+ first_set = group_present & ~(group_present - 1);
+
+ while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
+ group_present -= first_set; /* Clear the current group bit */
+ first_set_prev = first_set;
+
+ first_set = group_present & ~(group_present - 1);
+ group_mask = (first_set - 1) & ~(first_set_prev - 1);
+
+ /* Populate the coherent_group structure for each group */
+ current_group->core_mask = group_mask & props->raw_props.shader_present;
+ current_group->num_cores = hweight64(current_group->core_mask);
+
+ num_groups++;
+ current_group++;
+ }
+
+ if (group_present != 0)
+ pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
+
+ props->coherency_info.num_groups = num_groups;
+}
+
+/**
+ * kbase_gpuprops_get_props - Get the GPU configuration
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values from the GPU configuration
+ * registers. Only the raw properties are filled in this function
+ */
+static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+ struct kbase_gpuprops_regdump regdump;
+ int i;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != gpu_props);
+
+ /* Dump relevant registers */
+ kbase_backend_gpuprops_get(kbdev, &regdump);
+
+ gpu_props->raw_props.gpu_id = regdump.gpu_id;
+ gpu_props->raw_props.tiler_features = regdump.tiler_features;
+ gpu_props->raw_props.mem_features = regdump.mem_features;
+ gpu_props->raw_props.mmu_features = regdump.mmu_features;
+ gpu_props->raw_props.l2_features = regdump.l2_features;
+ gpu_props->raw_props.suspend_size = regdump.suspend_size;
+
+ gpu_props->raw_props.as_present = regdump.as_present;
+ gpu_props->raw_props.js_present = regdump.js_present;
+ gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo;
+ gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo;
+ gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo;
+
+ for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
+ gpu_props->raw_props.js_features[i] = regdump.js_features[i];
+
+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+ gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
+
+ gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
+ gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
+ gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
+ gpu_props->raw_props.thread_features = regdump.thread_features;
+
+}
+
+/**
+ * kbase_gpuprops_calculate_props - Calculate the derived properties
+ * @gpu_props: The &base_gpu_props structure
+ * @kbdev: The &struct kbase_device structure for the device
+ *
+ * Fill the &base_gpu_props structure with values derived from the GPU
+ * configuration registers
+ */
+static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
+{
+ int i;
+
+ /* Populate the base_gpu_props structure */
+ gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
+ gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
+ gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
+ gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
+ gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
+ gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
+
+ for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
+ gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
+
+ gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
+ gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
+ gpu_props->l2_props.num_l2_slices = 1;
+ if (gpu_props->core_props.product_id == GPU_ID_PI_T76X)
+ gpu_props->l2_props.num_l2_slices = KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
+
+ gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
+ gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
+
+ if (gpu_props->raw_props.thread_max_threads == 0)
+ gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
+ else
+ gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
+
+ if (gpu_props->raw_props.thread_max_workgroup_size == 0)
+ gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
+ else
+ gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
+
+ if (gpu_props->raw_props.thread_max_barrier_size == 0)
+ gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
+ else
+ gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
+
+ gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
+ gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
+ gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
+ gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
+
+ /* If values are not specified, then use defaults */
+ if (gpu_props->thread_props.max_registers == 0) {
+ gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
+ gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
+ gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
+ }
+ /* Initialize the coherent_group structure for each group */
+ kbase_gpuprops_construct_coherent_groups(gpu_props);
+}
+
+void kbase_gpuprops_set(struct kbase_device *kbdev)
+{
+ struct kbase_gpu_props *gpu_props;
+ struct gpu_raw_gpu_props *raw;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ gpu_props = &kbdev->gpu_props;
+ raw = &gpu_props->props.raw_props;
+
+ /* Initialize the base_gpu_props structure from the hardware */
+ kbase_gpuprops_get_props(&gpu_props->props, kbdev);
+
+ /* Populate the derived properties */
+ kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
+
+ /* Populate kbase-only fields */
+ gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
+ gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
+
+ gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
+
+ gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
+ gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
+
+ gpu_props->num_cores = hweight64(raw->shader_present);
+ gpu_props->num_core_groups = hweight64(raw->l2_present);
+ gpu_props->num_address_spaces = hweight32(raw->as_present);
+ gpu_props->num_job_slots = hweight32(raw->js_present);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
new file mode 100644
index 00000000000..af97d97bf94
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_H_
+#define _KBASE_GPUPROPS_H_
+
+#include "mali_kbase_gpuprops_types.h"
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/**
+ * @brief Set up Kbase GPU properties.
+ *
+ * Set up Kbase GPU properties with information from the GPU registers
+ *
+ * @param kbdev The struct kbase_device structure for the device
+ */
+void kbase_gpuprops_set(struct kbase_device *kbdev);
+
+/**
+ * @brief Provide GPU properties to userside through UKU call.
+ *
+ * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers.
+ *
+ * @param kctx The struct kbase_context structure
+ * @param kbase_props A copy of the struct kbase_uk_gpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props);
+
+#endif /* _KBASE_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
new file mode 100644
index 00000000000..e37e43c2fb0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h
@@ -0,0 +1,91 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_gpuprops_types.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_GPUPROPS_TYPES_H_
+#define _KBASE_GPUPROPS_TYPES_H_
+
+#include "mali_base_kernel.h"
+
+#define KBASE_GPU_SPEED_MHZ 123
+#define KBASE_GPU_PC_SIZE_LOG2 24U
+
+struct kbase_gpuprops_regdump {
+ u32 gpu_id;
+ u32 l2_features;
+ u32 suspend_size; /* API 8.2+ */
+ u32 tiler_features;
+ u32 mem_features;
+ u32 mmu_features;
+ u32 as_present;
+ u32 js_present;
+ u32 thread_max_threads;
+ u32 thread_max_workgroup_size;
+ u32 thread_max_barrier_size;
+ u32 thread_features;
+ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+ u32 js_features[GPU_MAX_JOB_SLOTS];
+ u32 shader_present_lo;
+ u32 shader_present_hi;
+ u32 tiler_present_lo;
+ u32 tiler_present_hi;
+ u32 l2_present_lo;
+ u32 l2_present_hi;
+};
+
+struct kbase_gpu_cache_props {
+ u8 associativity;
+ u8 external_bus_width;
+};
+
+struct kbase_gpu_mem_props {
+ u8 core_group;
+};
+
+struct kbase_gpu_mmu_props {
+ u8 va_bits;
+ u8 pa_bits;
+};
+
+struct kbase_gpu_props {
+ /* kernel-only properties */
+ u8 num_cores;
+ u8 num_core_groups;
+ u8 num_address_spaces;
+ u8 num_job_slots;
+
+ struct kbase_gpu_cache_props l2_props;
+
+ struct kbase_gpu_mem_props mem;
+ struct kbase_gpu_mmu_props mmu;
+
+ /**
+ * Implementation specific irq throttle value (us), should be adjusted during integration.
+ */
+ int irq_throttle_time_us;
+
+ /* Properties shared with userspace */
+ base_gpu_props props;
+};
+
+#endif /* _KBASE_GPUPROPS_TYPES_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c
new file mode 100644
index 00000000000..1a5a21b3812
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Run-time work-arounds helpers
+ */
+
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_midg_regmap.h>
+#include "mali_kbase.h"
+#include "mali_kbase_hw.h"
+
+void kbase_hw_set_features_mask(struct kbase_device *kbdev)
+{
+ const enum base_hw_feature *features;
+ u32 gpu_id;
+
+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+ gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
+ gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+ switch (gpu_id) {
+ case GPU_ID_PI_TFRX:
+ /* FALLTHROUGH */
+ case GPU_ID_PI_T86X:
+ features = base_hw_features_tFxx;
+ break;
+ case GPU_ID_PI_T83X:
+ features = base_hw_features_t83x;
+ break;
+ case GPU_ID_PI_T82X:
+ features = base_hw_features_t82x;
+ break;
+ case GPU_ID_PI_T76X:
+ features = base_hw_features_t76x;
+ break;
+ case GPU_ID_PI_T72X:
+ features = base_hw_features_t72x;
+ break;
+ case GPU_ID_PI_T62X:
+ features = base_hw_features_t62x;
+ break;
+ case GPU_ID_PI_T60X:
+ features = base_hw_features_t60x;
+ break;
+ default:
+ features = base_hw_features_generic;
+ break;
+ }
+
+ for (; *features != BASE_HW_FEATURE_END; features++)
+ set_bit(*features, &kbdev->hw_features_mask[0]);
+}
+
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
+{
+ const enum base_hw_issue *issues;
+ u32 gpu_id;
+ u32 impl_tech;
+
+ gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
+ impl_tech = kbdev->gpu_props.props.thread_props.impl_tech;
+
+ if (impl_tech != IMPLEMENTATION_MODEL) {
+ switch (gpu_id) {
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0):
+ issues = base_hw_issues_t60x_r0p0_15dev0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC):
+ issues = base_hw_issues_t60x_r0p0_eac;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0):
+ issues = base_hw_issues_t60x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0):
+ issues = base_hw_issues_t62x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0):
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1):
+ issues = base_hw_issues_t62x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0):
+ issues = base_hw_issues_t62x_r1p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1):
+ issues = base_hw_issues_t76x_r0p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1):
+ issues = base_hw_issues_t76x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9):
+ issues = base_hw_issues_t76x_r0p1_50rel0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1):
+ issues = base_hw_issues_t76x_r0p2;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1):
+ issues = base_hw_issues_t76x_r0p3;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0):
+ issues = base_hw_issues_t76x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0):
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1):
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2):
+ issues = base_hw_issues_t72x_r0p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0):
+ issues = base_hw_issues_t72x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0):
+ issues = base_hw_issues_t72x_r1p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2):
+ issues = base_hw_issues_tFRx_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0):
+ issues = base_hw_issues_tFRx_r0p2;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0):
+ issues = base_hw_issues_tFRx_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0):
+ issues = base_hw_issues_tFRx_r2p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0):
+ issues = base_hw_issues_t86x_r0p2;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0):
+ issues = base_hw_issues_t86x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0):
+ issues = base_hw_issues_t86x_r2p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0):
+ issues = base_hw_issues_t83x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0):
+ issues = base_hw_issues_t83x_r1p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0):
+ issues = base_hw_issues_t82x_r0p0;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0):
+ issues = base_hw_issues_t82x_r0p1;
+ break;
+ case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0):
+ issues = base_hw_issues_t82x_r1p0;
+ break;
+ default:
+ dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id);
+ return -EINVAL;
+ }
+ } else {
+ /* Software model */
+ switch (gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT) {
+ case GPU_ID_PI_T60X:
+ issues = base_hw_issues_model_t60x;
+ break;
+ case GPU_ID_PI_T62X:
+ issues = base_hw_issues_model_t62x;
+ break;
+ case GPU_ID_PI_T72X:
+ issues = base_hw_issues_model_t72x;
+ break;
+ case GPU_ID_PI_T76X:
+ issues = base_hw_issues_model_t76x;
+ break;
+ case GPU_ID_PI_TFRX:
+ issues = base_hw_issues_model_tFRx;
+ break;
+ case GPU_ID_PI_T86X:
+ issues = base_hw_issues_model_t86x;
+ break;
+ case GPU_ID_PI_T83X:
+ issues = base_hw_issues_model_t83x;
+ break;
+ case GPU_ID_PI_T82X:
+ issues = base_hw_issues_model_t82x;
+ break;
+ default:
+ dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id);
+ return -EINVAL;
+ }
+ }
+
+ dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT);
+
+ for (; *issues != BASE_HW_ISSUE_END; issues++)
+ set_bit(*issues, &kbdev->hw_issues_mask[0]);
+
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.h b/drivers/gpu/arm/midgard/mali_kbase_hw.h
new file mode 100644
index 00000000000..fce7d29558e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hw.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+ test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+ test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * @brief Set the HW issues mask depending on the GPU ID
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100644
index 00000000000..b09be99e6b4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform any backend-specific initialization.
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform any backend-specific initialization.
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform any backend-specific termination.
+ * @kbdev: Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform any backend-specific termination.
+ * @kbdev: Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100644
index 00000000000..261453e8f1a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_gpu_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure */
+struct kbase_hwaccess_data {
+ struct kbase_context *active_kctx;
+
+ struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100644
index 00000000000..f93ca9d8680
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,35 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ * GPU
+ * @kbdev: Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+ struct kbase_gpuprops_regdump *regdump);
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100644
index 00000000000..5de2b7535bb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @setup: HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx: Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * of call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ * completed.
+ * @kctx: Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ * completed
+ * @kctx: Kbase context
+ * @success: Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+ bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev: Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_init() - Terminate the instrumentation backend
+ * @kbdev: Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100644
index 00000000000..a2fdf247881
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,283 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev: Device pointer
+ * @atom: Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_find_free_address_space() - Find a free address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * If no address spaces are currently free, then this function can evict an
+ * idle context from the runpool, freeing up the address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_gpu_use_ctx(), or release it using
+ * kbase_gpu_release_free_address_space()
+ *
+ * Return: Number of free address space, or KBASEP_AS_NR_INVALID if none
+ * available
+ */
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_free_address_space() - Release an address space.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to release
+ *
+ * The address space must have been returned by
+ * kbase_gpu_find_free_address_space().
+ */
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+ int as_nr);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ * provided address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer. May be NULL
+ * @as_nr: Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned. If kctx->as_pending
+ * is true then ASID assignment will complete at some point in the
+ * future and will re-start scheduling, otherwise no ASIDs are available
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_gpu_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if context is now active, false otherwise (ie if context does
+ * not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ * de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * Caller must hold as->transaction_mutex and runpool_irq.lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ * de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * Caller must hold as->transaction_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ * completing an atom.
+ * @kbdev: Device pointer
+ * @katom: Pointer to the atom to complete
+ *
+ * This function should only be called from jd_done_worker().
+ *
+ * Return: true if atom has completed, false if atom should be re-submitted
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ * and remove any others from the ringbuffers.
+ * @kbdev: Device pointer
+ * @end_timestamp: Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ * @js
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+ int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ * @js
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return : Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+ int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ * slot.
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ * that are currently on the GPU.
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ * has changed.
+ * @kbdev: Device pointer
+ *
+ * Perform any required backend-specific actions (eg starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can be currently
+ * submitted to slot @js.
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return : Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+ struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ * running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ * to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ * not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu if it returns
+ * true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu.
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx: The kbase context that contains the job(s) that should
+ * be hard-stopped
+ * @js: The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ * jobs from the context)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+ struct kbase_jd_atom *target_katom);
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100644
index 00000000000..dbdcd3def22
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ * initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up GPU after all modules have been initialized and interrupt handlers
+ * installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+ unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param new_core_mask The core mask to use
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+ u64 new_core_mask);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ * @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ * be NULL. The contents of this array must not be
+ * modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ * @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ * be NULL. The contents of this array must not be
+ * modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100644
index 00000000000..89d26eaf09a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev: Device pointer
+ * @cycle_counter: Pointer to u64 to store cycle counter in
+ * @system_time: Pointer to u64 to store system time in
+ * @ts: Pointer to struct timespec to store current monotonic
+ * time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+ u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If GPU resets occur then the counters are reset to zero, the delay may not be
+ * as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.c b/drivers/gpu/arm/midgard/mali_kbase_instr.c
new file mode 100644
index 00000000000..9c2043ac948
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_instr.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel instrumentation APIs.
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+
+void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev)
+{
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(!kbdev->hwcnt.suspended_kctx);
+
+ kctx = kbdev->hwcnt.kctx;
+ kbdev->hwcnt.suspended_kctx = kctx;
+
+ /* Relevant state was saved into hwcnt.suspended_state when enabling the
+ * counters */
+
+ if (kctx) {
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_PRIVILEGED);
+ kbase_instr_hwcnt_disable(kctx);
+ }
+}
+
+void kbase_instr_hwcnt_resume(struct kbase_device *kbdev)
+{
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ kctx = kbdev->hwcnt.suspended_kctx;
+ kbdev->hwcnt.suspended_kctx = NULL;
+
+ if (kctx) {
+ int err;
+
+ err = kbase_instr_hwcnt_enable_internal(kbdev, kctx,
+ &kbdev->hwcnt.suspended_state);
+ WARN(err, "Failed to restore instrumented hardware counters on resume\n");
+ }
+}
+
+int kbase_instr_hwcnt_enable(struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup)
+{
+ struct kbase_device *kbdev;
+ bool access_allowed;
+ int err;
+
+ kbdev = kctx->kbdev;
+
+ /* Determine if the calling task has access to this capability */
+ access_allowed = kbase_security_has_capability(kctx,
+ KBASE_SEC_INSTR_HW_COUNTERS_COLLECT,
+ KBASE_SEC_FLAG_NOAUDIT);
+ if (!access_allowed)
+ return -EINVAL;
+
+ /* Mark the context as active so the GPU is kept turned on */
+ /* A suspend won't happen here, because we're in a syscall from a
+ * userspace thread. */
+ kbase_pm_context_active(kbdev);
+
+ /* Schedule the context in */
+ kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+ err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, setup);
+ if (err) {
+ /* Release the context. This had its own Power Manager Active
+ * reference */
+ kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+ /* Also release our Power Manager Active reference */
+ kbase_pm_context_idle(kbdev);
+ }
+
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_enable);
+
+int kbase_instr_hwcnt_disable(struct kbase_context *kctx)
+{
+ int err = -EINVAL;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ err = kbase_instr_hwcnt_disable_internal(kctx);
+ if (err)
+ goto out;
+
+ /* Release the context. This had its own Power Manager Active reference
+ */
+ kbasep_js_release_privileged_ctx(kbdev, kctx);
+
+ /* Also release our Power Manager Active reference */
+ kbase_pm_context_idle(kbdev);
+
+ dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p",
+ kctx);
+out:
+ return err;
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_disable);
+
+int kbase_instr_hwcnt_setup(struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup)
+{
+ struct kbase_vinstr_context *vinstr_ctx = kctx->kbdev->vinstr_ctx;
+ u32 bitmap[4];
+
+ if (setup == NULL)
+ return -EINVAL;
+
+ bitmap[SHADER_HWCNT_BM] = setup->shader_bm;
+ bitmap[TILER_HWCNT_BM] = setup->tiler_bm;
+ bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm;
+ bitmap[JM_HWCNT_BM] = setup->jm_bm;
+ if (setup->dump_buffer) {
+ if (kctx->vinstr_cli)
+ return -EBUSY;
+ kctx->vinstr_cli = kbase_vinstr_attach_client(vinstr_ctx, false,
+ setup->dump_buffer, bitmap);
+ if (!kctx->vinstr_cli)
+ return -ENOMEM;
+ } else {
+ kbase_vinstr_detach_client(vinstr_ctx, kctx->vinstr_cli);
+ kctx->vinstr_cli = NULL;
+ }
+
+ return 0;
+}
+
+int kbase_instr_hwcnt_dump(struct kbase_context *kctx)
+{
+ return kbase_vinstr_dump(kctx->kbdev->vinstr_ctx, kctx->vinstr_cli);
+}
+KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.h b/drivers/gpu/arm/midgard/mali_kbase_instr.h
new file mode 100644
index 00000000000..5613453a3c0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_instr.h
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Instrumentation API definitions
+ */
+
+#ifndef _KBASE_INSTR_H_
+#define _KBASE_INSTR_H_
+
+#include <mali_kbase_hwaccess_instr.h>
+
+/**
+ * kbase_instr_hwcnt_setup() - Configure HW counters collection
+ * @kctx: Kbase context
+ * @setup: &struct kbase_uk_hwcnt_setup containing configuration
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_setup(struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_enable() - Enable HW counters collection
+ * @kctx: Kbase context
+ * @setup: &struct kbase_uk_hwcnt_setup containing configuration
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable(struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable() - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump() - Trigger dump of HW counters and wait for
+ * completion
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_suspend() - GPU is suspending, stop HW counter collection
+ * @kbdev: Kbase device
+ *
+ * It's assumed that there's only one privileged context.
+ *
+ * Safe to do this without lock when doing an OS suspend, because it only
+ * changes in response to user-space IOCTLs
+ */
+void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_hwcnt_resume() - GPU is resuming, resume HW counter collection
+ * @kbdev: Kbase device
+ */
+void kbase_instr_hwcnt_resume(struct kbase_device *kbdev);
+
+#endif /* _KBASE_INSTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c
new file mode 100644
index 00000000000..c5a86234049
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c
@@ -0,0 +1,1726 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+#include <linux/dma-buf.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#endif
+#include <mali_kbase.h>
+#include <mali_kbase_uku.h>
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif /* CONFIG_UMP */
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/ratelimit.h>
+
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+
+#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+/* random32 was renamed to prandom_u32 in 3.8 */
+#define prandom_u32 random32
+#endif
+
+/* Return whether katom will run on the GPU or not. Currently only soft jobs and
+ * dependency-only atoms do not run on the GPU */
+#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \
+ ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == \
+ BASE_JD_REQ_DEP)))
+/*
+ * This is the kernel side of the API. Only entry points are:
+ * - kbase_jd_submit(): Called from userspace to submit a single bag
+ * - kbase_jd_done(): Called from interrupt context to track the
+ * completion of a job.
+ * Callouts:
+ * - to the job manager (enqueue a job)
+ * - to the event subsystem (signals the completion/failure of bag/job-chains).
+ */
+
+static void __user *
+get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p)
+{
+#ifdef CONFIG_COMPAT
+ if (kctx->is_compat)
+ return compat_ptr(p->compat_value);
+#endif
+ return p->value;
+}
+
+/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs
+ *
+ * Returns whether the JS needs a reschedule.
+ *
+ * Note that the caller must also check the atom status and
+ * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock
+ */
+static int jd_run_atom(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+
+ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) {
+ /* Dependency only atom */
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ return 0;
+ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+ /* Soft-job */
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (!kbase_replay_process(katom))
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ } else if (kbase_process_soft_job(katom) == 0) {
+ kbase_finish_soft_job(katom);
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ } else {
+ /* The job has not completed */
+ list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+ }
+ return 0;
+ }
+
+ katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+ /* Queue an action about whether we should try scheduling a context */
+ return kbasep_js_add_job(kctx, katom);
+}
+
+#ifdef CONFIG_KDS
+
+/* Add the katom to the kds waiting list.
+ * Atoms must be added to the waiting list after a successful call to kds_async_waitall.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(katom);
+
+ kctx = katom->kctx;
+
+ list_add_tail(&katom->node, &kctx->waiting_kds_resource);
+}
+
+/* Remove the katom from the kds waiting list.
+ * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync.
+ * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add.
+ * The caller must hold the kbase_jd_context.lock */
+
+static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(katom);
+ list_del(&katom->node);
+}
+
+static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_jd_context *ctx;
+ struct kbase_device *kbdev;
+
+ katom = (struct kbase_jd_atom *)callback_parameter;
+ KBASE_DEBUG_ASSERT(katom);
+ ctx = &katom->kctx->jctx;
+ kbdev = katom->kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ mutex_lock(&ctx->lock);
+
+ /* KDS resource has already been satisfied (e.g. due to zapping) */
+ if (katom->kds_dep_satisfied)
+ goto out;
+
+ /* This atom's KDS dependency has now been met */
+ katom->kds_dep_satisfied = true;
+
+ /* Check whether the atom's other dependencies were already met. If
+ * katom is a GPU atom then the job scheduler may be able to represent
+ * the dependencies, hence we may attempt to submit it before they are
+ * met. Other atoms must have had both dependencies resolved */
+ if (IS_GPU_ATOM(katom) ||
+ (!kbase_jd_katom_dep_atom(&katom->dep[0]) &&
+ !kbase_jd_katom_dep_atom(&katom->dep[1]))) {
+ /* katom dep complete, attempt to run it */
+ bool resched = false;
+
+ resched = jd_run_atom(katom);
+
+ if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+ /* The atom has already finished */
+ resched |= jd_done_nolock(katom);
+ }
+
+ if (resched)
+ kbase_js_sched_all(kbdev);
+ }
+ out:
+ mutex_unlock(&ctx->lock);
+}
+
+static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(katom);
+
+ /* Prevent job_done_nolock from being called twice on an atom when
+ * there is a race between job completion and cancellation */
+
+ if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
+ /* Wait was cancelled - zap the atom */
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ if (jd_done_nolock(katom))
+ kbase_js_sched_all(katom->kctx->kbdev);
+ }
+}
+#endif /* CONFIG_KDS */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ struct sg_table *sgt;
+ struct scatterlist *s;
+ int i;
+ phys_addr_t *pa;
+ int err;
+ size_t count = 0;
+ struct kbase_mem_phy_alloc *alloc;
+
+ alloc = reg->gpu_alloc;
+
+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM);
+ KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt);
+ sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL);
+
+ if (IS_ERR_OR_NULL(sgt))
+ return -EINVAL;
+
+ /* save for later */
+ alloc->imported.umm.sgt = sgt;
+
+ pa = kbase_get_gpu_phy_pages(reg);
+ KBASE_DEBUG_ASSERT(pa);
+
+ for_each_sg(sgt->sgl, s, sgt->nents, i) {
+ int j;
+ size_t pages = PFN_UP(sg_dma_len(s));
+
+ WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1),
+ "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n",
+ sg_dma_len(s));
+
+ WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1),
+ "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n",
+ (unsigned long long) sg_dma_address(s));
+
+ for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++)
+ *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT);
+ WARN_ONCE(j < pages,
+ "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n",
+ alloc->imported.umm.dma_buf->size);
+ }
+
+ if (WARN_ONCE(count < reg->nr_pages,
+ "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n",
+ alloc->imported.umm.dma_buf->size)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Update nents as we now have pages to map */
+ alloc->nents = count;
+
+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD);
+
+out:
+ if (err) {
+ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+ alloc->imported.umm.sgt = NULL;
+ }
+
+ return err;
+}
+
+static void kbase_jd_umm_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(alloc);
+ KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment);
+ KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt);
+ dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment,
+ alloc->imported.umm.sgt, DMA_BIDIRECTIONAL);
+ alloc->imported.umm.sgt = NULL;
+ alloc->nents = 0;
+}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom)
+{
+#ifdef CONFIG_KDS
+ if (katom->kds_rset) {
+ struct kbase_jd_context *jctx = &katom->kctx->jctx;
+
+ /*
+ * As the atom is no longer waiting, remove it from
+ * the waiting list.
+ */
+
+ mutex_lock(&jctx->lock);
+ kbase_jd_kds_waiters_remove(katom);
+ mutex_unlock(&jctx->lock);
+
+ /* Release the kds resource or cancel if zapping */
+ kds_resource_set_release_sync(&katom->kds_rset);
+ }
+#endif /* CONFIG_KDS */
+}
+
+static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom)
+{
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+#ifdef CONFIG_KDS
+ /* Prevent the KDS resource from triggering the atom in case of zapping */
+ if (katom->kds_rset)
+ katom->kds_dep_satisfied = true;
+#endif /* CONFIG_KDS */
+
+ kbase_gpu_vm_lock(katom->kctx);
+ /* only roll back if extres is non-NULL */
+ if (katom->extres) {
+ u32 res_no;
+
+ res_no = katom->nr_extres;
+ while (res_no-- > 0) {
+ struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+ alloc->imported.umm.current_mapping_usage_count--;
+
+ if (0 == alloc->imported.umm.current_mapping_usage_count) {
+ struct kbase_va_region *reg;
+
+ reg = kbase_region_tracker_find_region_base_address(
+ katom->kctx,
+ katom->extres[res_no].gpu_address);
+
+ if (reg && reg->gpu_alloc == alloc)
+ kbase_mmu_teardown_pages(
+ katom->kctx,
+ reg->start_pfn,
+ kbase_reg_current_backed_size(reg));
+
+ kbase_jd_umm_unmap(katom->kctx, alloc);
+ }
+ }
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+ kbase_mem_phy_alloc_put(katom->extres[res_no].alloc);
+ }
+ kfree(katom->extres);
+ katom->extres = NULL;
+ }
+ kbase_gpu_vm_unlock(katom->kctx);
+}
+
+#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS)
+static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, bool exclusive)
+{
+ u32 i;
+
+ for (i = 0; i < *kds_res_count; i++) {
+ /* Duplicate resource, ignore */
+ if (kds_resources[i] == kds_res)
+ return;
+ }
+
+ kds_resources[*kds_res_count] = kds_res;
+ if (exclusive)
+ set_bit(*kds_res_count, kds_access_bitmap);
+ (*kds_res_count)++;
+}
+#endif
+
+/*
+ * Set up external resources needed by this job.
+ *
+ * jctx.lock must be held when this is called.
+ */
+
+static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom)
+{
+ int err_ret_val = -EINVAL;
+ u32 res_no;
+#ifdef CONFIG_KDS
+ u32 kds_res_count = 0;
+ struct kds_resource **kds_resources = NULL;
+ unsigned long *kds_access_bitmap = NULL;
+#endif /* CONFIG_KDS */
+ struct base_external_resource *input_extres;
+
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES);
+
+ /* no resources encoded, early out */
+ if (!katom->nr_extres)
+ return -EINVAL;
+
+ katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL);
+ if (NULL == katom->extres) {
+ err_ret_val = -ENOMEM;
+ goto early_err_out;
+ }
+
+ /* copy user buffer to the end of our real buffer.
+ * Make sure the struct sizes haven't changed in a way
+ * we don't support */
+ BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres));
+ input_extres = (struct base_external_resource *)
+ (((unsigned char *)katom->extres) +
+ (sizeof(*katom->extres) - sizeof(*input_extres)) *
+ katom->nr_extres);
+
+ if (copy_from_user(input_extres,
+ get_compat_pointer(katom->kctx, &user_atom->extres_list),
+ sizeof(*input_extres) * katom->nr_extres) != 0) {
+ err_ret_val = -EINVAL;
+ goto early_err_out;
+ }
+#ifdef CONFIG_KDS
+ /* assume we have to wait for all */
+ KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+ kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL);
+
+ if (NULL == kds_resources) {
+ err_ret_val = -ENOMEM;
+ goto early_err_out;
+ }
+
+ KBASE_DEBUG_ASSERT(0 != katom->nr_extres);
+ kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((katom->nr_extres + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL);
+
+ if (NULL == kds_access_bitmap) {
+ err_ret_val = -ENOMEM;
+ goto early_err_out;
+ }
+#endif /* CONFIG_KDS */
+
+ /* need to keep the GPU VM locked while we set up UMM buffers */
+ kbase_gpu_vm_lock(katom->kctx);
+ for (res_no = 0; res_no < katom->nr_extres; res_no++) {
+ struct base_external_resource *res;
+ struct kbase_va_region *reg;
+
+ res = &input_extres[res_no];
+ reg = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+ res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+ /* did we find a matching region object? */
+ if (NULL == reg || (reg->flags & KBASE_REG_FREE)) {
+ /* roll back */
+ goto failed_loop;
+ }
+
+ if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) &&
+ (reg->flags & KBASE_REG_SECURE)) {
+ katom->atom_flags |= KBASE_KATOM_FLAG_SECURE;
+ if ((katom->core_req & BASE_JD_REQ_FS) == 0) {
+ WARN_RATELIMIT(1, "Secure non-fragment jobs not supported");
+ goto failed_loop;
+ }
+ }
+
+ /* decide what needs to happen for this resource */
+ switch (reg->gpu_alloc->type) {
+ case BASE_TMEM_IMPORT_TYPE_UMP:
+ {
+#if defined(CONFIG_KDS) && defined(CONFIG_UMP)
+ struct kds_resource *kds_res;
+
+ kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle);
+ if (kds_res)
+ add_kds_resource(kds_res, kds_resources, &kds_res_count,
+ kds_access_bitmap,
+ res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE);
+#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */
+ break;
+ }
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case BASE_TMEM_IMPORT_TYPE_UMM:
+ {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct kds_resource *kds_res;
+
+ kds_res = get_dma_buf_kds_resource(reg->gpu_alloc->imported.umm.dma_buf);
+ if (kds_res)
+ add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE);
+#endif
+ reg->gpu_alloc->imported.umm.current_mapping_usage_count++;
+ if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
+ /* use a local variable to not pollute err_ret_val
+ * with a potential success value as some other gotos depend
+ * on the default error code stored in err_ret_val */
+ int tmp;
+
+ tmp = kbase_jd_umm_map(katom->kctx, reg);
+ if (tmp) {
+ /* failed to map this buffer, roll back */
+ err_ret_val = tmp;
+ reg->gpu_alloc->imported.umm.current_mapping_usage_count--;
+ goto failed_loop;
+ }
+ }
+ break;
+ }
+#endif
+ default:
+ goto failed_loop;
+ }
+
+ /* finish with updating out array with the data we found */
+ /* NOTE: It is important that this is the last thing we do (or
+ * at least not before the first write) as we overwrite elements
+ * as we loop and could be overwriting ourself, so no writes
+ * until the last read for an element.
+ * */
+ katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */
+ katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+ }
+ /* successfully parsed the extres array */
+ /* drop the vm lock before we call into kds */
+ kbase_gpu_vm_unlock(katom->kctx);
+
+#ifdef CONFIG_KDS
+ if (kds_res_count) {
+ int wait_failed;
+
+ /* We have resources to wait for with kds */
+ katom->kds_dep_satisfied = false;
+
+ wait_failed = kds_async_waitall(&katom->kds_rset,
+ &katom->kctx->jctx.kds_cb, katom, NULL,
+ kds_res_count, kds_access_bitmap,
+ kds_resources);
+
+ if (wait_failed)
+ goto failed_kds_setup;
+ else
+ kbase_jd_kds_waiters_add(katom);
+ } else {
+ /* Nothing to wait for, so kds dep met */
+ katom->kds_dep_satisfied = true;
+ }
+ kfree(kds_resources);
+ kfree(kds_access_bitmap);
+#endif /* CONFIG_KDS */
+
+ /* all done OK */
+ return 0;
+
+/* error handling section */
+
+#ifdef CONFIG_KDS
+ failed_kds_setup:
+
+ /* lock before we unmap */
+ kbase_gpu_vm_lock(katom->kctx);
+#endif /* CONFIG_KDS */
+
+ failed_loop:
+ /* undo the loop work */
+ while (res_no-- > 0) {
+ struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc;
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+ alloc->imported.umm.current_mapping_usage_count--;
+
+ if (0 == alloc->imported.umm.current_mapping_usage_count) {
+ struct kbase_va_region *reg;
+
+ reg = kbase_region_tracker_find_region_base_address(
+ katom->kctx,
+ katom->extres[res_no].gpu_address);
+
+ if (reg && reg->gpu_alloc == alloc)
+ kbase_mmu_teardown_pages(katom->kctx,
+ reg->start_pfn,
+ kbase_reg_current_backed_size(reg));
+
+ kbase_jd_umm_unmap(katom->kctx, alloc);
+ }
+ }
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+ kbase_mem_phy_alloc_put(alloc);
+ }
+ kbase_gpu_vm_unlock(katom->kctx);
+
+ early_err_out:
+ kfree(katom->extres);
+ katom->extres = NULL;
+#ifdef CONFIG_KDS
+ kfree(kds_resources);
+ kfree(kds_access_bitmap);
+#endif /* CONFIG_KDS */
+ return err_ret_val;
+}
+
+static inline void jd_resolve_dep(struct list_head *out_list,
+ struct kbase_jd_atom *katom,
+ u8 d,
+ bool ctx_is_dying)
+{
+ u8 other_d = !d;
+
+ while (!list_empty(&katom->dep_head[d])) {
+ struct kbase_jd_atom *dep_atom;
+ u8 dep_type;
+
+ dep_atom = list_entry(katom->dep_head[d].next,
+ struct kbase_jd_atom, dep_item[d]);
+
+ list_del(katom->dep_head[d].next);
+
+ dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]);
+ kbase_jd_katom_dep_clear(&dep_atom->dep[d]);
+
+ if (katom->event_code != BASE_JD_EVENT_DONE &&
+ (dep_type != BASE_JD_DEP_TYPE_ORDER || ctx_is_dying)) {
+ /* Atom failed, so remove the other dependencies and immediately fail the atom */
+ if (kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
+ list_del(&dep_atom->dep_item[other_d]);
+ kbase_jd_katom_dep_clear(&dep_atom->dep[other_d]);
+ }
+#ifdef CONFIG_KDS
+ if (!dep_atom->kds_dep_satisfied) {
+ /* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and
+ * do nothing. If the callback doesn't happen then kbase_jd_post_external_resources will clean up
+ */
+ dep_atom->kds_dep_satisfied = true;
+ }
+#endif
+
+ dep_atom->event_code = katom->event_code;
+ KBASE_DEBUG_ASSERT(dep_atom->status !=
+ KBASE_JD_ATOM_STATE_UNUSED);
+ dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+
+ list_add_tail(&dep_atom->dep_item[0], out_list);
+ } else if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) {
+#ifdef CONFIG_KDS
+ if (dep_atom->kds_dep_satisfied)
+#endif
+ list_add_tail(&dep_atom->dep_item[0], out_list);
+ }
+ }
+}
+
+KBASE_EXPORT_TEST_API(jd_resolve_dep);
+
+#if MALI_CUSTOMER_RELEASE == 0
+static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ kbdev->force_replay_count++;
+
+ if (kbdev->force_replay_count >= kbdev->force_replay_limit) {
+ kbdev->force_replay_count = 0;
+ katom->event_code = BASE_JD_EVENT_FORCE_REPLAY;
+
+ if (kbdev->force_replay_random)
+ kbdev->force_replay_limit =
+ (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1;
+
+ dev_info(kbdev->dev, "force_replay : promoting to error\n");
+ }
+}
+
+/** Test to see if atom should be forced to fail.
+ *
+ * This function will check if an atom has a replay job as a dependent. If so
+ * then it will be considered for forced failure. */
+static void jd_check_force_failure(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ int i;
+
+ if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) ||
+ (katom->core_req & BASEP_JD_REQ_EVENT_NEVER))
+ return;
+
+ for (i = 1; i < BASE_JD_ATOM_COUNT; i++) {
+ if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom ||
+ kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) {
+ struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i];
+
+ if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) ==
+ BASE_JD_REQ_SOFT_REPLAY &&
+ (dep_atom->core_req & kbdev->force_replay_core_req)
+ == kbdev->force_replay_core_req) {
+ jd_force_failure(kbdev, katom);
+ return;
+ }
+ }
+ }
+}
+#endif
+
+/*
+ * Perform the necessary handling of an atom that has finished running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+ struct list_head completed_jobs;
+ struct list_head runnable_jobs;
+ bool need_to_try_schedule_context = false;
+ int i;
+
+ INIT_LIST_HEAD(&completed_jobs);
+ INIT_LIST_HEAD(&runnable_jobs);
+
+ KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+#if MALI_CUSTOMER_RELEASE == 0
+ jd_check_force_failure(katom);
+#endif
+
+
+ /* This is needed in case an atom is failed due to being invalid, this
+ * can happen *before* the jobs that the atom depends on have completed */
+ for (i = 0; i < 2; i++) {
+ if (kbase_jd_katom_dep_atom(&katom->dep[i])) {
+ list_del(&katom->dep_item[i]);
+ kbase_jd_katom_dep_clear(&katom->dep[i]);
+ }
+ }
+
+ /* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with
+ * BASE_JD_EVENT_TILE_RANGE_FAULT.
+ *
+ * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the
+ * error code to BASE_JD_EVENT_DONE
+ */
+
+ if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) &&
+ katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) {
+ if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) {
+ /* Promote the failure to job done */
+ katom->event_code = BASE_JD_EVENT_DONE;
+ katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED);
+ }
+ }
+
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ list_add_tail(&katom->dep_item[0], &completed_jobs);
+
+ while (!list_empty(&completed_jobs)) {
+ katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, dep_item[0]);
+ list_del(completed_jobs.prev);
+
+ KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED);
+
+ for (i = 0; i < 2; i++)
+ jd_resolve_dep(&runnable_jobs, katom, i,
+ js_kctx_info->ctx.is_dying);
+
+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES)
+ kbase_jd_post_external_resources(katom);
+
+ while (!list_empty(&runnable_jobs)) {
+ struct kbase_jd_atom *node;
+
+ node = list_entry(runnable_jobs.next,
+ struct kbase_jd_atom, dep_item[0]);
+
+ list_del(runnable_jobs.next);
+
+ KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED);
+
+ if (node->status != KBASE_JD_ATOM_STATE_COMPLETED) {
+ need_to_try_schedule_context |= jd_run_atom(node);
+ } else {
+ node->event_code = katom->event_code;
+
+ if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (kbase_replay_process(node))
+ /* Don't complete this atom */
+ continue;
+ } else if (node->core_req &
+ BASE_JD_REQ_SOFT_JOB) {
+ /* If this is a fence wait then remove it from the list of sync waiters. */
+ if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req)
+ list_del(&node->dep_item[0]);
+
+ kbase_finish_soft_job(node);
+ }
+ node->status = KBASE_JD_ATOM_STATE_COMPLETED;
+ }
+
+ if (node->status == KBASE_JD_ATOM_STATE_COMPLETED)
+ list_add_tail(&node->dep_item[0], &completed_jobs);
+ }
+
+ /* Register a completed job as a disjoint event when the GPU
+ * is in a disjoint state (ie. being reset or replaying jobs).
+ */
+ kbase_disjoint_event_potential(kctx->kbdev);
+ kbase_event_post(kctx, katom);
+
+ /* Decrement and check the TOTAL number of jobs. This includes
+ * those not tracked by the scheduler: 'not ready to run' and
+ * 'dependency-only' jobs. */
+ if (--kctx->jctx.job_nr == 0)
+ wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter
+ * that we've got no more jobs (so we can be safely terminated) */
+ }
+
+ return need_to_try_schedule_context;
+}
+
+KBASE_EXPORT_TEST_API(jd_done_nolock);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+enum {
+ CORE_REQ_DEP_ONLY,
+ CORE_REQ_SOFT,
+ CORE_REQ_COMPUTE,
+ CORE_REQ_FRAGMENT,
+ CORE_REQ_VERTEX,
+ CORE_REQ_TILER,
+ CORE_REQ_FRAGMENT_VERTEX,
+ CORE_REQ_FRAGMENT_VERTEX_TILER,
+ CORE_REQ_FRAGMENT_TILER,
+ CORE_REQ_VERTEX_TILER,
+ CORE_REQ_UNKNOWN
+};
+static const char * const core_req_strings[] = {
+ "Dependency Only Job",
+ "Soft Job",
+ "Compute Shader Job",
+ "Fragment Shader Job",
+ "Vertex/Geometry Shader Job",
+ "Tiler Job",
+ "Fragment Shader + Vertex/Geometry Shader Job",
+ "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job",
+ "Fragment Shader + Tiler Job",
+ "Vertex/Geometry Shader Job + Tiler Job",
+ "Unknown Job"
+};
+static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req)
+{
+ if (core_req & BASE_JD_REQ_SOFT_JOB)
+ return core_req_strings[CORE_REQ_SOFT];
+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+ return core_req_strings[CORE_REQ_COMPUTE];
+ switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) {
+ case BASE_JD_REQ_DEP:
+ return core_req_strings[CORE_REQ_DEP_ONLY];
+ case BASE_JD_REQ_FS:
+ return core_req_strings[CORE_REQ_FRAGMENT];
+ case BASE_JD_REQ_CS:
+ return core_req_strings[CORE_REQ_VERTEX];
+ case BASE_JD_REQ_T:
+ return core_req_strings[CORE_REQ_TILER];
+ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS):
+ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX];
+ case (BASE_JD_REQ_FS | BASE_JD_REQ_T):
+ return core_req_strings[CORE_REQ_FRAGMENT_TILER];
+ case (BASE_JD_REQ_CS | BASE_JD_REQ_T):
+ return core_req_strings[CORE_REQ_VERTEX_TILER];
+ case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T):
+ return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER];
+ }
+ return core_req_strings[CORE_REQ_UNKNOWN];
+}
+#endif
+
+bool jd_submit_atom(struct kbase_context *kctx,
+ const struct base_jd_atom_v2 *user_atom,
+ struct kbase_jd_atom *katom)
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+ base_jd_core_req core_req;
+ int queued = 0;
+ int i;
+ int sched_prio;
+ bool ret;
+
+ /* Update the TOTAL number of jobs. This includes those not tracked by
+ * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+ jctx->job_nr++;
+
+ core_req = user_atom->core_req;
+
+ katom->start_timestamp.tv64 = 0;
+ katom->time_spent_us = 0;
+ katom->udata = user_atom->udata;
+ katom->kctx = kctx;
+ katom->nr_extres = user_atom->nr_extres;
+ katom->extres = NULL;
+ katom->device_nr = user_atom->device_nr;
+ katom->affinity = 0;
+ katom->jc = user_atom->jc;
+ katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED;
+ katom->core_req = core_req;
+ katom->atom_flags = 0;
+ katom->retry_count = 0;
+ katom->need_cache_flush_cores_retained = 0;
+ katom->x_pre_dep = NULL;
+ katom->x_post_dep = NULL;
+#ifdef CONFIG_KDS
+ /* Start by assuming that the KDS dependencies are satisfied,
+ * kbase_jd_pre_external_resources will correct this if there are dependencies */
+ katom->kds_dep_satisfied = true;
+ katom->kds_rset = NULL;
+#endif /* CONFIG_KDS */
+
+ /* Don't do anything if there is a mess up with dependencies.
+ This is done in a separate cycle to check both the dependencies at ones, otherwise
+ it will be extra complexity to deal with 1st dependency ( just added to the list )
+ if only the 2nd one has invalid config.
+ */
+ for (i = 0; i < 2; i++) {
+ int dep_atom_number = user_atom->pre_dep[i].atom_id;
+ base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type;
+
+ if (dep_atom_number) {
+ if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER &&
+ dep_atom_type != BASE_JD_DEP_TYPE_DATA) {
+ katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT;
+ katom->status = KBASE_JD_ATOM_STATE_COMPLETED;
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_new_atom(
+ katom,
+ kbase_jd_atom_id(kctx, katom));
+ kbase_tlstream_tl_ret_atom_ctx(
+ katom, kctx);
+#endif
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ }
+ }
+
+ /* Add dependencies */
+ for (i = 0; i < 2; i++) {
+ int dep_atom_number = user_atom->pre_dep[i].atom_id;
+ base_jd_dep_type dep_atom_type;
+ struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number];
+
+ dep_atom_type = user_atom->pre_dep[i].dependency_type;
+ kbase_jd_katom_dep_clear(&katom->dep[i]);
+
+ if (!dep_atom_number)
+ continue;
+
+ if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED ||
+ dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
+
+ if (dep_atom->event_code == BASE_JD_EVENT_DONE)
+ continue;
+ /* don't stop this atom if it has an order dependency
+ * only to the failed one, try to submit it throught
+ * the normal path
+ */
+ if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER &&
+ dep_atom->event_code > BASE_JD_EVENT_ACTIVE) {
+ continue;
+ }
+
+ if (i == 1 && kbase_jd_katom_dep_atom(&katom->dep[0])) {
+ /* Remove the previous dependency */
+ list_del(&katom->dep_item[0]);
+ kbase_jd_katom_dep_clear(&katom->dep[0]);
+ }
+
+ /* Atom has completed, propagate the error code if any */
+ katom->event_code = dep_atom->event_code;
+ katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_new_atom(
+ katom,
+ kbase_jd_atom_id(kctx, katom));
+ kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+#endif
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (kbase_replay_process(katom)) {
+ ret = false;
+ goto out;
+ }
+ }
+ ret = jd_done_nolock(katom);
+
+ goto out;
+ } else {
+ /* Atom is in progress, add this atom to the list */
+ list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]);
+ kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type);
+ queued = 1;
+ }
+ }
+
+ /* These must occur after the above loop to ensure that an atom that
+ * depends on a previous atom with the same number behaves as expected */
+ katom->event_code = BASE_JD_EVENT_DONE;
+ katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_new_atom(
+ katom,
+ kbase_jd_atom_id(kctx, katom));
+ kbase_tlstream_tl_ret_atom_ctx(katom, kctx);
+#endif
+
+ /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */
+ if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+ dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL");
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+
+ /* Reject atoms with an invalid device_nr */
+ if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) &&
+ (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) {
+ dev_warn(kctx->kbdev->dev,
+ "Rejecting atom with invalid device_nr %d",
+ katom->device_nr);
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+
+ /* For invalid priority, be most lenient and choose the default */
+ sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio);
+ if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID)
+ sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
+ katom->sched_priority = sched_prio;
+
+ if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+ /* handle what we need to do to access the external resources */
+ if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
+ /* setup failed (no access, bad resource, unknown resource types, etc.) */
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ }
+
+ /* Validate the atom. Function will return error if the atom is
+ * malformed.
+ *
+ * Soft-jobs never enter the job scheduler but have their own initialize method.
+ *
+ * If either fail then we immediately complete the atom with an error.
+ */
+ if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
+ if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ } else {
+ /* Soft-job */
+ if (kbase_prepare_soft_job(katom) != 0) {
+ katom->event_code = BASE_JD_EVENT_JOB_INVALID;
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ }
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+ katom->work_id = atomic_inc_return(&jctx->work_id);
+ trace_gpu_job_enqueue((u32)kctx, katom->work_id, kbasep_map_core_reqs_to_string(katom->core_req));
+#endif
+
+ if (queued && !IS_GPU_ATOM(katom)) {
+ ret = false;
+ goto out;
+ }
+#ifdef CONFIG_KDS
+ if (!katom->kds_dep_satisfied) {
+ /* Queue atom due to KDS dependency */
+ ret = false;
+ goto out;
+ }
+#endif /* CONFIG_KDS */
+
+ if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE)
+ == BASE_JD_REQ_SOFT_REPLAY) {
+ if (kbase_replay_process(katom))
+ ret = false;
+ else
+ ret = jd_done_nolock(katom);
+
+ goto out;
+ } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
+ if (kbase_process_soft_job(katom) == 0) {
+ kbase_finish_soft_job(katom);
+ ret = jd_done_nolock(katom);
+ goto out;
+ }
+ /* The job has not yet completed */
+ list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs);
+ ret = false;
+ } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) {
+ katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+ ret = kbasep_js_add_job(kctx, katom);
+ /* If job was cancelled then resolve immediately */
+ if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED)
+ ret = jd_done_nolock(katom);
+ } else {
+ /* This is a pure dependency. Resolve it immediately */
+ ret = jd_done_nolock(katom);
+ }
+
+ out:
+ return ret;
+}
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data,
+ int uk6_atom)
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+ const struct kbase_uk_job_submit *submit_data)
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+ int err = 0;
+ int i;
+ bool need_to_try_schedule_context = false;
+ struct kbase_device *kbdev;
+ void __user *user_addr;
+
+ /*
+ * kbase_jd_submit isn't expected to fail and so all errors with the jobs
+ * are reported by immediately falling them (through event system)
+ */
+ kbdev = kctx->kbdev;
+
+ beenthere(kctx, "%s", "Enter");
+
+ if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) {
+ dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it");
+ return -EINVAL;
+ }
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ if ((uk6_atom && submit_data->stride !=
+ sizeof(struct base_jd_atom_v2_uk6)) ||
+ submit_data->stride != sizeof(base_jd_atom_v2)) {
+#else
+ if (submit_data->stride != sizeof(base_jd_atom_v2)) {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+ dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel");
+ return -EINVAL;
+ }
+
+ user_addr = get_compat_pointer(kctx, &submit_data->addr);
+
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight));
+
+ for (i = 0; i < submit_data->nr_atoms; i++) {
+ struct base_jd_atom_v2 user_atom;
+ struct kbase_jd_atom *katom;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ if (uk6_atom) {
+ struct base_jd_atom_v2_uk6 user_atom_v6;
+ base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA};
+
+ if (copy_from_user(&user_atom_v6, user_addr,
+ sizeof(user_atom_v6))) {
+ err = -EINVAL;
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx,
+ atomic_sub_return(
+ submit_data->nr_atoms - i,
+ &kctx->timeline.jd_atoms_in_flight));
+ break;
+ }
+ /* Convert from UK6 atom format to UK7 format */
+ user_atom.jc = user_atom_v6.jc;
+ user_atom.udata = user_atom_v6.udata;
+ user_atom.extres_list = user_atom_v6.extres_list;
+ user_atom.nr_extres = user_atom_v6.nr_extres;
+ user_atom.core_req = user_atom_v6.core_req;
+
+ /* atom number 0 is used for no dependency atoms */
+ if (!user_atom_v6.pre_dep[0])
+ dep_types[0] = BASE_JD_DEP_TYPE_INVALID;
+
+ base_jd_atom_dep_set(&user_atom.pre_dep[0],
+ user_atom_v6.pre_dep[0],
+ dep_types[0]);
+
+ /* atom number 0 is used for no dependency atoms */
+ if (!user_atom_v6.pre_dep[1])
+ dep_types[1] = BASE_JD_DEP_TYPE_INVALID;
+
+ base_jd_atom_dep_set(&user_atom.pre_dep[1],
+ user_atom_v6.pre_dep[1],
+ dep_types[1]);
+
+ user_atom.atom_number = user_atom_v6.atom_number;
+ user_atom.prio = user_atom_v6.prio;
+ user_atom.device_nr = user_atom_v6.device_nr;
+ } else {
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+ if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) {
+ err = -EINVAL;
+ KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight));
+ break;
+ }
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ }
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+ user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride);
+
+ mutex_lock(&jctx->lock);
+#ifndef compiletime_assert
+#define compiletime_assert_defined
+#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \
+while (false)
+#endif
+ compiletime_assert((1 << (8*sizeof(user_atom.atom_number))) ==
+ BASE_JD_ATOM_COUNT,
+ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+ compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) ==
+ sizeof(user_atom.atom_number),
+ "BASE_JD_ATOM_COUNT and base_atom_id type out of sync");
+#ifdef compiletime_assert_defined
+#undef compiletime_assert
+#undef compiletime_assert_defined
+#endif
+ katom = &jctx->atoms[user_atom.atom_number];
+
+ while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) {
+ /* Atom number is already in use, wait for the atom to
+ * complete
+ */
+ mutex_unlock(&jctx->lock);
+
+ /* This thread will wait for the atom to complete. Due
+ * to thread scheduling we are not sure that the other
+ * thread that owns the atom will also schedule the
+ * context, so we force the scheduler to be active and
+ * hence eventually schedule this context at some point
+ * later.
+ */
+ kbase_js_sched_all(kbdev);
+
+ if (wait_event_killable(katom->completed,
+ katom->status == KBASE_JD_ATOM_STATE_UNUSED)) {
+ /* We're being killed so the result code
+ * doesn't really matter
+ */
+ return 0;
+ }
+ mutex_lock(&jctx->lock);
+ }
+
+ need_to_try_schedule_context |=
+ jd_submit_atom(kctx, &user_atom, katom);
+
+ /* Register a completed job as a disjoint event when the GPU is in a disjoint state
+ * (ie. being reset or replaying jobs).
+ */
+ kbase_disjoint_event_potential(kbdev);
+
+ mutex_unlock(&jctx->lock);
+ }
+
+ if (need_to_try_schedule_context)
+ kbase_js_sched_all(kbdev);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_submit);
+
+/**
+ * jd_done_worker - Handle a job completion
+ * @data: a &struct work_struct
+ *
+ * This function requeues the job from the runpool (if it was soft-stopped or
+ * removed from NEXT registers).
+ *
+ * Removes it from the system if it finished/failed/was cancelled.
+ *
+ * Resolves dependencies to add dependent jobs to the context, potentially
+ * starting them if necessary (which may add more references to the context)
+ *
+ * Releases the reference to the context from the no-longer-running job.
+ *
+ * Handles retrying submission outside of IRQ context if it failed from within
+ * IRQ context.
+ */
+static void jd_done_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+ struct kbase_jd_context *jctx;
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ u64 cache_jc = katom->jc;
+ struct kbasep_js_atom_retained_state katom_retained_state;
+ bool schedule = false;
+
+ /* Soft jobs should never reach this function */
+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+ kctx = katom->kctx;
+ jctx = &kctx->jctx;
+ kbdev = kctx->kbdev;
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+
+ KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0);
+
+ kbase_backend_complete_wq(kbdev, katom);
+
+ /*
+ * Begin transaction on JD context and JS context
+ */
+ mutex_lock(&jctx->lock);
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* This worker only gets called on contexts that are scheduled *in*. This is
+ * because it only happens in response to an IRQ from a job that was
+ * running.
+ */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+ if (katom->event_code == BASE_JD_EVENT_STOPPED) {
+ /* Atom has been promoted to stopped */
+ unsigned long flags;
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+ mutex_unlock(&jctx->lock);
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ katom->status = KBASE_JD_ATOM_STATE_IN_JS;
+ kbase_js_unpull(kctx, katom);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return;
+ }
+
+ if (katom->event_code != BASE_JD_EVENT_DONE)
+ dev_err(kbdev->dev,
+ "t6xx: GPU fault 0x%02lx from job slot %d\n",
+ (unsigned long)katom->event_code,
+ katom->slot_nr);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+ /* Retain state before the katom disappears */
+ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+
+ if (!kbasep_js_has_atom_finished(&katom_retained_state)) {
+ mutex_lock(&js_devdata->runpool_mutex);
+ kbasep_js_clear_job_retry_submit(katom);
+ /* An atom that has been hard-stopped might have previously
+ * been soft-stopped and has just finished before the hard-stop
+ * occurred. For this reason, clear the hard-stopped flag */
+ katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ }
+
+ if (kbasep_js_has_atom_finished(&katom_retained_state))
+ schedule = true;
+
+ kbase_js_complete_atom_wq(kctx, katom);
+
+ KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state));
+
+ kbasep_js_remove_job(kbdev, kctx, katom);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+ katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+ /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
+ schedule |= jd_done_nolock(katom);
+
+ /* katom may have been freed now, do not use! */
+
+ /*
+ * Transaction complete
+ */
+ mutex_unlock(&jctx->lock);
+
+ /* Job is now no longer running, so can now safely release the context
+ * reference, and handle any actions that were logged against the atom's retained state */
+
+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state);
+
+ if (schedule)
+ kbase_js_sched_all(kbdev);
+
+ KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0);
+}
+
+/**
+ * jd_cancel_worker - Work queue job cancel function.
+ * @data: a &struct work_struct
+ *
+ * Only called as part of 'Zapping' a context (which occurs on termination).
+ * Operates serially with the jd_done_worker() on the work queue.
+ *
+ * This can only be called on contexts that aren't scheduled.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or jd_done_worker(), because the atoms here must not be
+ * running (by virtue of only being called on contexts that aren't
+ * scheduled).
+ */
+static void jd_cancel_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work);
+ struct kbase_jd_context *jctx;
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool need_to_try_schedule_context;
+ bool attr_state_changed;
+ struct kbase_device *kbdev;
+
+ /* Soft jobs should never reach this function */
+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+ kctx = katom->kctx;
+ kbdev = kctx->kbdev;
+ jctx = &kctx->jctx;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+ /* This only gets called on contexts that are scheduled out. Hence, we must
+ * make sure we don't de-ref the number of running jobs (there aren't
+ * any), nor must we try to schedule out the context (it's already
+ * scheduled out).
+ */
+ KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+ /* Scheduler: Remove the job from the system */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_lock(&jctx->lock);
+
+ need_to_try_schedule_context = jd_done_nolock(katom);
+ /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
+ * schedule the context. There's also no need for the jsctx_mutex to have been taken
+ * around this too. */
+ KBASE_DEBUG_ASSERT(!need_to_try_schedule_context);
+
+ /* katom may have been freed now, do not use! */
+ mutex_unlock(&jctx->lock);
+
+ if (attr_state_changed)
+ kbase_js_sched_all(kbdev);
+}
+
+/**
+ * jd_evict_worker - Work queue job evict function
+ * @data: a &struct work_struct
+ *
+ * Only called as part of evicting failed jobs. This is only called on jobs that
+ * were never submitted to HW Access. Jobs that were submitted are handled
+ * through jd_done_worker().
+ * Operates serially with the jd_done_worker() on the work queue.
+ *
+ * We don't need to release most of the resources that would occur on
+ * kbase_jd_done() or jd_done_worker(), because the atoms here must not be
+ * running (by virtue of having not been submitted to HW Access).
+ */
+static void jd_evict_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+ work);
+ struct kbase_jd_context *jctx;
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbase_device *kbdev;
+
+ /* Soft jobs should never reach this function */
+ KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0);
+
+ kctx = katom->kctx;
+ kbdev = kctx->kbdev;
+ jctx = &kctx->jctx;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0);
+
+ /* Scheduler: Remove the job from the system */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ kbasep_js_remove_cancelled_job(kbdev, kctx, katom);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_lock(&jctx->lock);
+ jd_done_nolock(katom);
+ /* katom may have been freed now, do not use! */
+ mutex_unlock(&jctx->lock);
+
+ kbase_js_sched_all(kbdev);
+}
+
+/**
+ * kbase_jd_done - Complete a job that has been removed from the Hardware
+ * @katom: atom which has been completed
+ * @slot_nr: slot the atom was on
+ * @end_timestamp: completion time
+ * @done_code: completion code
+ *
+ * This must be used whenever a job has been removed from the Hardware, e.g.:
+ * An IRQ indicates that the job finished (for both error and 'done' codes), or
+ * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop.
+ *
+ * Some work is carried out immediately, and the rest is deferred onto a
+ * workqueue
+ *
+ * Context:
+ * This can be called safely from atomic context.
+ * The caller must hold kbasep_js_device_data.runpool_irq.lock
+ */
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr,
+ ktime_t *end_timestamp, kbasep_js_atom_done_code done_code)
+{
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(katom);
+ kctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(kctx);
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT)
+ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+
+ KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0);
+
+ kbase_job_check_leave_disjoint(kbdev, katom);
+
+ katom->slot_nr = slot_nr;
+
+ WARN_ON(work_pending(&katom->work));
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, jd_done_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_done);
+
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != katom);
+ kctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+ /* This should only be done from a context that is not scheduled */
+ KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+ WARN_ON(work_pending(&katom->work));
+
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, jd_cancel_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(NULL != kbdev);
+ KBASE_DEBUG_ASSERT(NULL != katom);
+ kctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0);
+
+ /* This should only be done from a context that is currently scheduled
+ */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+ WARN_ON(work_pending(&katom->work));
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, jd_evict_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+void kbase_jd_zap_context(struct kbase_context *kctx)
+{
+ struct kbase_jd_atom *katom;
+ struct list_head *entry, *tmp;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ kbdev = kctx->kbdev;
+
+ KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u);
+
+ kbase_js_zap_context(kctx);
+ kbase_jm_wait_for_zero_jobs(kctx);
+
+ mutex_lock(&kctx->jctx.lock);
+
+ /*
+ * While holding the struct kbase_jd_context lock clean up jobs which are known to kbase but are
+ * queued outside the job scheduler.
+ */
+
+ list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) {
+ katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]);
+ kbase_cancel_soft_job(katom);
+ }
+
+
+#ifdef CONFIG_KDS
+
+ /* For each job waiting on a kds resource, cancel the wait and force the job to
+ * complete early, this is done so that we don't leave jobs outstanding waiting
+ * on kds resources which may never be released when contexts are zapped, resulting
+ * in a hang.
+ *
+ * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held,
+ * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job.
+ */
+
+ list_for_each(entry, &kctx->waiting_kds_resource) {
+ katom = list_entry(entry, struct kbase_jd_atom, node);
+
+ kbase_cancel_kds_wait_job(katom);
+ }
+#endif
+
+ mutex_unlock(&kctx->jctx.lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_zap_context);
+
+int kbase_jd_init(struct kbase_context *kctx)
+{
+ int i;
+ int mali_err = 0;
+#ifdef CONFIG_KDS
+ int err;
+#endif /* CONFIG_KDS */
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", 0, 1);
+ if (NULL == kctx->jctx.job_done_wq) {
+ mali_err = -ENOMEM;
+ goto out1;
+ }
+
+ for (i = 0; i < BASE_JD_ATOM_COUNT; i++) {
+ init_waitqueue_head(&kctx->jctx.atoms[i].completed);
+
+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]);
+ INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]);
+
+ /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */
+ kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID;
+ kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED;
+ }
+
+ mutex_init(&kctx->jctx.lock);
+
+ init_waitqueue_head(&kctx->jctx.zero_jobs_wait);
+
+ spin_lock_init(&kctx->jctx.tb_lock);
+
+#ifdef CONFIG_KDS
+ err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear);
+ if (0 != err) {
+ mali_err = -EINVAL;
+ goto out2;
+ }
+#endif /* CONFIG_KDS */
+
+ kctx->jctx.job_nr = 0;
+
+ return 0;
+
+#ifdef CONFIG_KDS
+ out2:
+ destroy_workqueue(kctx->jctx.job_done_wq);
+#endif /* CONFIG_KDS */
+ out1:
+ return mali_err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_init);
+
+void kbase_jd_exit(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx);
+
+#ifdef CONFIG_KDS
+ kds_callback_term(&kctx->jctx.kds_cb);
+#endif /* CONFIG_KDS */
+ /* Work queue is emptied by this */
+ destroy_workqueue(kctx->jctx.job_done_wq);
+}
+
+KBASE_EXPORT_TEST_API(kbase_jd_exit);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
new file mode 100644
index 00000000000..b37f280a647
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c
@@ -0,0 +1,114 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/seq_file.h>
+
+#include <mali_kbase_jd_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file.
+ * @sfile: The debugfs entry
+ * @data: Data associated with the entry
+ *
+ * This function is called to get the contents of the JD atoms debugfs file.
+ * This is a report of all atoms managed by kbase_jd_context.atoms
+ *
+ * Return: 0 if successfully prints data in debugfs entry file, failure
+ * otherwise
+ */
+static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data)
+{
+ struct kbase_context *kctx = sfile->private;
+ struct kbase_jd_atom *atoms;
+ unsigned long irq_flags;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ /* Print table heading */
+ seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n");
+
+ atoms = kctx->jctx.atoms;
+ /* General atom states */
+ mutex_lock(&kctx->jctx.lock);
+ /* JS-related states */
+ spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+ for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) {
+ struct kbase_jd_atom *atom = &atoms[i];
+ s64 start_timestamp = 0;
+
+ if (atom->status == KBASE_JD_ATOM_STATE_UNUSED)
+ continue;
+
+ /* start_timestamp is cleared as soon as the atom leaves UNUSED state
+ * and set before a job is submitted to the h/w, a non-zero value means
+ * it is valid */
+ if (ktime_to_ns(atom->start_timestamp))
+ start_timestamp = ktime_to_ns(
+ ktime_sub(ktime_get(), atom->start_timestamp));
+
+ seq_printf(sfile,
+ "%i,%u,%u,%u,%u %u,%lli,%llu\n",
+ i, atom->core_req, atom->status, atom->coreref_state,
+ (unsigned)(atom->dep[0].atom ?
+ atom->dep[0].atom - atoms : 0),
+ (unsigned)(atom->dep[1].atom ?
+ atom->dep[1].atom - atoms : 0),
+ (signed long long)start_timestamp,
+ (unsigned long long)(atom->time_spent_us ?
+ atom->time_spent_us * 1000 : start_timestamp)
+ );
+ }
+ spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags);
+ mutex_unlock(&kctx->jctx.lock);
+
+ return 0;
+}
+
+
+/**
+ * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file
+ * @in: &struct inode pointer
+ * @file: &struct file pointer
+ *
+ * Return: file descriptor
+ */
+static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private);
+}
+
+static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
+ .open = kbasep_jd_debugfs_atoms_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ /* Expose all atoms */
+ debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx,
+ &kbasep_jd_debugfs_atoms_fops);
+
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
new file mode 100644
index 00000000000..703e4cf6a5f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_jd_debugfs.h
+ * Header file for job dispatcher-related entries in debugfs
+ */
+
+#ifndef _KBASE_JD_DEBUGFS_H
+#define _KBASE_JD_DEBUGFS_H
+
+#include <linux/debugfs.h>
+
+#include <mali_kbase.h>
+
+/**
+ * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system
+ *
+ * @kctx Pointer to kbase_context
+ */
+void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx);
+
+#endif /*_KBASE_JD_DEBUGFS_H*/
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c
new file mode 100644
index 00000000000..bc5ad805818
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c
@@ -0,0 +1,132 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#include <mali_kbase.h>
+#include "mali_kbase_hwaccess_jm.h"
+#include "mali_kbase_jm.h"
+
+/**
+ * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot
+ * @js on the active context.
+ * @kbdev: Device pointer
+ * @js: Job slot to run on
+ * @nr_jobs_to_submit: Number of jobs to attempt to submit
+ *
+ * Return: true if slot can still be submitted on, false if slot is now full.
+ */
+static bool kbase_jm_next_job(struct kbase_device *kbdev, int js,
+ int nr_jobs_to_submit)
+{
+ struct kbase_context *kctx;
+ int i;
+
+ kctx = kbdev->hwaccess.active_kctx;
+
+ if (!kctx)
+ return true;
+
+ for (i = 0; i < nr_jobs_to_submit; i++) {
+ struct kbase_jd_atom *katom = kbase_js_pull(kctx, js);
+
+ if (!katom)
+ return true; /* Context has no jobs on this slot */
+
+ kbase_backend_run_atom(kbdev, katom);
+ }
+
+ return false; /* Slot ringbuffer should now be full */
+}
+
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+ u32 ret_mask = 0;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ while (js_mask) {
+ int js = ffs(js_mask) - 1;
+ int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js);
+
+ if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit))
+ ret_mask |= (1 << js);
+
+ js_mask &= ~(1 << js);
+ }
+
+ return ret_mask;
+}
+
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ if (!down_trylock(&js_devdata->schedule_sem)) {
+ kbase_jm_kick(kbdev, js_mask);
+ up(&js_devdata->schedule_sem);
+ }
+}
+
+void kbase_jm_try_kick_all(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ if (!down_trylock(&js_devdata->schedule_sem)) {
+ kbase_jm_kick_all(kbdev);
+ up(&js_devdata->schedule_sem);
+ }
+}
+
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ WARN_ON(atomic_read(&kctx->atoms_pulled));
+
+ if (kbdev->hwaccess.active_kctx == kctx)
+ kbdev->hwaccess.active_kctx = NULL;
+}
+
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (katom->event_code != BASE_JD_EVENT_STOPPED &&
+ katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
+ kbase_js_complete_atom(katom, NULL);
+ } else {
+ kbase_js_unpull(katom->kctx, katom);
+ }
+}
+
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+ ktime_t *end_timestamp)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ kbase_js_complete_atom(katom, end_timestamp);
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h
new file mode 100644
index 00000000000..27aca3a699f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h
@@ -0,0 +1,106 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * Job manager common APIs
+ */
+
+#ifndef _KBASE_JM_H_
+#define _KBASE_JM_H_
+
+/**
+ * kbase_jm_kick() - Indicate that there are jobs ready to run.
+ * @kbdev: Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from.
+ *
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job
+ * slots.
+ * @kbdev: Device pointer
+ *
+ * Caller must hold the runpool_irq lock and schedule_sem semaphore
+ *
+ * Return: Mask of the job slots that can still be submitted to.
+ */
+static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev)
+{
+ return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+/**
+ * kbase_jm_try_kick - Attempt to call kbase_jm_kick
+ * @kbdev: Device pointer
+ * @js_mask: Mask of the job slots that can be pulled from
+ * Context: Caller must hold runpool_irq lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
+
+/**
+ * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all
+ * @kbdev: Device pointer
+ * Context: Caller must hold runpool_irq lock
+ *
+ * If schedule_sem can be immediately obtained then this function will call
+ * kbase_jm_kick_all() otherwise it will do nothing.
+ */
+void kbase_jm_try_kick_all(struct kbase_device *kbdev);
+
+/**
+ * kbase_jm_idle_ctx() - Mark a context as idle.
+ * @kbdev: Device pointer
+ * @kctx: Context to mark as idle
+ *
+ * No more atoms will be pulled from this context until it is marked as active
+ * by kbase_js_use_ctx().
+ *
+ * The context should have no atoms currently pulled from it
+ * (kctx->atoms_pulled == 0).
+ *
+ * Caller must hold the runpool_irq lock
+ */
+void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
+ * been soft-stopped or will fail due to a
+ * dependency
+ * @kbdev: Device pointer
+ * @katom: Atom that has been stopped or will be failed
+ */
+void kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_jm_complete() - Complete an atom
+ * @kbdev: Device pointer
+ * @katom: Atom that has completed
+ * @end_timestamp: Timestamp of atom completion
+ */
+void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
+ ktime_t *end_timestamp);
+
+#endif /* _KBASE_JM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c
new file mode 100644
index 00000000000..c49d622b761
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.c
@@ -0,0 +1,3177 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler Implementation
+ */
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+#include <mali_kbase_hw.h>
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config_defaults.h>
+
+#include "mali_kbase_jm.h"
+#include "mali_kbase_hwaccess_jm.h"
+
+/*
+ * Private types
+ */
+
+/* Bitpattern indicating the result of releasing a context */
+enum {
+ /* The context was descheduled - caller should try scheduling in a new
+ * one to keep the runpool full */
+ KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0),
+ /* Ctx attributes were changed - caller should try scheduling all
+ * contexts */
+ KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1)
+};
+
+typedef u32 kbasep_js_release_result;
+
+const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = {
+ KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */
+ KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */
+ KBASE_JS_ATOM_SCHED_PRIO_LOW /* BASE_JD_PRIO_LOW */
+};
+
+const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = {
+ BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */
+ BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */
+ BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */
+};
+
+
+/*
+ * Private function prototypes
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+ struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbasep_js_atom_retained_state *katom_retained_state);
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+ kbasep_js_policy_ctx_job_cb callback);
+
+static bool kbase_js_evict_atom(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom_evict,
+ struct kbase_jd_atom *start_katom,
+ struct kbase_jd_atom *head_katom,
+ struct list_head *evict_list,
+ struct jsctx_rb *rb, int idx);
+
+/* Helper for trace subcodes */
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int as_nr;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ refcnt = js_per_as_data->as_busy_refcount;
+ }
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return refcnt;
+}
+
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int as_nr;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ refcnt = js_per_as_data->as_busy_refcount;
+ }
+
+ return refcnt;
+}
+#else /* KBASE_TRACE_ENABLE */
+static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(kctx);
+ return 0;
+}
+static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(kctx);
+ return 0;
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+/*
+ * Private types
+ */
+enum {
+ JS_DEVDATA_INIT_NONE = 0,
+ JS_DEVDATA_INIT_CONSTANTS = (1 << 0),
+ JS_DEVDATA_INIT_POLICY = (1 << 1),
+ JS_DEVDATA_INIT_ALL = ((1 << 2) - 1)
+};
+
+enum {
+ JS_KCTX_INIT_NONE = 0,
+ JS_KCTX_INIT_CONSTANTS = (1 << 0),
+ JS_KCTX_INIT_POLICY = (1 << 1),
+ JS_KCTX_INIT_ALL = ((1 << 2) - 1)
+};
+
+/*
+ * Private functions
+ */
+
+/**
+ * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
+ * @features: JSn_FEATURE register value
+ *
+ * Given a JSn_FEATURE register value returns the core requirements that match
+ *
+ * Return: Core requirement bit mask
+ */
+static base_jd_core_req core_reqs_from_jsn_features(u16 features)
+{
+ base_jd_core_req core_req = 0u;
+
+ if ((features & JS_FEATURE_SET_VALUE_JOB) != 0)
+ core_req |= BASE_JD_REQ_V;
+
+ if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0)
+ core_req |= BASE_JD_REQ_CF;
+
+ if ((features & JS_FEATURE_COMPUTE_JOB) != 0)
+ core_req |= BASE_JD_REQ_CS;
+
+ if ((features & JS_FEATURE_TILER_JOB) != 0)
+ core_req |= BASE_JD_REQ_T;
+
+ if ((features & JS_FEATURE_FRAGMENT_JOB) != 0)
+ core_req |= BASE_JD_REQ_FS;
+
+ return core_req;
+}
+
+static void kbase_js_sync_timers(struct kbase_device *kbdev)
+{
+ mutex_lock(&kbdev->js_data.runpool_mutex);
+ kbase_backend_ctx_count_changed(kbdev);
+ mutex_unlock(&kbdev->js_data.runpool_mutex);
+}
+
+/* Hold the kbasep_js_device_data::runpool_irq::lock for this */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_per_as_data *js_per_as_data;
+ bool result = false;
+ int as_nr;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_devdata = &kbdev->js_data;
+
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ int new_refcnt;
+
+ KBASE_DEBUG_ASSERT(as_nr >= 0);
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ KBASE_DEBUG_ASSERT(js_per_as_data->kctx != NULL);
+
+ new_refcnt = ++(js_per_as_data->as_busy_refcount);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx,
+ NULL, 0u, new_refcnt);
+ result = true;
+ }
+
+ return result;
+}
+
+/**
+ * jsctx_rb_is_empty_prio(): - Check if ring buffer is empty
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ * @prio: Priority to check.
+ *
+ * Return true if there are no atoms to pull. There may be running atoms in the
+ * ring buffer even if there are no atoms to pull. It is also possible for the
+ * ring buffer to be full (with running atoms) when this functions returns
+ * true.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if there are no atoms to pull, false otherwise.
+ */
+static inline bool
+jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio)
+{
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ return rb->read_idx == rb->write_idx;
+}
+
+/**
+ * jsctx_rb_is_empty(): - Check if all priority ring buffers are empty
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if the ring buffers for all priorities are empty, false
+ * otherwise.
+ */
+static inline bool
+jsctx_rb_is_empty(struct kbase_context *kctx, int js)
+{
+ int prio;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+ if (!jsctx_rb_is_empty_prio(kctx, js, prio))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * jsctx_rb_compact_prio(): - Compact a ring buffer
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to compact.
+ * @prio: Priority id to compact.
+ */
+static inline void
+jsctx_rb_compact_prio(struct kbase_context *kctx, int js, int prio)
+{
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+ u16 compact_idx = rb->write_idx - 1;
+ u16 end_idx = rb->running_idx - 1;
+ u16 i;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ for (i = compact_idx; i != end_idx; i--) {
+ if (rb->entries[i & JSCTX_RB_MASK].atom_id !=
+ KBASEP_ATOM_ID_INVALID) {
+ WARN_ON(compact_idx < rb->running_idx);
+ rb->entries[compact_idx & JSCTX_RB_MASK].atom_id =
+ rb->entries[i & JSCTX_RB_MASK].atom_id;
+
+ compact_idx--;
+ }
+ if (rb->read_idx == i)
+ rb->read_idx = compact_idx + 1;
+ }
+
+ rb->running_idx = compact_idx + 1;
+}
+
+/**
+ * jsctx_rb_compact(): - Compact all priority ring buffers
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to compact.
+ */
+static inline void
+jsctx_rb_compact(struct kbase_context *kctx, int js)
+{
+ int prio;
+
+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+ jsctx_rb_compact_prio(kctx, js, prio);
+}
+
+/**
+ * jsctx_rb_foreach_prio(): - Execute callback for each entry in ring buffer
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to iterate.
+ * @prio: Priority id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over a ring buffer and invoke @callback for each entry in buffer, and
+ * remove the entry from the buffer.
+ *
+ * If entries are added to the ring buffer while this is running those entries
+ * may, or may not be covered. To ensure that all entries in the buffer have
+ * been enumerated when this function returns jsctx->lock must be held when
+ * calling this function.
+ *
+ * The HW access lock, js_data.runpool_irq.lock, must always be held when
+ * calling this function.
+ */
+static void
+jsctx_rb_foreach_prio(struct kbase_context *kctx, int js, int prio,
+ kbasep_js_policy_ctx_job_cb callback)
+{
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+ struct kbase_jd_atom *katom;
+ u16 write_idx = ACCESS_ONCE(rb->write_idx);
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ /* There must be no jobs currently in HW access */
+ WARN_ON(rb->read_idx != rb->running_idx);
+
+ /* Invoke callback on all kbase_jd_atoms in the ring buffer, and
+ * removes them from the buffer */
+ while (rb->read_idx != write_idx) {
+ int id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id;
+
+ katom = kbase_jd_atom_from_id(kctx, id);
+
+ rb->read_idx++;
+ rb->running_idx++;
+
+ callback(kctx->kbdev, katom);
+ }
+}
+
+/**
+ * jsctx_rb_foreach(): - Execute callback for each entry in all priority rb
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to iterate.
+ * @callback: Function pointer to callback.
+ *
+ * Iterate over all the different priorities, and for each call
+ * jsctx_rb_foreach_prio() to iterate over the ring buffer and invoke @callback
+ * for each entry in buffer, and remove the entry from the buffer.
+ */
+static inline void
+jsctx_rb_foreach(struct kbase_context *kctx, int js,
+ kbasep_js_policy_ctx_job_cb callback)
+{
+ int prio;
+
+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++)
+ jsctx_rb_foreach_prio(kctx, js, prio, callback);
+}
+
+/**
+ * jsctx_rb_peek_prio(): - Check buffer and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a pointer to
+ * the next atom, unless the ring buffer is empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio)
+{
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+ int id;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ if (jsctx_rb_is_empty_prio(kctx, js, prio))
+ return NULL;
+
+ id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id;
+ return kbase_jd_atom_from_id(kctx, id);
+}
+
+/**
+ * jsctx_rb_peek(): - Check all priority buffers and get next atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ *
+ * Check the ring buffers for all priorities, starting from
+ * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a
+ * pointer to the next atom, unless all the priority's ring buffers are empty.
+ *
+ * Return: Pointer to next atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek(struct kbase_context *kctx, int js)
+{
+ int prio;
+
+ for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) {
+ struct kbase_jd_atom *katom;
+
+ katom = jsctx_rb_peek_prio(kctx, js, prio);
+ if (katom)
+ return katom;
+ }
+
+ return NULL;
+}
+
+/**
+ * jsctx_rb_peek_last(): - Check a ring buffer and get the last atom
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @js: Job slot id to check.
+ * @prio: Priority id to check.
+ *
+ * Check the ring buffer for the specified @js and @prio and return a
+ * pointer to the last atom, unless all the priority's ring buffers are empty.
+ *
+ * The last atom is the atom that was added using jsctx_rb_add() most recently.
+ *
+ * Return: Pointer to last atom in buffer, or NULL if there is no atom.
+ */
+static inline struct kbase_jd_atom *
+jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio)
+{
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+ int id;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ if (jsctx_rb_is_empty_prio(kctx, js, prio))
+ return NULL;
+
+ id = rb->entries[(rb->write_idx - 1) & JSCTX_RB_MASK].atom_id;
+ return kbase_jd_atom_from_id(kctx, id);
+}
+
+/**
+ * jsctx_rb_pull(): - Mark atom in list as running
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to pull.
+ *
+ * Mark an atom previously obtained from jsctx_rb_peek() as running.
+ *
+ * @katom must currently be at the head of the ring buffer.
+ */
+static inline void
+jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int prio = katom->sched_priority;
+ int js = katom->slot_nr;
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ /* Atoms must be pulled in the correct order. */
+ WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio));
+
+ rb->read_idx++;
+}
+
+/**
+ * jsctx_rb_unpull(): - Undo marking of atom in list as running
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to unpull.
+ *
+ * Undo jsctx_rb_pull() and put @katom back in the queue.
+ *
+ * jsctx_rb_unpull() must be called on atoms in the same order the atoms were
+ * pulled.
+ */
+static inline void
+jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int prio = katom->sched_priority;
+ int js = katom->slot_nr;
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ /* Atoms must be unpulled in correct order. */
+ WARN_ON(rb->entries[(rb->read_idx - 1) & JSCTX_RB_MASK].atom_id !=
+ kbase_jd_atom_id(kctx, katom));
+
+ rb->read_idx--;
+}
+
+/**
+ * jsctx_rb_add(): - Add atom to ring buffer
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to add.
+ *
+ * Add @katom to the ring buffer determined by the atom's priority and job slot
+ * number.
+ *
+ * If the ring buffer is full -EBUSY will be returned.
+ *
+ * Return: On success 0 is returned, on failure a negative error code.
+ */
+static int
+jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int prio = katom->sched_priority;
+ int js = katom->slot_nr;
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ /* Check if the ring buffer is full */
+ if ((rb->write_idx - rb->running_idx) >= JSCTX_RB_SIZE)
+ return -EBUSY;
+
+ rb->entries[rb->write_idx & JSCTX_RB_MASK].atom_id =
+ kbase_jd_atom_id(kctx, katom);
+ rb->write_idx++;
+
+ return 0;
+}
+
+/**
+ * jsctx_rb_remove(): - Remove atom from ring buffer
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @katom: Pointer to katom to remove.
+ *
+ * Remove @katom from the ring buffer.
+ *
+ * @katom must have been pulled from the buffer earlier by jsctx_rb_pull(), and
+ * atoms must be removed in the same order they were pulled from the ring
+ * buffer.
+ */
+static inline void
+jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int prio = katom->sched_priority;
+ int js = katom->slot_nr;
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ /* Atoms must be completed in order. */
+ WARN_ON(rb->entries[rb->running_idx & JSCTX_RB_MASK].atom_id !=
+ kbase_jd_atom_id(kctx, katom));
+
+ rb->running_idx++;
+}
+
+/**
+ * jsctx_rb_evict(): - Evict atom, and dependents, from ring buffer
+ * @kctx: Pointer to kbase context with ring buffer.
+ * @start_katom: Pointer to the first katom to evict.
+ * @head_katom: Pointer to head katom.
+ * @evict_list: Pointer to head of list where evicted atoms are added.
+ *
+ * Iterate over the ring buffer starting at @start_katom and evict @start_atom
+ * and dependent atoms in ring buffer.
+ *
+ * @evict_list and @head_katom is passed on to kbase_js_evict_atom() which will
+ * examine the atom dependencies.
+ *
+ * jsctx_rb_evict() is only called by kbase_js_evict_deps().
+ */
+static void
+jsctx_rb_evict(struct kbase_context *kctx,
+ struct kbase_jd_atom *start_katom,
+ struct kbase_jd_atom *head_katom,
+ struct list_head *evict_list)
+{
+ int prio = start_katom->sched_priority;
+ int js = start_katom->slot_nr;
+ struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js];
+ bool atom_in_rb = false;
+ u16 i, start_idx;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ for (i = rb->running_idx; i != rb->write_idx; i++) {
+ if (rb->entries[i & JSCTX_RB_MASK].atom_id ==
+ kbase_jd_atom_id(kctx, start_katom)) {
+ start_idx = i;
+ atom_in_rb = true;
+ break;
+ }
+ }
+
+ /* start_katom must still be in ring buffer. */
+ if (i == rb->write_idx || !atom_in_rb)
+ return;
+
+ /* Evict all dependencies on same slot. */
+ for (i = start_idx; i != rb->write_idx; i++) {
+ u8 katom_evict;
+
+ katom_evict = rb->entries[i & JSCTX_RB_MASK].atom_id;
+ if (katom_evict != KBASEP_ATOM_ID_INVALID) {
+ if (!kbase_js_evict_atom(kctx,
+ &kctx->jctx.atoms[katom_evict],
+ start_katom, head_katom,
+ evict_list, rb, i))
+ break;
+ }
+ }
+}
+
+/*
+ * Functions private to KBase ('Protected' functions)
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev)
+{
+ struct kbasep_js_device_data *jsdd;
+ int err;
+ int i;
+ u16 as_present;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ jsdd = &kbdev->js_data;
+
+ KBASE_DEBUG_ASSERT(jsdd->init_status == JS_DEVDATA_INIT_NONE);
+
+ /* These two must be recalculated if nr_hw_address_spaces changes
+ * (e.g. for HW workarounds) */
+ as_present = (1U << kbdev->nr_hw_address_spaces) - 1;
+ kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces;
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) {
+ bool use_workaround;
+
+ use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE;
+ if (use_workaround) {
+ dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only");
+ kbdev->nr_user_address_spaces = 1;
+ }
+ }
+#ifdef CONFIG_MALI_DEBUG
+ /* Soft-stop will be disabled on a single context by default unless
+ * softstop_always is set */
+ jsdd->softstop_always = false;
+#endif /* CONFIG_MALI_DEBUG */
+ jsdd->nr_all_contexts_running = 0;
+ jsdd->nr_user_contexts_running = 0;
+ jsdd->nr_contexts_pullable = 0;
+ atomic_set(&jsdd->nr_contexts_runnable, 0);
+ /* All ASs initially free */
+ jsdd->as_free = as_present;
+ /* No ctx allowed to submit */
+ jsdd->runpool_irq.submit_allowed = 0u;
+ memset(jsdd->runpool_irq.ctx_attr_ref_count, 0,
+ sizeof(jsdd->runpool_irq.ctx_attr_ref_count));
+ memset(jsdd->runpool_irq.slot_affinities, 0,
+ sizeof(jsdd->runpool_irq.slot_affinities));
+ memset(jsdd->runpool_irq.slot_affinity_refcount, 0,
+ sizeof(jsdd->runpool_irq.slot_affinity_refcount));
+ INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list);
+
+ /* Config attributes */
+ jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+ jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+ jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+ else
+ jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS;
+ jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+ jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408;
+ else
+ jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+ jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+ jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
+ jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
+ jsdd->cfs_ctx_runtime_init_slices =
+ DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES;
+ jsdd->cfs_ctx_runtime_min_slices =
+ DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES;
+
+ dev_dbg(kbdev->dev, "JS Config Attribs: ");
+ dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
+ jsdd->scheduling_period_ns);
+ dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u",
+ jsdd->soft_stop_ticks);
+ dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u",
+ jsdd->soft_stop_ticks_cl);
+ dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u",
+ jsdd->hard_stop_ticks_ss);
+ dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u",
+ jsdd->hard_stop_ticks_cl);
+ dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u",
+ jsdd->hard_stop_ticks_dumping);
+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u",
+ jsdd->gpu_reset_ticks_ss);
+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u",
+ jsdd->gpu_reset_ticks_cl);
+ dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u",
+ jsdd->gpu_reset_ticks_dumping);
+ dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u",
+ jsdd->ctx_timeslice_ns);
+ dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_init_slices:%u",
+ jsdd->cfs_ctx_runtime_init_slices);
+ dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u",
+ jsdd->cfs_ctx_runtime_min_slices);
+
+ if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
+ jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
+ jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping &&
+ jsdd->hard_stop_ticks_dumping <
+ jsdd->gpu_reset_ticks_dumping)) {
+ dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n");
+ return -EINVAL;
+ }
+
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS
+ dev_dbg(kbdev->dev, "Job Scheduling Policy Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.",
+ jsdd->soft_stop_ticks,
+ jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ dev_dbg(kbdev->dev, "Job Scheduling Policy Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.",
+ jsdd->hard_stop_ticks_ss,
+ jsdd->hard_stop_ticks_dumping,
+ jsdd->scheduling_period_ns);
+#endif
+#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS
+ dev_dbg(kbdev->dev, "Note: The JS policy's tick timer (if coded) will still be run, but do nothing.");
+#endif
+
+ /* setup the number of irq throttle cycles base on given time */
+ {
+ int time_us = kbdev->gpu_props.irq_throttle_time_us;
+ int cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev,
+ time_us);
+
+ atomic_set(&kbdev->irq_throttle_cycles, cycles);
+ }
+
+ /* Clear the AS data, including setting NULL pointers */
+ memset(&jsdd->runpool_irq.per_as_data[0], 0,
+ sizeof(jsdd->runpool_irq.per_as_data));
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i)
+ jsdd->js_reqs[i] = core_reqs_from_jsn_features(
+ kbdev->gpu_props.props.raw_props.js_features[i]);
+
+ jsdd->init_status |= JS_DEVDATA_INIT_CONSTANTS;
+
+ /* On error, we could continue on: providing none of the below resources
+ * rely on the ones above */
+
+ mutex_init(&jsdd->runpool_mutex);
+ mutex_init(&jsdd->queue_mutex);
+ spin_lock_init(&jsdd->runpool_irq.lock);
+ sema_init(&jsdd->schedule_sem, 1);
+
+ err = kbasep_js_policy_init(kbdev);
+ if (!err)
+ jsdd->init_status |= JS_DEVDATA_INIT_POLICY;
+
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) {
+ INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]);
+ INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]);
+ }
+
+ jsdd->runpool_irq.secure_mode = false;
+ if (kbdev->secure_ops) {
+ /* Make sure secure mode is disabled */
+ kbdev->secure_ops->secure_mode_disable(kbdev);
+ }
+
+ /* On error, do no cleanup; this will be handled by the caller(s), since
+ * we've designed this resource to be safe to terminate on init-fail */
+ if (jsdd->init_status != JS_DEVDATA_INIT_ALL)
+ return -EINVAL;
+
+ return 0;
+}
+
+void kbasep_js_devdata_halt(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbasep_js_devdata_term(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ if ((js_devdata->init_status & JS_DEVDATA_INIT_CONSTANTS)) {
+ s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, };
+ /* The caller must de-register all contexts before calling this
+ */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0);
+ KBASE_DEBUG_ASSERT(memcmp(
+ js_devdata->runpool_irq.ctx_attr_ref_count,
+ zero_ctx_attr_ref_count,
+ sizeof(zero_ctx_attr_ref_count)) == 0);
+ CSTD_UNUSED(zero_ctx_attr_ref_count);
+ }
+ if ((js_devdata->init_status & JS_DEVDATA_INIT_POLICY))
+ kbasep_js_policy_term(&js_devdata->policy);
+
+ js_devdata->init_status = JS_DEVDATA_INIT_NONE;
+}
+
+int kbasep_js_kctx_init(struct kbase_context * const kctx)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ int err;
+ int i;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
+ INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]);
+
+ js_kctx_info = &kctx->jctx.sched_info;
+ KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE);
+
+ js_kctx_info->ctx.nr_jobs = 0;
+ atomic_set(&js_kctx_info->ctx.fault_count, 0);
+ js_kctx_info->ctx.is_scheduled = false;
+ js_kctx_info->ctx.is_dying = false;
+ memset(js_kctx_info->ctx.ctx_attr_ref_count, 0,
+ sizeof(js_kctx_info->ctx.ctx_attr_ref_count));
+
+ /* Initially, the context is disabled from submission until the create
+ * flags are set */
+ js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED;
+
+ js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS;
+
+ /* On error, we could continue on: providing none of the below resources
+ * rely on the ones above */
+ mutex_init(&js_kctx_info->ctx.jsctx_mutex);
+
+ init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait);
+
+ err = kbasep_js_policy_init_ctx(kbdev, kctx);
+ if (!err)
+ js_kctx_info->init_status |= JS_KCTX_INIT_POLICY;
+
+ /* On error, do no cleanup; this will be handled by the caller(s), since
+ * we've designed this resource to be safe to terminate on init-fail */
+ if (js_kctx_info->init_status != JS_KCTX_INIT_ALL)
+ return -EINVAL;
+
+ return 0;
+}
+
+void kbasep_js_kctx_term(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+ int js;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ kbdev = kctx->kbdev;
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) {
+ /* The caller must de-register all jobs before calling this */
+ KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0);
+ }
+
+ mutex_lock(&kbdev->js_data.queue_mutex);
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+ mutex_unlock(&kbdev->js_data.queue_mutex);
+
+ if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY))
+ kbasep_js_policy_term_ctx(js_policy, kctx);
+
+ js_kctx_info->init_status = JS_KCTX_INIT_NONE;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable - Add context to the tail of the per-slot
+ * pullable context queue
+ * @kbdev: Device pointer
+ * @kctx: Context to add to queue
+ * @js: Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the tail.
+ *
+ * This function should be used when queueing a context for the first time, or
+ * re-queueing a context that has been pulled from.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js)
+{
+ bool ret = false;
+
+ lockdep_assert_held(&kbdev->js_data.queue_mutex);
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+ list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+ &kbdev->js_data.ctx_list_pullable[js]);
+
+ if (!kctx->slots_pullable) {
+ kbdev->js_data.nr_contexts_pullable++;
+ ret = true;
+ if (!atomic_read(&kctx->atoms_pulled))
+ atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+ }
+ kctx->slots_pullable |= (1 << js);
+
+ return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_pullable_head - Add context to the head of the
+ * per-slot pullable context queue
+ * @kbdev: Device pointer
+ * @kctx: Context to add to queue
+ * @js: Job slot to use
+ *
+ * If the context is on either the pullable or unpullable queues, then it is
+ * removed before being added to the head.
+ *
+ * This function should be used when a context has been scheduled, but no jobs
+ * can currently be pulled from it.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js)
+{
+ bool ret = false;
+
+ lockdep_assert_held(&kbdev->js_data.queue_mutex);
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+ if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+ list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+ &kbdev->js_data.ctx_list_pullable[js]);
+
+ if (!kctx->slots_pullable) {
+ kbdev->js_data.nr_contexts_pullable++;
+ ret = true;
+ if (!atomic_read(&kctx->atoms_pulled))
+ atomic_inc(&kbdev->js_data.nr_contexts_runnable);
+ }
+ kctx->slots_pullable |= (1 << js);
+
+ return ret;
+}
+
+/**
+ * kbase_js_ctx_list_add_unpullable - Add context to the tail of the per-slot
+ * unpullable context queue
+ * @kbdev: Device pointer
+ * @kctx: Context to add to queue
+ * @js: Job slot to use
+ *
+ * The context must already be on the per-slot pullable queue. It will be
+ * removed from the pullable queue before being added to the unpullable queue.
+ *
+ * This function should be used when a context has been pulled from, and there
+ * are no jobs remaining on the specified slot.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js)
+{
+ bool ret = false;
+
+ lockdep_assert_held(&kbdev->js_data.queue_mutex);
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+ list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js],
+ &kbdev->js_data.ctx_list_unpullable[js]);
+
+ if (kctx->slots_pullable == (1 << js)) {
+ kbdev->js_data.nr_contexts_pullable--;
+ ret = true;
+ if (!atomic_read(&kctx->atoms_pulled))
+ atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+ }
+ kctx->slots_pullable &= ~(1 << js);
+
+ return ret;
+}
+
+/**
+ * kbase_js_ctx_list_remove - Remove context from the per-slot pullable or
+ * unpullable context queues
+ * @kbdev: Device pointer
+ * @kctx: Context to remove from queue
+ * @js: Job slot to use
+ *
+ * The context must already be on one of the queues.
+ *
+ * This function should be used when a context has no jobs on the GPU, and no
+ * jobs remaining for the specified slot.
+ *
+ * Caller must hold kbasep_jd_device_data.queue_mutex
+ *
+ * Return: true if caller should call kbase_backend_ctx_count_changed()
+ */
+static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int js)
+{
+ bool ret = false;
+
+ lockdep_assert_held(&kbdev->js_data.queue_mutex);
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+ WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]));
+
+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+ if (kctx->slots_pullable == (1 << js)) {
+ kbdev->js_data.nr_contexts_pullable--;
+ ret = true;
+ if (!atomic_read(&kctx->atoms_pulled))
+ atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+ }
+ kctx->slots_pullable &= ~(1 << js);
+
+ return ret;
+}
+
+/**
+ * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable
+ * queue.
+ * @kbdev: Device pointer
+ * @js: Job slot to use
+ *
+ * Caller must hold kbasep_jd_device_data::queue_mutex
+ *
+ * Return: Context to use for specified slot.
+ * NULL if no contexts present for specified slot
+ */
+static struct kbase_context *kbase_js_ctx_list_pop_head(
+ struct kbase_device *kbdev,
+ int js)
+{
+ struct kbase_context *kctx;
+
+ lockdep_assert_held(&kbdev->js_data.queue_mutex);
+
+ if (list_empty(&kbdev->js_data.ctx_list_pullable[js]))
+ return NULL;
+
+ kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next,
+ struct kbase_context,
+ jctx.sched_info.ctx.ctx_list_entry[js]);
+
+ list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+
+ return kctx;
+}
+
+/**
+ * kbase_js_ctx_pullable - Return if a context can be pulled from on the
+ * specified slot
+ * @kctx: Context pointer
+ * @js: Job slot to use
+ * @is_scheduled: true if the context is currently scheduled
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if context can be pulled from on specified slot
+ * false otherwise
+ */
+static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js,
+ bool is_scheduled)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_jd_atom *katom;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ js_devdata = &kctx->kbdev->js_data;
+
+ if (is_scheduled) {
+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+ return false;
+ }
+ katom = jsctx_rb_peek(kctx, js);
+ if (!katom)
+ return false; /* ringbuffer empty */
+ if (atomic_read(&katom->blocked))
+ return false; /* next atom blocked */
+ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+ if (katom->x_pre_dep->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)
+ return false;
+ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+ kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+ return false;
+ }
+
+ return true;
+}
+
+static bool kbase_js_dep_validate(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ bool ret = true;
+ bool has_dep = false, has_x_dep = false;
+ int js = kbase_js_get_slot(kbdev, katom);
+ int prio = katom->sched_priority;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+ if (dep_atom) {
+ int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+ int dep_prio = dep_atom->sched_priority;
+
+ /* Dependent atom must already have been submitted */
+ if (!(dep_atom->atom_flags &
+ KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)) {
+ ret = false;
+ break;
+ }
+
+ /* Dependencies with different priorities can't
+ be represented in the ringbuffer */
+ if (prio != dep_prio) {
+ ret = false;
+ break;
+ }
+
+ if (js == dep_js) {
+ /* Only one same-slot dependency can be
+ * represented in the ringbuffer */
+ if (has_dep) {
+ ret = false;
+ break;
+ }
+ has_dep = true;
+ } else {
+ /* Only one cross-slot dependency can be
+ * represented in the ringbuffer */
+ if (has_x_dep) {
+ ret = false;
+ break;
+ }
+ /* Each dependee atom can only have one
+ * cross-slot dependency */
+ if (dep_atom->x_post_dep) {
+ ret = false;
+ break;
+ }
+ /* The dependee atom can not already be in the
+ * HW access ringbuffer */
+ if (dep_atom->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+ ret = false;
+ break;
+ }
+ /* The dependee atom can not already have
+ * completed */
+ if (dep_atom->status !=
+ KBASE_JD_ATOM_STATE_IN_JS) {
+ ret = false;
+ break;
+ }
+ /* Cross-slot dependencies must not violate
+ * PRLAM-8987 affinity restrictions */
+ if (kbase_hw_has_issue(kbdev,
+ BASE_HW_ISSUE_8987) &&
+ (js == 2 || dep_js == 2)) {
+ ret = false;
+ break;
+ }
+ has_x_dep = true;
+ }
+
+ if (kbase_jd_katom_dep_type(&katom->dep[i]) ==
+ BASE_JD_DEP_TYPE_DATA &&
+ js == dep_js) {
+ struct kbase_jd_atom *last_atom =
+ jsctx_rb_peek_last(kctx, js,
+ prio);
+
+ /* Last atom on slot must be pre-dep for this
+ * atom */
+ if (last_atom != dep_atom) {
+ ret = false;
+ break;
+ }
+ }
+
+ /* Dependency can be represented in ringbuffers */
+ }
+ }
+
+ /* If dependencies can be represented by ringbuffer then clear them from
+ * atom structure */
+ if (ret) {
+ for (i = 0; i < 2; i++) {
+ struct kbase_jd_atom *dep_atom = katom->dep[i].atom;
+
+ if (dep_atom) {
+ int dep_js = kbase_js_get_slot(kbdev, dep_atom);
+
+ if ((js != dep_js) &&
+ (dep_atom->status !=
+ KBASE_JD_ATOM_STATE_COMPLETED)
+ && (dep_atom->status !=
+ KBASE_JD_ATOM_STATE_HW_COMPLETED)
+ && (dep_atom->status !=
+ KBASE_JD_ATOM_STATE_UNUSED)) {
+
+ katom->atom_flags |=
+ KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+ katom->x_pre_dep = dep_atom;
+ dep_atom->x_post_dep = katom;
+ if (kbase_jd_katom_dep_type(
+ &katom->dep[i]) ==
+ BASE_JD_DEP_TYPE_DATA)
+ katom->atom_flags |=
+ KBASE_KATOM_FLAG_FAIL_BLOCKER;
+ }
+ if ((kbase_jd_katom_dep_type(&katom->dep[i])
+ == BASE_JD_DEP_TYPE_DATA) &&
+ (js == dep_js))
+ katom->atom_flags |=
+ KBASE_KATOM_FLAG_FAIL_PREV;
+
+ list_del(&katom->dep_item[i]);
+ kbase_jd_katom_dep_clear(&katom->dep[i]);
+ }
+ }
+ }
+
+ return ret;
+}
+
+bool kbasep_js_add_job(struct kbase_context *kctx,
+ struct kbase_jd_atom *atom)
+{
+ unsigned long flags;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+
+ bool enqueue_required = false;
+ bool timer_sync = false;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(atom != NULL);
+ lockdep_assert_held(&kctx->jctx.lock);
+
+ kbdev = kctx->kbdev;
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /*
+ * Begin Runpool transaction
+ */
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* Refcount ctx.nr_jobs */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX);
+ ++(js_kctx_info->ctx.nr_jobs);
+
+ /* Setup any scheduling information */
+ kbasep_js_clear_job_retry_submit(atom);
+
+ /* Lock for state available during IRQ */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (!kbase_js_dep_validate(kctx, atom)) {
+ /* Dependencies could not be represented */
+ --(js_kctx_info->ctx.nr_jobs);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ goto out_unlock;
+ }
+
+ KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom));
+
+ if (kbase_js_dep_resolved_submit(kctx, atom, &enqueue_required) != 0) {
+ /* Ringbuffer was full (should be impossible) - fail the job */
+ --(js_kctx_info->ctx.nr_jobs);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ atom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ goto out_unlock;
+ }
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc,
+ kbasep_js_trace_get_refcnt_nolock(kbdev, kctx));
+
+ /* Context Attribute Refcounting */
+ kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom);
+
+ if (enqueue_required)
+ timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx,
+ atom->slot_nr);
+
+ /* If this context is active and the atom is the first on its slot,
+ * kick the job manager to attempt to fast-start the atom */
+ if (enqueue_required && kctx == kbdev->hwaccess.active_kctx)
+ kbase_jm_try_kick(kbdev, 1 << atom->slot_nr);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ if (timer_sync)
+ kbase_backend_ctx_count_changed(kbdev);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ /* End runpool transaction */
+
+ if (!js_kctx_info->ctx.is_scheduled) {
+ if (js_kctx_info->ctx.is_dying) {
+ /* A job got added while/after kbase_job_zap_context()
+ * was called on a non-scheduled context (e.g. KDS
+ * dependency resolved). Kill that job by killing the
+ * context. */
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx,
+ false);
+ } else if (js_kctx_info->ctx.nr_jobs == 1) {
+ /* Handle Refcount going from 0 to 1: schedule the
+ * context on the Policy Queue */
+ KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+ dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx);
+
+ /* Policy Queue was updated - caller must try to
+ * schedule the head context */
+ WARN_ON(!enqueue_required);
+ }
+ }
+out_unlock:
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ return enqueue_required;
+}
+
+void kbasep_js_remove_job(struct kbase_device *kbdev,
+ struct kbase_context *kctx, struct kbase_jd_atom *atom)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(atom != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc,
+ kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+ /* De-refcount ctx.nr_jobs */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0);
+ --(js_kctx_info->ctx.nr_jobs);
+}
+
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+ struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ unsigned long flags;
+ struct kbasep_js_atom_retained_state katom_retained_state;
+ struct kbasep_js_device_data *js_devdata;
+ bool attr_state_changed;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+
+ js_devdata = &kbdev->js_data;
+
+ kbasep_js_atom_retained_state_copy(&katom_retained_state, katom);
+ kbasep_js_remove_job(kbdev, kctx, katom);
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* The atom has 'finished' (will not be re-run), so no need to call
+ * kbasep_js_has_atom_finished().
+ *
+ * This is because it returns false for soft-stopped atoms, but we
+ * want to override that, because we're cancelling an atom regardless of
+ * whether it was soft-stopped or not */
+ attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx,
+ &katom_retained_state);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return attr_state_changed;
+}
+
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ bool result;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+
+ /* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0,
+ kbasep_js_trace_get_refcnt(kbdev, kctx)); */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return result;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev,
+ int as_nr)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_context *found_kctx = NULL;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+ js_devdata = &kbdev->js_data;
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ found_kctx = js_per_as_data->kctx;
+
+ if (found_kctx != NULL)
+ ++(js_per_as_data->as_busy_refcount);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return found_kctx;
+}
+
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+ struct kbase_device *kbdev, int as_nr)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_context *found_kctx = NULL;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ js_devdata = &kbdev->js_data;
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ found_kctx = js_per_as_data->kctx;
+
+ if (found_kctx != NULL)
+ ++(js_per_as_data->as_busy_refcount);
+
+ return found_kctx;
+}
+
+/**
+ * kbasep_js_release_result - Try running more jobs after releasing a context
+ * and/or atom
+ *
+ * @kbdev: The kbase_device to operate on
+ * @kctx: The kbase_context to operate on
+ * @katom_retained_state: Retained state from the atom
+ * @runpool_ctx_attr_change: True if the runpool context attributes have changed
+ *
+ * This collates a set of actions that must happen whilst
+ * kbasep_js_device_data.runpool_irq.lock is held.
+ *
+ * This includes running more jobs when:
+ * - The previously released kctx caused a ctx attribute change,
+ * - The released atom caused a ctx attribute change,
+ * - Slots were previously blocked due to affinity restrictions,
+ * - Submission during IRQ handling failed.
+ *
+ * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were
+ * changed. The caller should try scheduling all contexts
+ */
+static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbasep_js_atom_retained_state *katom_retained_state,
+ bool runpool_ctx_attr_change)
+{
+ struct kbasep_js_device_data *js_devdata;
+ kbasep_js_release_result result = 0;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(katom_retained_state != NULL);
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ if (js_devdata->nr_user_contexts_running != 0) {
+ bool retry_submit = false;
+ int retry_jobslot;
+
+ if (katom_retained_state)
+ retry_submit = kbasep_js_get_atom_retry_submit_slot(
+ katom_retained_state, &retry_jobslot);
+
+ if (runpool_ctx_attr_change || retry_submit) {
+ /* A change in runpool ctx attributes might mean we can
+ * run more jobs than before */
+ result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL;
+
+ KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB,
+ kctx, NULL, 0u, retry_jobslot);
+ }
+ }
+ return result;
+}
+
+/*
+ * Internal function to release the reference on a ctx and an atom's "retained
+ * state", only taking the runpool and as transaction mutexes
+ *
+ * This also starts more jobs running in the case of an ctx-attribute state
+ * change
+ *
+ * This does none of the followup actions for scheduling:
+ * - It does not schedule in a new context
+ * - It does not requeue or handle dying contexts
+ *
+ * For those tasks, just call kbasep_js_runpool_release_ctx() instead
+ *
+ * Requires:
+ * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr
+ * - Context has a non-zero refcount
+ * - Caller holds js_kctx_info->ctx.jsctx_mutex
+ * - Caller holds js_devdata->runpool_mutex
+ */
+static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ kbasep_js_release_result release_result = 0u;
+ bool runpool_ctx_attr_change = false;
+ int kctx_as_nr;
+ struct kbase_as *current_as;
+ int new_ref_count;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled);
+
+ /* kctx->as_nr and js_per_as_data are only read from here. The caller's
+ * js_ctx_mutex provides a barrier that ensures they are up-to-date.
+ *
+ * They will not change whilst we're reading them, because the refcount
+ * is non-zero (and we ASSERT on that last fact).
+ */
+ kctx_as_nr = kctx->as_nr;
+ KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID);
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr];
+ KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+ /*
+ * Transaction begins on AS and runpool_irq
+ *
+ * Assert about out calling contract
+ */
+ current_as = &kbdev->as[kctx_as_nr];
+ mutex_lock(&kbdev->pm.lock);
+ mutex_lock(&current_as->transaction_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr);
+ KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0);
+
+ /* Update refcount */
+ new_ref_count = --(js_per_as_data->as_busy_refcount);
+
+ /* Release the atom if it finished (i.e. wasn't soft-stopped) */
+ if (kbasep_js_has_atom_finished(katom_retained_state))
+ runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom(
+ kbdev, kctx, katom_retained_state);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u,
+ new_ref_count);
+
+ if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_PRIVILEGED &&
+ !kbase_pm_is_suspending(kbdev)) {
+ /* Context is kept scheduled into an address space even when
+ * there are no jobs, in this case we have to handle the
+ * situation where all jobs have been evicted from the GPU and
+ * submission is disabled.
+ *
+ * At this point we re-enable submission to allow further jobs
+ * to be executed
+ */
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+ }
+
+ /* Make a set of checks to see if the context should be scheduled out */
+ if (new_ref_count == 0 &&
+ (!kbasep_js_is_submit_allowed(js_devdata, kctx) ||
+ kbdev->pm.suspending)) {
+ /* Last reference, and we've been told to remove this context
+ * from the Run Pool */
+ dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d",
+ kctx, new_ref_count, js_kctx_info->ctx.nr_jobs,
+ kbasep_js_is_submit_allowed(js_devdata, kctx));
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_mmu_as_released(kctx->as_nr);
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_nret_gpu_ctx(kbdev, kctx);
+#endif
+
+ kbase_backend_release_ctx_irq(kbdev, kctx);
+
+ if (kbdev->hwaccess.active_kctx == kctx)
+ kbdev->hwaccess.active_kctx = NULL;
+
+ /* Ctx Attribute handling
+ *
+ * Releasing atoms attributes must either happen before this, or
+ * after 'is_scheduled' is changed, otherwise we double-decount
+ * the attributes */
+ runpool_ctx_attr_change |=
+ kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx);
+
+ /* Releasing the context and katom retained state can allow
+ * more jobs to run */
+ release_result |=
+ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev,
+ kctx, katom_retained_state,
+ runpool_ctx_attr_change);
+
+ /*
+ * Transaction ends on AS and runpool_irq:
+ *
+ * By this point, the AS-related data is now clear and ready
+ * for re-use.
+ *
+ * Since releases only occur once for each previous successful
+ * retain, and no more retains are allowed on this context, no
+ * other thread will be operating in this
+ * code whilst we are
+ */
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ kbase_backend_release_ctx_noirq(kbdev, kctx);
+
+ mutex_unlock(&current_as->transaction_mutex);
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Note: Don't reuse kctx_as_nr now */
+
+ /* Synchronize with any policy timers */
+ kbase_backend_ctx_count_changed(kbdev);
+
+ /* update book-keeping info */
+ js_kctx_info->ctx.is_scheduled = false;
+ /* Signal any waiter that the context is not scheduled, so is
+ * safe for termination - once the jsctx_mutex is also dropped,
+ * and jobs have finished. */
+ wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+ /* Queue an action to occur after we've dropped the lock */
+ release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED;
+ } else {
+ kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx,
+ katom_retained_state, runpool_ctx_attr_change);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&current_as->transaction_mutex);
+ mutex_unlock(&kbdev->pm.lock);
+ }
+
+ return release_result;
+}
+
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_atom_retained_state katom_retained_state;
+
+ /* Setup a dummy katom_retained_state */
+ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+ kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+ &katom_retained_state);
+}
+
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx, bool has_pm_ref)
+{
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_policy = &kbdev->js_data.policy;
+ js_devdata = &kbdev->js_data;
+
+ /* This is called if and only if you've you've detached the context from
+ * the Runpool or the Policy Queue, and not added it back to the Runpool
+ */
+ KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled);
+
+ if (js_kctx_info->ctx.is_dying) {
+ /* Dying: don't requeue, but kill all jobs on the context. This
+ * happens asynchronously */
+ dev_dbg(kbdev->dev,
+ "JS: ** Killing Context %p on RunPool Remove **", kctx);
+ kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel);
+ }
+}
+
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(
+ struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ base_jd_event_code event_code;
+ kbasep_js_release_result release_result;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+ event_code = katom_retained_state->event_code;
+
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+#if 2 == MALI_INSTRUMENTATION_LEVEL
+ /* When any fault is detected, stop the context from submitting more
+ * and add a refcount to prevent the context from being removed while
+ * the job core dump is undergoing in the user space. Once the dump
+ * finish, it will release the refcount of the context and allow it
+ * to be removed. The test conditions are to ensure this mechanism
+ * will be triggered only in the cases that cmarp_event_handler
+ * handles.
+ *
+ * Currently, cmar event handler only handles job exceptions and
+ * assert the cases where the event code of the atom does not
+ * belong to the MMU exceptions or GPU exceptions class. In order to
+ * perform dump on error in those cases, changes in cmar event handler
+ * need to be made.
+ */
+ if ((BASE_JD_EVENT_NOT_STARTED != event_code) &&
+ (BASE_JD_EVENT_STOPPED != event_code) &&
+ (BASE_JD_EVENT_ACTIVE != event_code) &&
+ (!((event_code >= BASE_JD_EVENT_RANGE_KERNEL_ONLY_START) &&
+ (event_code <= BASE_JD_EVENT_RANGE_KERNEL_ONLY_END))) &&
+ ((event_code & BASE_JD_SW_EVENT) ||
+ event_code <= BASE_JD_EVENT_UNKNOWN) &&
+ (BASE_JD_EVENT_DONE != event_code)) {
+ unsigned long flags;
+
+ atomic_inc(&js_kctx_info->ctx.fault_count);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ kbasep_js_runpool_retain_ctx(kbdev, kctx);
+ }
+#endif /* 2 == MALI_INSTRUMENTATION_LEVEL */
+
+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+ katom_retained_state);
+
+ /* Drop the runpool mutex to allow requeing kctx */
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+ /* Drop the jsctx_mutex to allow scheduling in a new context */
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL)
+ kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_dump_fault_term(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ atomic_dec(&kctx->jctx.sched_info.ctx.fault_count);
+}
+
+
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_atom_retained_state katom_retained_state;
+
+ kbasep_js_atom_retained_state_init_invalid(&katom_retained_state);
+
+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+ &katom_retained_state);
+}
+
+/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into
+ * kbase_js_sched_all() */
+static void kbasep_js_runpool_release_ctx_no_schedule(
+ struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ kbasep_js_release_result release_result;
+ struct kbasep_js_atom_retained_state katom_retained_state_struct;
+ struct kbasep_js_atom_retained_state *katom_retained_state =
+ &katom_retained_state_struct;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+ kbasep_js_atom_retained_state_init_invalid(katom_retained_state);
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx,
+ katom_retained_state);
+
+ /* Drop the runpool mutex to allow requeing kctx */
+ mutex_unlock(&js_devdata->runpool_mutex);
+ if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u)
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true);
+
+ /* Drop the jsctx_mutex to allow scheduling in a new context */
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* NOTE: could return release_result if the caller would like to know
+ * whether it should schedule a new context, but currently no callers do
+ */
+}
+
+/**
+ * kbase_js_set_timeouts - update all JS timeouts with user specified data
+ * @kbdev: Device pointer
+ *
+ * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is
+ * set to a positive number then that becomes the new value used, if a timeout
+ * is negative then the default is set.
+ */
+static void kbase_js_set_timeouts(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_data = &kbdev->js_data;
+
+ if (kbdev->js_scheduling_period_ns < 0)
+ js_data->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS;
+ else if (kbdev->js_scheduling_period_ns > 0)
+ js_data->scheduling_period_ns = kbdev->js_scheduling_period_ns;
+
+ if (kbdev->js_soft_stop_ticks < 0)
+ js_data->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS;
+ else if (kbdev->js_soft_stop_ticks > 0)
+ js_data->soft_stop_ticks = kbdev->js_soft_stop_ticks;
+
+ if (kbdev->js_soft_stop_ticks_cl < 0)
+ js_data->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL;
+ else if (kbdev->js_soft_stop_ticks_cl > 0)
+ js_data->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl;
+
+ if (kbdev->js_hard_stop_ticks_ss < 0) {
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ js_data->hard_stop_ticks_ss =
+ DEFAULT_JS_HARD_STOP_TICKS_SS_8408;
+ else
+ js_data->hard_stop_ticks_ss =
+ DEFAULT_JS_HARD_STOP_TICKS_SS;
+ } else if (kbdev->js_hard_stop_ticks_ss > 0) {
+ js_data->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss;
+ }
+
+ if (kbdev->js_hard_stop_ticks_cl < 0)
+ js_data->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL;
+ else if (kbdev->js_hard_stop_ticks_cl > 0)
+ js_data->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl;
+
+ if (kbdev->js_hard_stop_ticks_dumping < 0)
+ js_data->hard_stop_ticks_dumping =
+ DEFAULT_JS_HARD_STOP_TICKS_DUMPING;
+ else if (kbdev->js_hard_stop_ticks_dumping > 0)
+ js_data->hard_stop_ticks_dumping =
+ kbdev->js_hard_stop_ticks_dumping;
+
+ if (kbdev->js_reset_ticks_ss < 0) {
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408))
+ js_data->gpu_reset_ticks_ss =
+ DEFAULT_JS_RESET_TICKS_SS_8408;
+ else
+ js_data->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS;
+ } else if (kbdev->js_reset_ticks_ss > 0) {
+ js_data->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss;
+ }
+
+ if (kbdev->js_reset_ticks_cl < 0)
+ js_data->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL;
+ else if (kbdev->js_reset_ticks_cl > 0)
+ js_data->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl;
+
+ if (kbdev->js_reset_ticks_dumping < 0)
+ js_data->gpu_reset_ticks_dumping =
+ DEFAULT_JS_RESET_TICKS_DUMPING;
+ else if (kbdev->js_reset_ticks_dumping > 0)
+ js_data->gpu_reset_ticks_dumping =
+ kbdev->js_reset_ticks_dumping;
+}
+
+static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ union kbasep_js_policy *js_policy;
+ struct kbase_as *new_address_space = NULL;
+ unsigned long flags;
+ bool kctx_suspended = false;
+ int as_nr;
+
+ js_devdata = &kbdev->js_data;
+ js_policy = &kbdev->js_data.policy;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* Pick available address space for this context */
+ as_nr = kbase_backend_find_free_address_space(kbdev, kctx);
+
+ if (as_nr == KBASEP_AS_NR_INVALID)
+ return false; /* No address spaces currently available */
+
+ new_address_space = &kbdev->as[as_nr];
+
+ /*
+ * Atomic transaction on the Context and Run Pool begins
+ */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+
+ /* Check to see if context is dying due to kbase_job_zap_context() */
+ if (js_kctx_info->ctx.is_dying) {
+ /* Roll back the transaction so far and return */
+ kbase_backend_release_free_address_space(kbdev, as_nr);
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ return false;
+ }
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL,
+ 0u,
+ kbasep_js_trace_get_refcnt(kbdev, kctx));
+
+ if (js_devdata->nr_user_contexts_running == 0 &&
+ kbdev->js_timeouts_updated) {
+ /* Only when there are no other contexts submitting jobs:
+ * Latch in run-time job scheduler timeouts that were set
+ * through js_timeouts sysfs file */
+ kbase_js_set_timeouts(kbdev);
+
+ kbdev->js_timeouts_updated = false;
+ }
+
+ js_kctx_info->ctx.is_scheduled = true;
+
+ mutex_lock(&new_address_space->transaction_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* Assign context to previously chosen address space */
+ if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) {
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&new_address_space->transaction_mutex);
+ /* If address space is not pending, then kbase_backend_use_ctx()
+ * failed. Roll back the transaction so far and return */
+ if (!kctx->as_pending) {
+ js_kctx_info->ctx.is_scheduled = false;
+
+ kbase_backend_release_free_address_space(kbdev, as_nr);
+ }
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ return false;
+ }
+
+ kbdev->hwaccess.active_kctx = kctx;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_mmu_as_in_use(kctx->as_nr);
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_ret_gpu_ctx(kbdev, kctx);
+#endif
+
+ /* Cause any future waiter-on-termination to wait until the context is
+ * descheduled */
+ wake_up(&js_kctx_info->ctx.is_scheduled_wait);
+
+ /* Re-check for suspending: a suspend could've occurred, and all the
+ * contexts could've been removed from the runpool before we took this
+ * lock. In this case, we don't want to allow this context to run jobs,
+ * we just want it out immediately.
+ *
+ * The DMB required to read the suspend flag was issued recently as part
+ * of the runpool_irq locking. If a suspend occurs *after* that lock was
+ * taken (i.e. this condition doesn't execute), then the
+ * kbasep_js_suspend() code will cleanup this context instead (by virtue
+ * of it being called strictly after the suspend flag is set, and will
+ * wait for this lock to drop) */
+ if (kbase_pm_is_suspending(kbdev)) {
+ /* Cause it to leave at some later point */
+ bool retained;
+
+ retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+ KBASE_DEBUG_ASSERT(retained);
+
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+ kctx_suspended = true;
+ }
+
+ /* Transaction complete */
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&new_address_space->transaction_mutex);
+
+ /* Synchronize with any policy timers */
+ kbase_backend_ctx_count_changed(kbdev);
+
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ /* Note: after this point, the context could potentially get scheduled
+ * out immediately */
+
+ if (kctx_suspended) {
+ /* Finishing forcing out the context due to a suspend. Use a
+ * variant of kbasep_js_runpool_release_ctx() that doesn't
+ * schedule a new context, to prevent a risk of recursion back
+ * into this function */
+ kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx);
+ return false;
+ }
+ return true;
+}
+
+static bool kbase_js_use_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ if (kctx->as_pending) {
+ /* Context waiting for AS to be assigned */
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ return false;
+ }
+ if (kbase_backend_use_ctx_sched(kbdev, kctx)) {
+ /* Context already has ASID - mark as active */
+ kbdev->hwaccess.active_kctx = kctx;
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ return true; /* Context already scheduled */
+ }
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return kbasep_js_schedule_ctx(kbdev, kctx);
+}
+
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbasep_js_device_data *js_devdata;
+ bool is_scheduled;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* This must never be attempted whilst suspending - i.e. it should only
+ * happen in response to a syscall from a user-space thread */
+ BUG_ON(kbase_pm_is_suspending(kbdev));
+
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* Mark the context as privileged */
+ js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED;
+
+ is_scheduled = js_kctx_info->ctx.is_scheduled;
+ if (!is_scheduled) {
+ /* Add the context to the pullable list */
+ if (kbase_js_ctx_list_add_pullable(kbdev, kctx, 0))
+ kbase_js_sync_timers(kbdev);
+
+ /* Fast-starting requires the jsctx_mutex to be dropped,
+ * because it works on multiple ctxs */
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ /* Try to schedule the context in */
+ kbase_js_sched_all(kbdev);
+
+ /* Wait for the context to be scheduled in */
+ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+ kctx->jctx.sched_info.ctx.is_scheduled);
+ } else {
+ /* Already scheduled in - We need to retain it to keep the
+ * corresponding address space */
+ kbasep_js_runpool_retain_ctx(kbdev, kctx);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+ }
+}
+
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool pending;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* We don't need to use the address space anymore */
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED);
+ pending = kctx->as_pending;
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+
+ /* Release the context - it will be scheduled out if there is no
+ * pending job */
+ if (!pending)
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+ kbase_js_sched_all(kbdev);
+}
+
+void kbasep_js_suspend(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int i;
+ u16 retained = 0u;
+ int nr_privileged_ctx = 0;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev));
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ /* Prevent all contexts from submitting */
+ js_devdata->runpool_irq.submit_allowed = 0;
+
+ /* Retain each of the contexts, so we can cause it to leave even if it
+ * had no refcount to begin with */
+ for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) {
+ struct kbasep_js_per_as_data *js_per_as_data =
+ &js_devdata->runpool_irq.per_as_data[i];
+ struct kbase_context *kctx = js_per_as_data->kctx;
+
+ retained = retained << 1;
+
+ if (kctx) {
+ ++(js_per_as_data->as_busy_refcount);
+ retained |= 1u;
+ /* We can only cope with up to 1 privileged context -
+ * the instrumented context. It'll be suspended by
+ * disabling instrumentation */
+ if (kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_PRIVILEGED)
+ KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1);
+ }
+ }
+ CSTD_UNUSED(nr_privileged_ctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ /* De-ref the previous retain to ensure each context gets pulled out
+ * sometime later. */
+ for (i = 0;
+ i < BASE_MAX_NR_AS;
+ ++i, retained = retained >> 1) {
+ struct kbasep_js_per_as_data *js_per_as_data =
+ &js_devdata->runpool_irq.per_as_data[i];
+ struct kbase_context *kctx = js_per_as_data->kctx;
+
+ if (retained & 1u)
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ }
+
+ /* Caller must wait for all Power Manager active references to be
+ * dropped */
+}
+
+void kbasep_js_resume(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int js;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+ mutex_lock(&js_devdata->queue_mutex);
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ struct kbase_context *kctx, *n;
+
+ list_for_each_entry_safe(kctx, n,
+ &kbdev->js_data.ctx_list_unpullable[js],
+ jctx.sched_info.ctx.ctx_list_entry[js]) {
+ struct kbasep_js_kctx_info *js_kctx_info;
+ unsigned long flags;
+ bool timer_sync = false;
+
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_lock(&js_devdata->runpool_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (!js_kctx_info->ctx.is_scheduled &&
+ kbase_js_ctx_pullable(kctx, js, false))
+ timer_sync = kbase_js_ctx_list_add_pullable(
+ kbdev, kctx, js);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+ flags);
+ if (timer_sync)
+ kbase_backend_ctx_count_changed(kbdev);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ }
+ }
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ /* Restart atom processing */
+ kbase_js_sched_all(kbdev);
+
+ /* JS Resume complete */
+}
+
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ if ((katom->core_req & BASE_JD_REQ_FS) &&
+ (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE |
+ BASE_JD_REQ_T)))
+ return false;
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) &&
+ (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) &&
+ (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T)))
+ return false;
+
+ return true;
+}
+
+static int kbase_js_get_slot(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom)
+{
+ if (katom->core_req & BASE_JD_REQ_FS)
+ return 0;
+
+ if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+ if (katom->device_nr == 1 &&
+ kbdev->gpu_props.num_core_groups == 2)
+ return 2;
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
+ return 2;
+ }
+
+ return 1;
+}
+
+int kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom,
+ bool *enqueue_required)
+{
+ katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom);
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ /* If slot will transition from unpullable to pullable then add to
+ * pullable list */
+ if (jsctx_rb_is_empty(kctx, katom->slot_nr)) {
+ *enqueue_required = true;
+ } else {
+ *enqueue_required = false;
+ /* Check if there are lower priority jobs to soft stop */
+ kbase_job_slot_ctx_priority_check_locked(kctx, katom);
+ }
+
+ /* Add atom to ring buffer. */
+ if (unlikely(jsctx_rb_add_atom(kctx, katom))) {
+ /* The ring buffer is full. This should be impossible as the
+ * job dispatcher can not submit enough atoms to exceed the
+ * ring buffer size. Fail the job.
+ */
+ WARN(1, "Job submit while JSCTX ringbuffer already full\n");
+ return -EINVAL;
+ }
+
+ katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+
+ return 0;
+}
+
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js)
+{
+ struct kbase_jd_atom *katom;
+ struct kbasep_js_device_data *js_devdata;
+ int pulled;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ js_devdata = &kctx->kbdev->js_data;
+ lockdep_assert_held(&js_devdata->runpool_irq.lock);
+
+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+ return NULL;
+ if (kbase_pm_is_suspending(kctx->kbdev))
+ return NULL;
+
+ katom = jsctx_rb_peek(kctx, js);
+ if (!katom)
+ return NULL;
+
+ if (atomic_read(&katom->blocked))
+ return NULL;
+
+ /* Due to ordering restrictions when unpulling atoms on failure, we do
+ * not allow multiple runs of fail-dep atoms from the same context to be
+ * present on the same slot */
+ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) &&
+ atomic_read(&kctx->atoms_pulled_slot[js])) {
+ struct kbase_jd_atom *prev_atom =
+ kbase_backend_inspect_tail(kctx->kbdev, js);
+
+ if (prev_atom && prev_atom->kctx != kctx)
+ return NULL;
+ }
+
+ if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) {
+ if (katom->x_pre_dep->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)
+ return NULL;
+ if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) &&
+ kbase_backend_nr_atoms_on_slot(kctx->kbdev, js))
+ return NULL;
+ }
+
+ kctx->pulled = true;
+ pulled = atomic_inc_return(&kctx->atoms_pulled);
+ if (pulled == 1 && !kctx->slots_pullable)
+ atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable);
+ atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]);
+ jsctx_rb_pull(kctx, katom);
+
+ kbasep_js_runpool_retain_ctx_nolock(kctx->kbdev, kctx);
+ katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+
+ katom->sched_info.cfs.ticks = 0;
+
+ return katom;
+}
+
+
+static void js_return_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom,
+ work);
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+ struct kbasep_js_atom_retained_state retained_state;
+ int js = katom->slot_nr;
+ bool timer_sync = false;
+ bool context_idle = false;
+ unsigned long flags;
+
+ kbase_backend_complete_wq(kbdev, katom);
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316))
+ kbase_as_poking_timer_release_atom(kbdev, kctx, katom);
+
+ kbasep_js_atom_retained_state_copy(&retained_state, katom);
+
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+
+ atomic_dec(&kctx->atoms_pulled);
+ atomic_dec(&kctx->atoms_pulled_slot[js]);
+
+ atomic_dec(&katom->blocked);
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (!atomic_read(&kctx->atoms_pulled_slot[js]) &&
+ jsctx_rb_is_empty(kctx, js))
+ timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js);
+
+ if (!atomic_read(&kctx->atoms_pulled)) {
+ if (!kctx->slots_pullable)
+ atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+
+ if (kctx->as_nr != KBASEP_AS_NR_INVALID &&
+ !js_kctx_info->ctx.is_dying) {
+ int num_slots = kbdev->gpu_props.num_job_slots;
+ int slot;
+
+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx))
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+ for (slot = 0; slot < num_slots; slot++) {
+ if (kbase_js_ctx_pullable(kctx, slot, true))
+ timer_sync |=
+ kbase_js_ctx_list_add_pullable(
+ kbdev, kctx, slot);
+ }
+ }
+
+ kbase_jm_idle_ctx(kbdev, kctx);
+
+ context_idle = true;
+ }
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ if (context_idle)
+ kbase_pm_context_idle(kbdev);
+
+ if (timer_sync)
+ kbase_js_sync_timers(kbdev);
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF;
+ kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
+ &retained_state);
+
+ kbase_js_sched_all(kbdev);
+}
+
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ jsctx_rb_unpull(kctx, katom);
+
+ WARN_ON(work_pending(&katom->work));
+
+ /* Block re-submission until workqueue has run */
+ atomic_inc(&katom->blocked);
+
+ kbase_job_check_leave_disjoint(kctx->kbdev, katom);
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, js_return_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+static bool kbase_js_evict_atom(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom_evict,
+ struct kbase_jd_atom *start_katom,
+ struct kbase_jd_atom *head_katom,
+ struct list_head *evict_list,
+ struct jsctx_rb *rb, int idx)
+{
+ struct kbase_jd_atom *x_dep = katom_evict->x_post_dep;
+
+ if (!(katom_evict->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) &&
+ katom_evict != start_katom)
+ return false;
+
+ if (katom_evict->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) {
+ WARN_ON(katom_evict->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_RETURN_TO_JS);
+ WARN_ON(katom_evict->event_code != head_katom->event_code);
+
+ return false;
+ }
+
+ if (katom_evict->status == KBASE_JD_ATOM_STATE_HW_COMPLETED &&
+ katom_evict != head_katom)
+ return false;
+
+ /* Evict cross dependency if present */
+ if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)
+ && (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER))
+ list_add_tail(&x_dep->dep_item[0], evict_list);
+
+ /* If cross dependency is present and does not have a data dependency
+ * then unblock */
+ if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)
+ && !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER))
+ x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+
+ if (katom_evict != head_katom) {
+ rb->entries[idx & JSCTX_RB_MASK].atom_id =
+ KBASEP_ATOM_ID_INVALID;
+
+ katom_evict->event_code = head_katom->event_code;
+ katom_evict->atom_flags &=
+ ~KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED;
+
+ if (katom_evict->atom_flags & KBASE_KATOM_FLAG_HOLDING_CTX_REF)
+ kbase_jd_done(katom_evict, katom_evict->slot_nr, NULL,
+ 0);
+ else
+ kbase_jd_evict(kctx->kbdev, katom_evict);
+ }
+
+ return true;
+}
+
+/**
+ * kbase_js_evict_deps - Evict dependencies
+ * @kctx: Context pointer
+ * @head_katom: Pointer to the atom to evict
+ *
+ * Remove all post dependencies of an atom from the context ringbuffers.
+ *
+ * The original atom's event_code will be propogated to all dependent atoms.
+ *
+ * Context: Caller must hold both jctx and HW access locks
+ */
+static void kbase_js_evict_deps(struct kbase_context *kctx,
+ struct kbase_jd_atom *head_katom)
+{
+ struct list_head evict_list;
+
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ INIT_LIST_HEAD(&evict_list);
+
+ list_add_tail(&head_katom->dep_item[0], &evict_list);
+
+ while (!list_empty(&evict_list)) {
+ struct kbase_jd_atom *start_katom;
+
+ start_katom = list_entry(evict_list.prev, struct kbase_jd_atom,
+ dep_item[0]);
+ list_del(evict_list.prev);
+
+ jsctx_rb_evict(kctx, start_katom, head_katom, &evict_list);
+ }
+}
+
+/**
+ * kbase_js_compact - Compact JSCTX ringbuffers
+ * @kctx: Context pointer
+ *
+ * Compact the JSCTX ringbuffers, removing any NULL entries
+ *
+ * Context: Caller must hold both jctx and HW access locks
+ */
+static void kbase_js_compact(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ int js;
+
+ lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+ jsctx_rb_compact(kctx, js);
+}
+
+void kbase_js_complete_atom_wq(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ bool timer_sync = false;
+ int atom_slot;
+ bool context_idle = false;
+
+ kbdev = kctx->kbdev;
+ atom_slot = katom->slot_nr;
+
+ js_kctx_info = &kctx->jctx.sched_info;
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) {
+ if (katom->event_code != BASE_JD_EVENT_DONE)
+ kbase_js_evict_deps(kctx, katom);
+
+ jsctx_rb_remove(kctx, katom);
+
+ context_idle = !atomic_dec_return(&kctx->atoms_pulled);
+ atomic_dec(&kctx->atoms_pulled_slot[atom_slot]);
+
+ if (!atomic_read(&kctx->atoms_pulled) && !kctx->slots_pullable)
+ atomic_dec(&kbdev->js_data.nr_contexts_runnable);
+
+ if (katom->event_code != BASE_JD_EVENT_DONE)
+ kbase_js_compact(kctx);
+ }
+
+ if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) &&
+ jsctx_rb_is_empty(kctx, atom_slot))
+ timer_sync |= kbase_js_ctx_list_remove(kctx->kbdev, kctx,
+ atom_slot);
+
+ /*
+ * If submission is disabled on this context (most likely due to an
+ * atom failure) and there are now no atoms left in the system then
+ * re-enable submission so that context can be scheduled again.
+ */
+ if (!kbasep_js_is_submit_allowed(js_devdata, kctx) &&
+ !atomic_read(&kctx->atoms_pulled) &&
+ !js_kctx_info->ctx.is_dying) {
+ int js;
+
+ kbasep_js_set_submit_allowed(js_devdata, kctx);
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ if (kbase_js_ctx_pullable(kctx, js, true))
+ timer_sync |= kbase_js_ctx_list_add_pullable(
+ kbdev, kctx, js);
+ }
+ } else if (katom->x_post_dep &&
+ kbasep_js_is_submit_allowed(js_devdata, kctx)) {
+ int js;
+
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ if (kbase_js_ctx_pullable(kctx, js, true))
+ timer_sync |= kbase_js_ctx_list_add_pullable(
+ kbdev, kctx, js);
+ }
+ }
+
+ if (context_idle)
+ kbase_jm_idle_ctx(kbdev, kctx);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ if (timer_sync)
+ kbase_backend_ctx_count_changed(kbdev);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ if (context_idle)
+ kbase_pm_context_idle(kbdev);
+}
+
+void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp)
+{
+ u64 microseconds_spent = 0;
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx = katom->kctx;
+ union kbasep_js_policy *js_policy;
+ struct kbasep_js_device_data *js_devdata;
+
+ kbdev = kctx->kbdev;
+
+ js_policy = &kbdev->js_data.policy;
+ js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+
+ katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED;
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP,
+ katom->slot_nr), NULL, 0);
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_tl_nret_atom_lpu(
+ katom,
+ &kbdev->gpu_props.props.raw_props.js_features[
+ katom->slot_nr]);
+#endif
+ /* Calculate the job's time used */
+ if (end_timestamp != NULL) {
+ /* Only calculating it for jobs that really run on the HW (e.g.
+ * removed from next jobs never actually ran, so really did take
+ * zero time) */
+ ktime_t tick_diff = ktime_sub(*end_timestamp,
+ katom->start_timestamp);
+
+ microseconds_spent = ktime_to_ns(tick_diff);
+
+ do_div(microseconds_spent, 1000);
+
+ /* Round up time spent to the minimum timer resolution */
+ if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US)
+ microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US;
+ }
+
+ /* Log the result of the job (completion status, and time spent). */
+ kbasep_js_policy_log_job_result(js_policy, katom, microseconds_spent);
+
+ kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0);
+
+ /* Unblock cross dependency if present */
+ if (katom->x_post_dep && (katom->event_code == BASE_JD_EVENT_DONE ||
+ !(katom->x_post_dep->atom_flags &
+ KBASE_KATOM_FLAG_FAIL_BLOCKER)))
+ katom->x_post_dep->atom_flags &=
+ ~KBASE_KATOM_FLAG_X_DEP_BLOCKED;
+}
+
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask)
+{
+ struct kbasep_js_device_data *js_devdata;
+ union kbasep_js_policy *js_policy;
+ bool timer_sync = false;
+
+ js_devdata = &kbdev->js_data;
+ js_policy = &js_devdata->policy;
+
+ down(&js_devdata->schedule_sem);
+ mutex_lock(&js_devdata->queue_mutex);
+
+ while (js_mask) {
+ int js;
+
+ js = ffs(js_mask) - 1;
+
+ while (1) {
+ struct kbase_context *kctx;
+ unsigned long flags;
+ bool context_idle = false;
+
+ kctx = kbase_js_ctx_list_pop_head(kbdev, js);
+
+ if (!kctx) {
+ js_mask &= ~(1 << js);
+ break; /* No contexts on pullable list */
+ }
+
+ if (!atomic_read(&kctx->atoms_pulled)) {
+ context_idle = true;
+
+ if (kbase_pm_context_active_handle_suspend(
+ kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+ /* Suspend pending - return context to
+ * queue and stop scheduling */
+ mutex_lock(
+ &kctx->jctx.sched_info.ctx.jsctx_mutex);
+ if (kbase_js_ctx_list_add_pullable_head(
+ kctx->kbdev, kctx, js))
+ kbase_js_sync_timers(kbdev);
+ mutex_unlock(
+ &kctx->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+ up(&js_devdata->schedule_sem);
+ return;
+ }
+ }
+
+ if (!kbase_js_use_ctx(kbdev, kctx)) {
+ mutex_lock(
+ &kctx->jctx.sched_info.ctx.jsctx_mutex);
+ /* Context can not be used at this time */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock,
+ flags);
+ if (kctx->as_pending ||
+ kbase_js_ctx_pullable(kctx, js, false)
+ || (kctx->jctx.sched_info.ctx.flags &
+ KBASE_CTX_FLAG_PRIVILEGED))
+ timer_sync |=
+ kbase_js_ctx_list_add_pullable_head(
+ kctx->kbdev, kctx, js);
+ else
+ timer_sync |=
+ kbase_js_ctx_list_add_unpullable(
+ kctx->kbdev, kctx, js);
+ spin_unlock_irqrestore(
+ &js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(
+ &kctx->jctx.sched_info.ctx.jsctx_mutex);
+ if (context_idle)
+ kbase_pm_context_idle(kbdev);
+
+ /* No more jobs can be submitted on this slot */
+ js_mask &= ~(1 << js);
+ break;
+ }
+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ kctx->pulled = false;
+
+ if (!kbase_jm_kick(kbdev, 1 << js))
+ /* No more jobs can be submitted on this slot */
+ js_mask &= ~(1 << js);
+
+ if (!kctx->pulled) {
+ /* Failed to pull jobs - push to head of list */
+ if (kbase_js_ctx_pullable(kctx, js, true))
+ timer_sync |=
+ kbase_js_ctx_list_add_pullable_head(
+ kctx->kbdev,
+ kctx, js);
+ else
+ timer_sync |=
+ kbase_js_ctx_list_add_unpullable(
+ kctx->kbdev,
+ kctx, js);
+
+ if (context_idle) {
+ kbase_jm_idle_ctx(kbdev, kctx);
+ spin_unlock_irqrestore(
+ &js_devdata->runpool_irq.lock,
+ flags);
+ kbase_pm_context_idle(kbdev);
+ } else {
+ spin_unlock_irqrestore(
+ &js_devdata->runpool_irq.lock,
+ flags);
+ }
+ mutex_unlock(
+ &kctx->jctx.sched_info.ctx.jsctx_mutex);
+
+ js_mask &= ~(1 << js);
+ break; /* Could not run atoms on this slot */
+ }
+
+ /* Push to back of list */
+ if (kbase_js_ctx_pullable(kctx, js, true))
+ timer_sync |= kbase_js_ctx_list_add_pullable(
+ kctx->kbdev, kctx, js);
+ else
+ timer_sync |= kbase_js_ctx_list_add_unpullable(
+ kctx->kbdev, kctx, js);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock,
+ flags);
+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ }
+ }
+
+ if (timer_sync)
+ kbase_js_sync_timers(kbdev);
+
+ mutex_unlock(&js_devdata->queue_mutex);
+ up(&js_devdata->schedule_sem);
+}
+
+void kbase_js_zap_context(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info;
+ int js;
+
+ /*
+ * Critical assumption: No more submission is possible outside of the
+ * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs)
+ * whilst the struct kbase_context is terminating.
+ */
+
+ /* First, atomically do the following:
+ * - mark the context as dying
+ * - try to evict it from the policy queue */
+ mutex_lock(&js_devdata->queue_mutex);
+ mutex_lock(&js_kctx_info->ctx.jsctx_mutex);
+ js_kctx_info->ctx.is_dying = true;
+
+ dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx);
+
+ /*
+ * At this point we know:
+ * - If eviction succeeded, it was in the policy queue, but now no
+ * longer is
+ * - We must cancel the jobs here. No Power Manager active reference to
+ * release.
+ * - This happens asynchronously - kbase_jd_zap_context() will wait for
+ * those jobs to be killed.
+ * - If eviction failed, then it wasn't in the policy queue. It is one
+ * of the following:
+ * - a. it didn't have any jobs, and so is not in the Policy Queue or
+ * the Run Pool (not scheduled)
+ * - Hence, no more work required to cancel jobs. No Power Manager
+ * active reference to release.
+ * - b. it was in the middle of a scheduling transaction (and thus must
+ * have at least 1 job). This can happen from a syscall or a
+ * kernel thread. We still hold the jsctx_mutex, and so the thread
+ * must be waiting inside kbasep_js_try_schedule_head_ctx(),
+ * before checking whether the runpool is full. That thread will
+ * continue after we drop the mutex, and will notice the context
+ * is dying. It will rollback the transaction, killing all jobs at
+ * the same time. kbase_jd_zap_context() will wait for those jobs
+ * to be killed.
+ * - Hence, no more work required to cancel jobs, or to release the
+ * Power Manager active reference.
+ * - c. it is scheduled, and may or may not be running jobs
+ * - We must cause it to leave the runpool by stopping it from
+ * submitting any more jobs. When it finally does leave,
+ * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs
+ * (because it is dying), release the Power Manager active reference,
+ * and will not requeue the context in the policy queue.
+ * kbase_jd_zap_context() will wait for those jobs to be killed.
+ * - Hence, work required just to make it leave the runpool. Cancelling
+ * jobs and releasing the Power manager active reference will be
+ * handled when it leaves the runpool.
+ */
+ if (!js_kctx_info->ctx.is_scheduled) {
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
+ if (!list_empty(
+ &kctx->jctx.sched_info.ctx.ctx_list_entry[js]))
+ list_del_init(
+ &kctx->jctx.sched_info.ctx.ctx_list_entry[js]);
+ }
+
+ /* The following events require us to kill off remaining jobs
+ * and update PM book-keeping:
+ * - we evicted it correctly (it must have jobs to be in the
+ * Policy Queue)
+ *
+ * These events need no action, but take this path anyway:
+ * - Case a: it didn't have any jobs, and was never in the Queue
+ * - Case b: scheduling transaction will be partially rolled-
+ * back (this already cancels the jobs)
+ */
+
+ KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u,
+ js_kctx_info->ctx.is_scheduled);
+
+ dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx);
+
+ /* Only cancel jobs when we evicted from the policy
+ * queue. No Power Manager active reference was held.
+ *
+ * Having is_dying set ensures that this kills, and
+ * doesn't requeue */
+ kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false);
+
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+ } else {
+ unsigned long flags;
+ bool was_retained;
+
+ /* Case c: didn't evict, but it is scheduled - it's in the Run
+ * Pool */
+ KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u,
+ js_kctx_info->ctx.is_scheduled);
+ dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx);
+
+ /* Disable the ctx from submitting any more jobs */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+ /* Retain and (later) release the context whilst it is is now
+ * disallowed from submitting jobs - ensures that someone
+ * somewhere will be removing the context later on */
+ was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx);
+
+ /* Since it's scheduled and we have the jsctx_mutex, it must be
+ * retained successfully */
+ KBASE_DEBUG_ASSERT(was_retained);
+
+ dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx);
+
+ /* Cancel any remaining running jobs for this kctx - if any.
+ * Submit is disallowed which takes effect immediately, so no
+ * more new jobs will appear after we do this. */
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+ kbase_job_slot_hardstop(kctx, js, NULL);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+ mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)",
+ kctx);
+
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ }
+
+ KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u);
+
+ /* After this, you must wait on both the
+ * kbase_jd_context::zero_jobs_wait and the
+ * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs
+ * to be destroyed, and the context to be de-scheduled (if it was on the
+ * runpool).
+ *
+ * kbase_jd_zap_context() will do this. */
+}
+
+static inline int trace_get_refcnt(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int as_nr;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ refcnt = js_per_as_data->as_busy_refcount;
+ }
+
+ return refcnt;
+}
+
+/**
+ * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context
+ * @kctx: Pointer to context.
+ * @callback: Pointer to function to call for each job.
+ *
+ * Call a function on all jobs belonging to a non-queued, non-running
+ * context, and detach the jobs from the context as it goes.
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * Atoms will be removed from the queue, so this must only be called when
+ * cancelling jobs (which occurs as part of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+static void kbase_js_foreach_ctx_job(struct kbase_context *kctx,
+ kbasep_js_policy_ctx_job_cb callback)
+{
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+ unsigned long flags;
+ u32 js;
+
+ kbdev = kctx->kbdev;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL,
+ 0u, trace_get_refcnt(kbdev, kctx));
+
+ /* Invoke callback on jobs on each slot in turn */
+ for (js = 0; js < kbdev->gpu_props.num_job_slots; js++)
+ jsctx_rb_foreach(kctx, js, callback);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h
new file mode 100644
index 00000000000..79e7705932f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js.h
@@ -0,0 +1,1054 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler APIs.
+ */
+
+#ifndef _KBASE_JS_H_
+#define _KBASE_JS_H_
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_js_policy.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_debug.h"
+
+#include "mali_kbase_js_ctx_attr.h"
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js Job Scheduler Internal APIs
+ * @{
+ *
+ * These APIs are Internal to KBase and are available for use by the
+ * @ref kbase_js_policy "Job Scheduler Policy APIs"
+ */
+
+/**
+ * @brief Initialize the Job Scheduler
+ *
+ * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero
+ * initialized before passing to the kbasep_js_devdata_init() function. This is
+ * to give efficient error path code.
+ */
+int kbasep_js_devdata_init(struct kbase_device * const kbdev);
+
+/**
+ * @brief Halt the Job Scheduler.
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ *
+ */
+void kbasep_js_devdata_halt(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler
+ *
+ * It is safe to call this on \a kbdev even if it the kbasep_js_device_data
+ * sub-structure was never initialized/failed initialization, to give efficient
+ * error-path code.
+ *
+ * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must
+ * be zero initialized before passing to the kbasep_js_devdata_init()
+ * function. This is to give efficient error path code.
+ *
+ * It is a Programming Error to call this whilst there are still kbase_context
+ * structures registered with this scheduler.
+ */
+void kbasep_js_devdata_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler.
+ *
+ * This effectively registers a struct kbase_context with a Job Scheduler.
+ *
+ * It does not register any jobs owned by the struct kbase_context with the scheduler.
+ * Those must be separately registered by kbasep_js_add_job().
+ *
+ * The struct kbase_context must be zero intitialized before passing to the
+ * kbase_js_init() function. This is to give efficient error path code.
+ */
+int kbasep_js_kctx_init(struct kbase_context * const kctx);
+
+/**
+ * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler
+ *
+ * This effectively de-registers a struct kbase_context from its Job Scheduler
+ *
+ * It is safe to call this on a struct kbase_context that has never had or failed
+ * initialization of its jctx.sched_info member, to give efficient error-path
+ * code.
+ *
+ * For this to work, the struct kbase_context must be zero intitialized before passing
+ * to the kbase_js_init() function.
+ *
+ * It is a Programming Error to call this whilst there are still jobs
+ * registered with this context.
+ */
+void kbasep_js_kctx_term(struct kbase_context *kctx);
+
+/**
+ * @brief Add a job chain to the Job Scheduler, and take necessary actions to
+ * schedule the context/run the job.
+ *
+ * This atomically does the following:
+ * - Update the numbers of jobs information
+ * - Add the job to the run pool if necessary (part of init_job)
+ *
+ * Once this is done, then an appropriate action is taken:
+ * - If the ctx is scheduled, it attempts to start the next job (which might be
+ * this added job)
+ * - Otherwise, and if this is the first job on the context, it enqueues it on
+ * the Policy Queue
+ *
+ * The Policy's Queue can be updated by this in the following ways:
+ * - In the above case that this is the first job on the context
+ * - If the context is high priority and the context is not scheduled, then it
+ * could cause the Policy to schedule out a low-priority context, allowing
+ * this context to be scheduled in.
+ *
+ * If the context is already scheduled on the RunPool, then adding a job to it
+ * is guarenteed not to update the Policy Queue. And so, the caller is
+ * guarenteed to not need to try scheduling a context from the Run Pool - it
+ * can safely assert that the result is false.
+ *
+ * It is a programming error to have more than U32_MAX jobs in flight at a time.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ *
+ * @return true indicates that the Policy Queue was updated, and so the
+ * caller will need to try scheduling a context onto the Run Pool.
+ * @return false indicates that no updates were made to the Policy Queue,
+ * so no further action is required from the caller. This is \b always returned
+ * when the context is currently scheduled.
+ */
+bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'.
+ *
+ * Completely removing a job requires several calls:
+ * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
+ * the atom
+ * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler
+ * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the
+ * remaining state held as part of the job having been run.
+ *
+ * In the common case of atoms completing normally, this set of actions is more optimal for spinlock purposes than having kbasep_js_remove_job() handle all of the actions.
+ *
+ * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ * - it has not been removed with kbasep_js_policy_dequeue_job()
+ * - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * Do not use this for removing jobs being killed by kbase_jd_cancel() - use
+ * kbasep_js_remove_cancelled_job() instead.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ *
+ */
+void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom);
+
+/**
+ * @brief Completely remove a job chain from the Job Scheduler, in the case
+ * where the job chain was cancelled.
+ *
+ * This is a variant of kbasep_js_remove_job() that takes care of removing all
+ * of the retained state too. This is generally useful for cancelled atoms,
+ * which need not be handled in an optimal way.
+ *
+ * It is a programming error to call this when:
+ * - \a atom is not a job belonging to kctx.
+ * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool:
+ * - it is not being killed with kbasep_jd_cancel()
+ * - or, it has not been removed with kbasep_js_policy_dequeue_job()
+ * - or, it has not been removed with kbasep_js_policy_dequeue_job_irq()
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, (as this will be
+ * obtained internally)
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be
+ * obtained internally)
+ *
+ * @return true indicates that ctx attributes have changed and the caller
+ * should call kbase_js_sched_all() to try to run more jobs
+ * @return false otherwise
+ */
+bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Refcount a context as being busy, preventing it from being scheduled
+ * out.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locks must be held by the caller:
+ * - kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return value != false if the retain succeeded, and the context will not be scheduled out.
+ * @return false if the retain failed (because the context is being/has been scheduled out).
+ */
+bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Lookup a context in the Run Pool based upon its current address space
+ * and ensure that is stays scheduled in.
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * @note This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because
+ * it will be used internally. If the runpool_irq::lock is already held, then
+ * the caller should use kbasep_js_runpool_lookup_ctx_nolock() instead.
+ *
+ * @return a valid struct kbase_context on success, which has been refcounted as being busy.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr);
+
+/**
+ * kbasep_js_runpool_lookup_ctx_nolock - Lookup a context in the Run Pool based
+ * upon its current address space and ensure that is stays scheduled in.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to lookup
+ *
+ * The context is refcounted as being busy to prevent it from scheduling
+ * out. It must be released with kbasep_js_runpool_release_ctx() when it is no
+ * longer required to stay scheduled in.
+ *
+ * Note: This function can safely be called from IRQ context.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must the kbasep_js_device_data::runpoool_irq::lock.
+ *
+ * Return: a valid struct kbase_context on success, which has been refcounted as
+ * being busy.
+ * NULL on failure, indicating that no context was found in \a as_nr
+ */
+struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock(
+ struct kbase_device *kbdev, int as_nr);
+
+/**
+ * @brief Handling the requeuing/killing of a context that was evicted from the
+ * policy queue or runpool.
+ *
+ * This should be used whenever handing off a context that has been evicted
+ * from the policy queue or the runpool:
+ * - If the context is not dying and has jobs, it gets re-added to the policy
+ * queue
+ * - Otherwise, it is not added
+ *
+ * In addition, if the context is dying the jobs are killed asynchronously.
+ *
+ * In all cases, the Power Manager active reference is released
+ * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a
+ * has_pm_ref must be set to false whenever the context was not previously in
+ * the runpool and does not hold a Power Manager active refcount. Note that
+ * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an
+ * active refcount even though they weren't in the runpool.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ */
+void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref);
+
+/**
+ * @brief Release a refcount of a context being busy, allowing it to be
+ * scheduled out.
+ *
+ * When the refcount reaches zero and the context \em might be scheduled out
+ * (depending on whether the Scheudling Policy has deemed it so, or if it has run
+ * out of jobs).
+ *
+ * If the context does get scheduled out, then The following actions will be
+ * taken as part of deschduling a context:
+ * - For the context being descheduled:
+ * - If the context is in the processing of dying (all the jobs are being
+ * removed from it), then descheduling also kills off any jobs remaining in the
+ * context.
+ * - If the context is not dying, and any jobs remain after descheduling the
+ * context then it is re-enqueued to the Policy's Queue.
+ * - Otherwise, the context is still known to the scheduler, but remains absent
+ * from the Policy Queue until a job is next added to it.
+ * - In all descheduling cases, the Power Manager active reference (obtained
+ * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()).
+ *
+ * Whilst the context is being descheduled, this also handles actions that
+ * cause more atoms to be run:
+ * - Attempt submitting atoms when the Context Attributes on the Runpool have
+ * changed. This is because the context being scheduled out could mean that
+ * there are more opportunities to run atoms.
+ * - Attempt submitting to a slot that was previously blocked due to affinity
+ * restrictions. This is usually only necessary when releasing a context
+ * happens as part of completing a previous job, but is harmless nonetheless.
+ * - Attempt scheduling in a new context (if one is available), and if necessary,
+ * running a job from that new context.
+ *
+ * Unlike retaining a context in the runpool, this function \b cannot be called
+ * from IRQ context.
+ *
+ * It is a programming error to call this on a \a kctx that is not currently
+ * scheduled, or that already has a zero refcount.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be
+ * obtained internally)
+ *
+ */
+void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional
+ * actions from completing an atom.
+ *
+ * This is usually called as part of completing an atom and releasing the
+ * refcount on the context held by the atom.
+ *
+ * Therefore, the extra actions carried out are part of handling actions queued
+ * on a completed atom, namely:
+ * - Releasing the atom's context attributes
+ * - Retrying the submission on a particular slot, because we couldn't submit
+ * on that slot from an IRQ handler.
+ *
+ * The locking conditions of this function are the same as those for
+ * kbasep_js_runpool_release_ctx()
+ */
+void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+
+/**
+ * @brief Release the refcount of the context and allow further submission
+ * of the context after the dump on error in user space terminates.
+ *
+ * Before this function is called, when a fault happens the kernel should
+ * have disallowed the context from further submission of jobs and
+ * retained the context to avoid it from being removed. This function
+ * releases the refcount of the context and allow further submission of
+ * jobs.
+ *
+ * This function should only be called when "instr=2" during compile time.
+ */
+void kbasep_js_dump_fault_term(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Variant of kbase_js_runpool_release_ctx() that assumes that
+ * kbasep_js_device_data::runpool_mutex and
+ * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not
+ * attempt to schedule new contexts.
+ */
+void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * @brief Schedule in a privileged context
+ *
+ * This schedules a context in regardless of the context priority.
+ * If the runpool is full, a context will be forced out of the runpool and the function will wait
+ * for the new context to be scheduled in.
+ * The context will be kept scheduled in (and the corresponding address space reserved) until
+ * kbasep_js_release_privileged_ctx is called).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will
+ * be used internally.
+ *
+ */
+void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Release a privileged context, allowing it to be scheduled out.
+ *
+ * See kbasep_js_runpool_release_ctx for potential side effects.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be
+ * obtained internally)
+ * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally)
+ *
+ */
+void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Try to submit the next job on each slot
+ *
+ * The following locks may be used:
+ * - kbasep_js_device_data::runpool_mutex
+ * - kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_js_try_run_jobs(struct kbase_device *kbdev);
+
+/**
+ * @brief Suspend the job scheduler during a Power Management Suspend event.
+ *
+ * Causes all contexts to be removed from the runpool, and prevents any
+ * contexts from (re)entering the runpool.
+ *
+ * This does not handle suspending the one privileged context: the caller must
+ * instead do this by by suspending the GPU HW Counter Instrumentation.
+ *
+ * This will eventually cause all Power Management active references held by
+ * contexts on the runpool to be released, without running any more atoms.
+ *
+ * The caller must then wait for all Power Mangement active refcount to become
+ * zero before completing the suspend.
+ *
+ * The emptying mechanism may take some time to complete, since it can wait for
+ * jobs to complete naturally instead of forcing them to end quickly. However,
+ * this is bounded by the Job Scheduling Policy's Job Timeouts. Hence, this
+ * function is guaranteed to complete in a finite time whenever the Job
+ * Scheduling Policy implements Job Timeouts (such as those done by CFS).
+ */
+void kbasep_js_suspend(struct kbase_device *kbdev);
+
+/**
+ * @brief Resume the Job Scheduler after a Power Management Resume event.
+ *
+ * This restores the actions from kbasep_js_suspend():
+ * - Schedules contexts back into the runpool
+ * - Resumes running atoms on the GPU
+ */
+void kbasep_js_resume(struct kbase_device *kbdev);
+
+/**
+ * @brief Submit an atom to the job scheduler.
+ *
+ * The atom is enqueued on the context's ringbuffer. The caller must have
+ * ensured that all dependencies can be represented in the ringbuffer.
+ *
+ * Caller must hold jctx->lock
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] atom Pointer to the atom to submit
+ *
+ * @return 0 if submit succeeded
+ * error code if the atom can not be submitted at this
+ * time, due to insufficient space in the ringbuffer, or dependencies
+ * that can not be represented.
+ */
+int kbase_js_dep_resolved_submit(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom,
+ bool *enqueue_required);
+
+/**
+ * @brief Pull an atom from a context in the job scheduler for execution.
+ *
+ * The atom will not be removed from the ringbuffer at this stage.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx Context to pull from
+ * @param[in] js Job slot to pull from
+ * @return Pointer to an atom, or NULL if there are no atoms for this
+ * slot that can be currently run.
+ */
+struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js);
+
+/**
+ * @brief Return an atom to the job scheduler ringbuffer.
+ *
+ * An atom is 'unpulled' if execution is stopped but intended to be returned to
+ * later. The most common reason for this is that the atom has been
+ * soft-stopped.
+ *
+ * Note that if multiple atoms are to be 'unpulled', they must be returned in
+ * the reverse order to which they were originally pulled. It is a programming
+ * error to return atoms in any other order.
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] atom Pointer to the atom to unpull
+ */
+void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom from jd_done_worker(), removing it from the job
+ * scheduler ringbuffer.
+ *
+ * If the atom failed then all dependee atoms marked for failure propagation
+ * will also fail.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] katom Pointer to the atom to complete
+ */
+void kbase_js_complete_atom_wq(struct kbase_context *kctx,
+ struct kbase_jd_atom *katom);
+
+/**
+ * @brief Complete an atom.
+ *
+ * Most of the work required to complete an atom will be performed by
+ * jd_done_worker().
+ *
+ * The HW access lock must be held when calling this function.
+ *
+ * @param[in] katom Pointer to the atom to complete
+ * @param[in] end_timestamp The time that the atom completed (may be NULL)
+ */
+void kbase_js_complete_atom(struct kbase_jd_atom *katom,
+ ktime_t *end_timestamp);
+
+/**
+ * @brief Submit atoms from all available contexts.
+ *
+ * This will attempt to submit as many jobs as possible to the provided job
+ * slots. It will exit when either all job slots are full, or all contexts have
+ * been used.
+ *
+ * @param[in] kbdev Device pointer
+ * @param[in] js_mask Mask of job slots to submit to
+ */
+void kbase_js_sched(struct kbase_device *kbdev, int js_mask);
+
+/**
+ * kbase_jd_zap_context - Attempt to deschedule a context that is being
+ * destroyed
+ * @kctx: Context pointer
+ *
+ * This will attempt to remove a context from any internal job scheduler queues
+ * and perform any other actions to ensure a context will not be submitted
+ * from.
+ *
+ * If the context is currently scheduled, then the caller must wait for all
+ * pending jobs to complete before taking any further action.
+ */
+void kbase_js_zap_context(struct kbase_context *kctx);
+
+/**
+ * @brief Validate an atom
+ *
+ * This will determine whether the atom can be scheduled onto the GPU. Atoms
+ * with invalid combinations of core requirements will be rejected.
+ *
+ * @param[in] kbdev Device pointer
+ * @param[in] katom Atom to validate
+ * @return true if atom is valid
+ * false otherwise
+ */
+bool kbase_js_is_atom_valid(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/*
+ * Helpers follow
+ */
+
+/**
+ * @brief Check that a context is allowed to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * As with any bool, never test the return value with true.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+ u16 test_bit;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+ test_bit = (u16) (1u << kctx->as_nr);
+
+ return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit);
+}
+
+/**
+ * @brief Allow a context to submit jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+ u16 set_bit;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+ set_bit = (u16) (1u << kctx->as_nr);
+
+ dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+ js_devdata->runpool_irq.submit_allowed |= set_bit;
+}
+
+/**
+ * @brief Prevent a context from submitting more jobs on this policy
+ *
+ * The purpose of this abstraction is to hide the underlying data size, and wrap up
+ * the long repeated line of code.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock.
+ */
+static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx)
+{
+ u16 clear_bit;
+ u16 clear_mask;
+
+ /* Ensure context really is scheduled in */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled);
+
+ clear_bit = (u16) (1u << kctx->as_nr);
+ clear_mask = ~clear_bit;
+
+ dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr);
+
+ js_devdata->runpool_irq.submit_allowed &= clear_mask;
+}
+
+/**
+ * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom
+ */
+static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom)
+{
+ atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Mark a slot as requiring resubmission by carrying that information on a
+ * completing atom.
+ *
+ * @note This can ASSERT in debug builds if the submit slot has been set to
+ * something other than the current value for @a js. This is because you might
+ * be unintentionally stopping more jobs being submitted on the old submit
+ * slot, and that might cause a scheduling-hang.
+ *
+ * @note If you can guarantee that the atoms for the original slot will be
+ * submitted on some other slot, then call kbasep_js_clear_job_retry_submit()
+ * first to silence the ASSERT.
+ */
+static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js)
+{
+ KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS);
+ KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot ==
+ KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID)
+ || (atom->retry_submit_on_slot == js));
+
+ atom->retry_submit_on_slot = js;
+}
+
+/**
+ * Create an initial 'invalid' atom retained state, that requires no
+ * atom-related work to be done on releasing with
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state()
+ */
+static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state)
+{
+ retained_state->event_code = BASE_JD_EVENT_NOT_STARTED;
+ retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID;
+ retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID;
+}
+
+/**
+ * Copy atom state that can be made available after jd_done_nolock() is called
+ * on that atom.
+ */
+static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom)
+{
+ retained_state->event_code = katom->event_code;
+ retained_state->core_req = katom->core_req;
+ retained_state->retry_submit_on_slot = katom->retry_submit_on_slot;
+ retained_state->sched_priority = katom->sched_priority;
+ retained_state->device_nr = katom->device_nr;
+}
+
+/**
+ * @brief Determine whether an atom has finished (given its retained state),
+ * and so should be given back to userspace/removed from the system.
+ *
+ * Reasons for an atom not finishing include:
+ * - Being soft-stopped (and so, the atom should be resubmitted sometime later)
+ *
+ * @param[in] katom_retained_state the retained state of the atom to check
+ * @return false if the atom has not finished
+ * @return !=false if the atom has finished
+ */
+static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT);
+}
+
+/**
+ * @brief Determine whether a struct kbasep_js_atom_retained_state is valid
+ *
+ * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the
+ * code should just ignore it.
+ *
+ * @param[in] katom_retained_state the atom's retained state to check
+ * @return false if the retained state is invalid, and can be ignored
+ * @return !=false if the retained state is valid
+ */
+static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID);
+}
+
+static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res)
+{
+ int js = katom_retained_state->retry_submit_on_slot;
+
+ *res = js;
+ return (bool) (js >= 0);
+}
+
+#if KBASE_DEBUG_DISABLE_ASSERTS == 0
+/**
+ * Debug Check the refcount of a context. Only use within ASSERTs
+ *
+ * Obtains kbasep_js_device_data::runpool_irq::lock
+ *
+ * @return negative value if the context is not scheduled in
+ * @return current refcount of the context if it is scheduled in. The refcount
+ * is not guarenteed to be kept constant.
+ */
+static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int result = -1;
+ int as_nr;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID)
+ result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return result;
+}
+#endif /* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guarenteed to be already previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are made on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpoool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guarenteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_context *found_kctx;
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+ js_devdata = &kbdev->js_data;
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+ found_kctx = js_per_as_data->kctx;
+ KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return found_kctx;
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(struct kbase_device *kbdev, u32 us)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return us * (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from time (us) to ticks of the gpu clock
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(struct kbase_device *kbdev, u32 us)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the minimum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the worst-case wait that 'ticks' cycles will
+ * take (you guarantee that you won't wait any longer than this, but it may
+ * be shorter).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(struct kbase_device *kbdev, u32 ticks)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return ticks / gpu_freq * 1000;
+}
+
+/**
+ * This will provide a conversion from ticks of the gpu clock to time (us)
+ * based on the maximum available gpu frequency.
+ * This is usually good to compute best/worst case (where the use of current
+ * frequency is not valid due to DVFS).
+ * e.g.: When you need to know the best-case wait for 'tick' cycles (you
+ * guarantee to be waiting for at least this long, but it may be longer).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(struct kbase_device *kbdev, u32 ticks)
+{
+ u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+ KBASE_DEBUG_ASSERT(0 != gpu_freq);
+ return ticks / gpu_freq * 1000;
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* Track total contexts */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+ ++(js_devdata->nr_all_contexts_running);
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+ /* Track contexts that can submit jobs */
+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+ S8_MAX);
+ ++(js_devdata->nr_user_contexts_running);
+ }
+}
+
+/*
+ * The following locking conditions are made on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+ struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+ lockdep_assert_held(&js_devdata->runpool_mutex);
+
+ /* Track total contexts */
+ --(js_devdata->nr_all_contexts_running);
+ KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+ /* Track contexts that can submit jobs */
+ --(js_devdata->nr_user_contexts_running);
+ KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+ }
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+ kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ * to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treates priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ * 0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ * KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+ if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+ return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+ return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+ unsigned int prio_idx;
+
+ KBASE_DEBUG_ASSERT(0 <= sched_prio
+ && sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+ prio_idx = (unsigned int)sched_prio;
+
+ return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+ /** @} *//* end group kbase_js */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100644
index 00000000000..49266007935
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,310 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_config.h>
+
+/*
+ * Private functions follow
+ */
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, retain that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool runpool_state_changed = false;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX);
+ ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) {
+ /* First refcount indicates a state change */
+ runpool_state_changed = true;
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute);
+ }
+ }
+
+ return runpool_state_changed;
+}
+
+/**
+ * @brief Check whether a ctx has a certain attribute, and if so, release that
+ * attribute on the runpool.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx is scheduled on the runpool
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool runpool_state_changed = false;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_devdata = &kbdev->js_data;
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+ KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+ --(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+ if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+ /* Last de-refcount indicates a state change */
+ runpool_state_changed = true;
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+ }
+ }
+
+ return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool runpool_state_changed = false;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+ ++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+ if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+ /* Only ref-count the attribute on the runpool for the first time this contexts sees this attribute */
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+ }
+
+ return runpool_state_changed;
+}
+
+/*
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool runpool_state_changed = false;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+ KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+ if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+ /* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+ runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+ KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+ }
+
+ /* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release() needs to check it too */
+ --(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+ return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+ bool runpool_state_changed = false;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != false) {
+ /* This context never submits, so don't track any scheduling attributes */
+ return;
+ }
+
+ /* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+ if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_HINT_ONLY_COMPUTE) != false) {
+ /* Compute context */
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+ }
+ /* NOTE: Whether this is a non-compute context depends on the jobs being
+ * run, e.g. it might be submitting jobs with BASE_JD_REQ_ONLY_COMPUTE */
+
+ /* ... More attributes can be added here ... */
+
+ /* The context should not have been scheduled yet, so ASSERT if this caused
+ * runpool state changes (note that other threads *can't* affect the value
+ * of runpool_state_changed, due to how it's calculated) */
+ KBASE_DEBUG_ASSERT(runpool_state_changed == false);
+ CSTD_UNUSED(runpool_state_changed);
+}
+
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ bool runpool_state_changed;
+ int i;
+
+ /* Retain any existing attributes */
+ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+ /* The context is being scheduled in, so update the runpool with the new attributes */
+ runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+
+ /* We don't need to know about state changed, because retaining a
+ * context occurs on scheduling it, and that itself will also try
+ * to run new atoms */
+ CSTD_UNUSED(runpool_state_changed);
+ }
+ }
+}
+
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ bool runpool_state_changed = false;
+ int i;
+
+ /* Release any existing attributes */
+ for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) {
+ if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) {
+ /* The context is being scheduled out, so update the runpool on the removed attributes */
+ runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i);
+ }
+ }
+
+ return runpool_state_changed;
+}
+
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ bool runpool_state_changed = false;
+ base_jd_core_req core_req;
+
+ KBASE_DEBUG_ASSERT(katom);
+ core_req = katom->core_req;
+
+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+ else
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+ /* Atom that can run on slot1 or slot2, and can use all cores */
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+ }
+
+ /* We don't need to know about state changed, because retaining an
+ * atom occurs on adding it, and that itself will also try to run
+ * new atoms */
+ CSTD_UNUSED(runpool_state_changed);
+}
+
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state)
+{
+ bool runpool_state_changed = false;
+ base_jd_core_req core_req;
+
+ KBASE_DEBUG_ASSERT(katom_retained_state);
+ core_req = katom_retained_state->core_req;
+
+ /* No-op for invalid atoms */
+ if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false)
+ return false;
+
+ if (core_req & BASE_JD_REQ_ONLY_COMPUTE)
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+ else
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE);
+
+ if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) {
+ /* Atom that can run on slot1 or slot2, and can use all cores */
+ runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES);
+ }
+
+ return runpool_state_changed;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
new file mode 100644
index 00000000000..ce9183326a5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h
@@ -0,0 +1,158 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_ctx_attr.h
+ * Job Scheduler Context Attribute APIs
+ */
+
+#ifndef _KBASE_JS_CTX_ATTR_H_
+#define _KBASE_JS_CTX_ATTR_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+
+/**
+ * Set the initial attributes of a context (when context create flags are set)
+ *
+ * Requires:
+ * - Hold the jsctx_mutex
+ */
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of a context
+ *
+ * This occurs on scheduling in the context on the runpool (but after
+ * is_scheduled is set)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ */
+void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Release all attributes of a context
+ *
+ * This occurs on scheduling out the context from the runpool (but before
+ * is_scheduled is cleared)
+ *
+ * Requires:
+ * - jsctx mutex
+ * - runpool_irq spinlock
+ * - ctx->is_scheduled is true
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * Retain all attributes of an atom
+ *
+ * This occurs on adding an atom to a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ */
+void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * Release all attributes of an atom, given its retained state.
+ *
+ * This occurs after (permanently) removing an atom from a context
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * This is a no-op when \a katom_retained_state is invalid.
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_device_data *js_devdata;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_devdata = &kbdev->js_data;
+
+ return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+ /* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+ return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+ struct kbasep_js_kctx_info *js_kctx_info;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+ js_kctx_info = &kctx->jctx.sched_info;
+
+ /* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+ return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+ /** @} *//* end group kbase_js */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100644
index 00000000000..7532f9c9e71
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,517 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+/* Types used by the policies must go here */
+enum {
+ /** Context will not submit any jobs */
+ KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),
+
+ /** Set if the context uses an address space and should be kept scheduled in */
+ KBASE_CTX_FLAG_PRIVILEGED = (1u << 1),
+
+ /** Kernel-side equivalent of BASE_CONTEXT_HINT_ONLY_COMPUTE. Non-mutable after creation flags set */
+ KBASE_CTX_FLAG_HINT_ONLY_COMPUTE = (1u << 2)
+
+ /* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
+};
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+ base_jd_core_req core_req;
+ kbase_context_flags ctx_req;
+ u32 device_nr;
+};
+
+#include "mali_kbase_js_policy_cfs.h"
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy {
+ struct kbasep_js_policy_cfs cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_ctx_info {
+ struct kbasep_js_policy_cfs_ctx cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_job_info {
+ struct kbasep_js_policy_cfs_job cfs;
+};
+
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
+ * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief the IRQ_THROTTLE time in microseconds
+ *
+ * This will be converted via the GPU's clock frequency into a cycle-count.
+ *
+ * @note we can make an estimate of the GPU's frequency by periodically
+ * sampling its CYCLE_COUNT register
+ */
+#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affect scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accomodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ *
+ * Examples of use:
+ * - Finding out when there are a mix of @ref BASE_CONTEXT_HINT_ONLY_COMPUTE
+ * and ! @ref BASE_CONTEXT_HINT_ONLY_COMPUTE contexts in the runpool
+ */
+enum kbasep_js_ctx_attr {
+ /** Attribute indicating a context that contains Compute jobs. That is,
+ * @ref BASE_CONTEXT_HINT_ONLY_COMPUTE is \b set and/or the context has jobs of type
+ * @ref BASE_JD_REQ_ONLY_COMPUTE
+ *
+ * @note A context can be both 'Compute' and 'Non Compute' if it contains
+ * both types of jobs.
+ */
+ KBASEP_JS_CTX_ATTR_COMPUTE,
+
+ /** Attribute indicating a context that contains Non-Compute jobs. That is,
+ * the context has some jobs that are \b not of type @ref
+ * BASE_JD_REQ_ONLY_COMPUTE. The context usually has
+ * BASE_CONTEXT_HINT_COMPUTE \b clear, but this depends on the HW
+ * workarounds in use in the Job Scheduling Policy.
+ *
+ * @note A context can be both 'Compute' and 'Non Compute' if it contains
+ * both types of jobs.
+ */
+ KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+ /** Attribute indicating that a context contains compute-job atoms that
+ * aren't restricted to a coherent group, and can run on all cores.
+ *
+ * Specifically, this is when the atom's \a core_req satisfy:
+ * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
+ * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+ *
+ * Such atoms could be blocked from running if one of the coherent groups
+ * is being used by another job slot, so tracking this context attribute
+ * allows us to prevent such situations.
+ *
+ * @note This doesn't take into account the 1-coregroup case, where all
+ * compute atoms would effectively be able to run on 'all cores', but
+ * contexts will still not always get marked with this attribute. Instead,
+ * it is the caller's responsibility to take into account the number of
+ * coregroups when interpreting this attribute.
+ *
+ * @note Whilst Tiler atoms are normally combined with
+ * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+ * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+ * enough to handle anyway.
+ */
+ KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+ /** Must be the last in the enum */
+ KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+ /** Bit indicating that new atom should be started because this atom completed */
+ KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+ /** Bit indicating that the atom was evicted from the JS_NEXT registers */
+ KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * Data used by the scheduler that is unique for each Address Space.
+ *
+ * This is used in IRQ context and kbasep_js_device_data::runpoool_irq::lock
+ * must be held whilst accessing this data (inculding reads and atomic
+ * decisions based on the read).
+ */
+struct kbasep_js_per_as_data {
+ /**
+ * Ref count of whether this AS is busy, and must not be scheduled out
+ *
+ * When jobs are running this is always positive. However, it can still be
+ * positive when no jobs are running. If all you need is a heuristic to
+ * tell you whether jobs might be running, this should be sufficient.
+ */
+ int as_busy_refcount;
+
+ /** Pointer to the current context on this address space, or NULL for no context */
+ struct kbase_context *kctx;
+};
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+ /** Sub-structure to collect together Job Scheduling data used in IRQ context */
+ struct runpool_irq {
+ /**
+ * Lock for accessing Job Scheduling data used in IRQ context
+ *
+ * This lock must be held whenever this data is accessed (read, or
+ * write). Even for read-only access, memory barriers would be needed.
+ * In any case, it is likely that decisions based on only reading must
+ * also be atomic with respect to data held here and elsewhere in the
+ * Job Scheduler.
+ *
+ * This lock must also be held for accessing:
+ * - kbase_context::as_nr
+ * - kbase_device::jm_slots
+ * - Parts of the kbasep_js_policy, dependent on the policy (refer to
+ * the policy in question for more information)
+ * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to
+ * the policy in question for more information)
+ */
+ spinlock_t lock;
+
+ /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
+ * When bit 'N' is set in this, it indicates whether the context bound to address space
+ * 'N' (per_as_data[N].kctx) is allowed to submit jobs.
+ *
+ * It is placed here because it's much more memory efficient than having a u8 in
+ * struct kbasep_js_per_as_data to store this flag */
+ u16 submit_allowed;
+
+ /** Context Attributes:
+ * Each is large enough to hold a refcount of the number of contexts
+ * that can fit into the runpool. This is currently BASE_MAX_NR_AS
+ *
+ * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+ * the refcount. Hence, it's not worthwhile reducing this to
+ * bit-manipulation on u32s to save space (where in contrast, 4 bit
+ * sub-fields would be easy to do and would save space).
+ *
+ * Whilst this must not become negative, the sign bit is used for:
+ * - error detection in debug builds
+ * - Optimization: it is undefined for a signed int to overflow, and so
+ * the compiler can optimize for that never happening (thus, no masking
+ * is required on updating the variable) */
+ s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+ /** Data that is unique for each AS */
+ struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS];
+
+ /*
+ * Affinity management and tracking
+ */
+ /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
+ * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
+ u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
+ /** Refcount for each core owned by each slot. Used to generate the
+ * slot_affinities array of bitvectors
+ *
+ * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+ * because it is refcounted only when a job is definitely about to be
+ * submitted to a slot, and is de-refcounted immediately after a job
+ * finishes */
+ s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
+
+ /*
+ * true when GPU is put into secure mode
+ */
+ bool secure_mode;
+ } runpool_irq;
+
+ /**
+ * Run Pool mutex, for managing contexts within the runpool.
+ * Unless otherwise specified, you must hold this lock whilst accessing any
+ * members that follow
+ *
+ * In addition, this is used to access:
+ * - the kbasep_js_kctx_info::runpool substructure
+ */
+ struct mutex runpool_mutex;
+
+ /**
+ * Queue Lock, used to access the Policy's queue of contexts independently
+ * of the Run Pool.
+ *
+ * Of course, you don't need the Run Pool lock to access this.
+ */
+ struct mutex queue_mutex;
+
+ /**
+ * Scheduling semaphore. This must be held when calling
+ * kbase_jm_kick()
+ */
+ struct semaphore schedule_sem;
+
+ /**
+ * List of contexts that can currently be pulled from
+ */
+ struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
+ /**
+ * List of contexts that can not currently be pulled from, but have
+ * jobs currently running.
+ */
+ struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+ u16 as_free; /**< Bitpattern of free Address Spaces */
+
+ /** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+ s8 nr_user_contexts_running;
+ /** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+ s8 nr_all_contexts_running;
+
+ /**
+ * Policy-specific information.
+ *
+ * Refer to the structure defined by the current policy to determine which
+ * locks must be held when accessing this.
+ */
+ union kbasep_js_policy policy;
+
+ /** Core Requirements to match up with base_js_atom's core_req memeber
+ * @note This is a write-once member, and so no locking is required to read */
+ base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+ u32 scheduling_period_ns; /*< Value for JS_SCHEDULING_PERIOD_NS */
+ u32 soft_stop_ticks; /*< Value for JS_SOFT_STOP_TICKS */
+ u32 soft_stop_ticks_cl; /*< Value for JS_SOFT_STOP_TICKS_CL */
+ u32 hard_stop_ticks_ss; /*< Value for JS_HARD_STOP_TICKS_SS */
+ u32 hard_stop_ticks_cl; /*< Value for JS_HARD_STOP_TICKS_CL */
+ u32 hard_stop_ticks_dumping; /*< Value for JS_HARD_STOP_TICKS_DUMPING */
+ u32 gpu_reset_ticks_ss; /*< Value for JS_RESET_TICKS_SS */
+ u32 gpu_reset_ticks_cl; /*< Value for JS_RESET_TICKS_CL */
+ u32 gpu_reset_ticks_dumping; /*< Value for JS_RESET_TICKS_DUMPING */
+ u32 ctx_timeslice_ns; /**< Value for JS_CTX_TIMESLICE_NS */
+ u32 cfs_ctx_runtime_init_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
+ u32 cfs_ctx_runtime_min_slices; /**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
+
+ /** List of suspended soft jobs */
+ struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+ /* Support soft-stop on a single context */
+ bool softstop_always;
+#endif /* CONFIG_MALI_DEBUG */
+ /** The initalized-flag is placed at the end, to avoid cache-pollution (we should
+ * only be using this during init/term paths).
+ * @note This is a write-once member, and so no locking is required to read */
+ int init_status;
+
+ /* Number of contexts that can currently be pulled from */
+ u32 nr_contexts_pullable;
+
+ /* Number of contexts that can either be pulled from or are currently
+ * running */
+ atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+ /**
+ * Runpool substructure. This must only be accessed whilst the Run Pool
+ * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
+ *
+ * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be
+ * held for certain sub-members.
+ *
+ * @note some of the members could be moved into struct kbasep_js_device_data for
+ * improved d-cache/tlb efficiency.
+ */
+ struct {
+ union kbasep_js_policy_ctx_info policy_ctx; /**< Policy-specific context */
+ } runpool;
+
+ /**
+ * Job Scheduler Context information sub-structure. These members are
+ * accessed regardless of whether the context is:
+ * - In the Policy's Run Pool
+ * - In the Policy's Queue
+ * - Not queued nor in the Run Pool.
+ *
+ * You must obtain the jsctx_mutex before accessing any other members of
+ * this substructure.
+ *
+ * You may not access any of these members from IRQ context.
+ */
+ struct {
+ struct mutex jsctx_mutex; /**< Job Scheduler Context lock */
+
+ /** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+ * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+ * for such jobs*/
+ u32 nr_jobs;
+
+ /** Context Attributes:
+ * Each is large enough to hold a refcount of the number of atoms on
+ * the context. **/
+ u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+ kbase_context_flags flags;
+ /* NOTE: Unify the following flags into kbase_context_flags */
+ /**
+ * Is the context scheduled on the Run Pool?
+ *
+ * This is only ever updated whilst the jsctx_mutex is held.
+ */
+ bool is_scheduled;
+ /**
+ * Wait queue to wait for is_scheduled state changes.
+ * */
+ wait_queue_head_t is_scheduled_wait;
+
+ bool is_dying; /**< Is the context in the process of being evicted? */
+
+ /** Link implementing JS queues. Context can be present on one
+ * list per job slot
+ */
+ struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+
+ atomic_t fault_count; /**< The no. of times the context is retained due to the fault job. */
+ } ctx;
+
+ /* The initalized-flag is placed at the end, to avoid cache-pollution (we should
+ * only be using this during init/term paths) */
+ int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+ /** Event code - to determine whether the atom has finished */
+ enum base_jd_event_code event_code;
+ /** core requirements */
+ base_jd_core_req core_req;
+ /* priority */
+ int sched_priority;
+ /** Job Slot to retry submitting to if submission from IRQ handler failed */
+ int retry_submit_on_slot;
+ /* Core group atom was executed on */
+ u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+ KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+ KBASE_JS_ATOM_SCHED_PRIO_MED,
+ KBASE_JS_ATOM_SCHED_PRIO_LOW,
+ KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+ /** @} *//* end group kbase_js */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
new file mode 100644
index 00000000000..2094586ff2d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
@@ -0,0 +1,763 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_policy.h
+ * Job Scheduler Policy APIs.
+ */
+
+#ifndef _KBASE_JS_POLICY_H_
+#define _KBASE_JS_POLICY_H_
+
+/**
+ * @page page_kbase_js_policy Job Scheduling Policies
+ * The Job Scheduling system is described in the following:
+ * - @subpage page_kbase_js_policy_overview
+ * - @subpage page_kbase_js_policy_operation
+ *
+ * The API details are as follows:
+ * - @ref kbase_jm
+ * - @ref kbase_js
+ * - @ref kbase_js_policy
+ */
+
+/**
+ * @page page_kbase_js_policy_overview Overview of the Policy System
+ *
+ * The Job Scheduler Policy manages:
+ * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs)
+ * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the
+ * GPU's Job Slots (\em JSs).
+ * - The amount of \em time a context is assigned to (<em>scheduled on</em>) an
+ * Address Space
+ * - The amount of \em time a Job spends running on the GPU
+ *
+ * The Policy implements this management via 2 components:
+ * - A Policy Queue, which manages a set of contexts that are ready to run,
+ * but not currently running.
+ * - A Policy Run Pool, which manages the currently running contexts (one per Address
+ * Space) and the jobs to run on the Job Slots.
+ *
+ * Each Graphics Process in the system has at least one KBase Context. Therefore,
+ * the Policy Queue can be seen as a queue of Processes waiting to run Jobs on
+ * the GPU.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted"
+ *
+ * The main operations on the queue are:
+ * - Enqueuing a Context to it
+ * - Dequeuing a Context from it, to run it.
+ * - Note: requeuing a context is much the same as enqueuing a context, but
+ * occurs when a context is scheduled out of the system to allow other contexts
+ * to run.
+ *
+ * These operations have much the same meaning for the Run Pool - Jobs are
+ * dequeued to run on a Jobslot, and requeued when they are scheduled out of
+ * the GPU.
+ *
+ * @note This is an over-simplification of the Policy APIs - there are more
+ * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue
+ * takes at least two function calls: one to Dequeue from the Queue, one to add
+ * to the Run Pool.
+ *
+ * As indicated on the diagram, Jobs permanently leave the scheduling system
+ * when they are completed, otherwise they get dequeued/requeued until this
+ * happens. Similarly, Contexts leave the scheduling system when their jobs
+ * have all completed. However, Contexts may later return to the scheduling
+ * system (not shown on the diagram) if more Bags of Jobs are submitted to
+ * them.
+ */
+
+/**
+ * @page page_kbase_js_policy_operation Policy Operation
+ *
+ * We describe the actions that the Job Scheduler Core takes on the Policy in
+ * the following cases:
+ * - The IRQ Path
+ * - The Job Submission Path
+ * - The High Priority Job Submission Path
+ *
+ * This shows how the Policy APIs will be used by the Job Scheduler core.
+ *
+ * The following diagram shows an example Policy that contains a Low Priority
+ * queue, and a Real-time (High Priority) Queue. The RT queue is examined
+ * before the LowP one on dequeuing from the head. The Low Priority Queue is
+ * ordered by time, and the RT queue is ordered by time weighted by
+ * RT-priority. In addition, it shows that the Job Scheduler Core will start a
+ * Soft-Stop Timer (SS-Timer) when it dequeue's and submits a job. The
+ * Soft-Stop time is set by a global configuration value, and must be a value
+ * appropriate for the policy. For example, this could include "don't run a
+ * soft-stop timer" for a First-Come-First-Served (FCFS) policy.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core."
+ *
+ * @section sec_kbase_js_policy_operation_prio Dealing with Priority
+ *
+ * Priority applies separately to a context as a whole, and to the jobs within
+ * a context. The jobs specify a priority in the base_jd_atom::prio member, but
+ * it is independent of the context priority. That is, it only affects
+ * scheduling of atoms within a context. Refer to @ref base_jd_prio for more
+ * details. The meaning of the context's priority value is up to the policy
+ * itself, and could be a logarithmic scale instead of a linear scale (e.g. the
+ * policy could implement an increase/decrease in priority by 1 results in an
+ * increase/decrease in \em proportion of time spent scheduled in by 25%, an
+ * effective change in timeslice by 11%).
+ *
+ * It is up to the policy whether a boost in priority boosts the priority of
+ * the entire context (e.g. to such an extent where it may pre-empt other
+ * running contexts). If it chooses to do this, the Policy must make sure that
+ * only jobs from high-priority contexts are run, and that the context is
+ * scheduled out once only jobs from low priority contexts remain. This ensures
+ * that the low priority contexts do not gain from the priority boost, yet they
+ * still get scheduled correctly with respect to other low priority contexts.
+ *
+ *
+ * @section sec_kbase_js_policy_operation_irq IRQ Path
+ *
+ * The following happens on the IRQ path from the Job Scheduler Core:
+ * - Note the slot that completed (for later)
+ * - Log the time spent by the job (and implicitly, the time spent by the
+ * context)
+ * - call kbasep_js_policy_log_job_result() <em>in the context of the irq
+ * handler.</em>
+ * - This must happen regardless of whether the job completed successfully or
+ * not (otherwise the context gets away with DoS'ing the system with faulty jobs)
+ * - What was the result of the job?
+ * - If Completed: job is just removed from the system
+ * - If Hard-stop or failure: job is removed from the system
+ * - If Soft-stop: queue the book-keeping work onto a work-queue: have a
+ * work-queue call kbasep_js_policy_enqueue_job()
+ * - Check the timeslice used by the owning context
+ * - call kbasep_js_policy_should_remove_ctx() <em>in the context of the irq
+ * handler.</em>
+ * - If this returns true, clear the "allowed" flag.
+ * - Check the ctx's flags for "allowed", "has jobs to run" and "is running
+ * jobs"
+ * - And so, should the context stay scheduled in?
+ * - If No, push onto a work-queue the work of scheduling out the old context,
+ * and getting a new one. That is:
+ * - kbasep_js_policy_runpool_remove_ctx() on old_ctx
+ * - kbasep_js_policy_enqueue_ctx() on old_ctx
+ * - kbasep_js_policy_dequeue_head_ctx() to get new_ctx
+ * - kbasep_js_policy_runpool_add_ctx() on new_ctx
+ * - (all of this work is deferred on a work-queue to keep the IRQ handler quick)
+ * - If there is space in the completed job slots' HEAD/NEXT registers, run the next job:
+ * - kbasep_js_policy_dequeue_job() <em>in the context of the irq
+ * handler</em> with core_req set to that of the completing slot
+ * - if this returned true, submit the job to the completed slot.
+ * - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ * - If kbasep_js_policy_dequeue_job() returned false, submit some work to
+ * the work-queue to retry from outside of IRQ context (calling
+ * kbasep_js_policy_dequeue_job() from a work-queue).
+ *
+ * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT,
+ * this sequence could loop a large number of times: this could happen if
+ * the jobs submitted completed on the GPU very quickly (in a few cycles), such
+ * as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to take
+ * more jobs, causing us to loop until we run out of jobs.
+ *
+ * To mitigate this, we must limit the number of jobs submitted per slot during
+ * the IRQ handler - for example, no more than 2 jobs per slot per IRQ should
+ * be sufficient (to fill up the HEAD + NEXT registers in normal cases). For
+ * Mali-T600 with 3 job slots, this means that up to 6 jobs could be submitted per
+ * slot. Note that IRQ Throttling can make this situation commonplace: 6 jobs
+ * could complete but the IRQ for each of them is delayed by the throttling. By
+ * the time you get the IRQ, all 6 jobs could've completed, meaning you can
+ * submit jobs to fill all 6 HEAD+NEXT registers again.
+ *
+ * @note As much work is deferred as possible, which includes the scheduling
+ * out of a context and scheduling in a new context. However, we can still make
+ * starting a single high-priorty context quick despite this:
+ * - On Mali-T600 family, there is one more AS than JSs.
+ * - This means we can very quickly schedule out one AS, no matter what the
+ * situation (because there will always be one AS that's not currently running
+ * on the job slot - it can only have a job in the NEXT register).
+ * - Even with this scheduling out, fair-share can still be guaranteed e.g. by
+ * a timeline-based Completely Fair Scheduler.
+ * - When our high-priority context comes in, we can do this quick-scheduling
+ * out immediately, and then schedule in the high-priority context without having to block.
+ * - This all assumes that the context to schedule out is of lower
+ * priority. Otherwise, we will have to block waiting for some other low
+ * priority context to finish its jobs. Note that it's likely (but not
+ * impossible) that the high-priority context \b is running jobs, by virtue of
+ * it being high priority.
+ * - Therefore, we can give a high liklihood that on Mali-T600 at least one
+ * high-priority context can be started very quickly. For the general case, we
+ * can guarantee starting (no. ASs) - (no. JSs) high priority contexts
+ * quickly. In any case, there is a high likelihood that we're able to start
+ * more than one high priority context quickly.
+ *
+ * In terms of the functions used in the IRQ handler directly, these are the
+ * perfomance considerations:
+ * - kbase_js_policy_log_job_result():
+ * - This is just adding to a 64-bit value (possibly even a 32-bit value if we
+ * only store the time the job's recently spent - see below on 'priority weighting')
+ * - For priority weighting, a divide operation ('div') could happen, but
+ * this can happen in a deferred context (outside of IRQ) when scheduling out
+ * the ctx; as per our Engineering Specification, the contexts of different
+ * priority still stay scheduled in for the same timeslice, but higher priority
+ * ones scheduled back in more often.
+ * - That is, the weighted and unweighted times must be stored separately, and
+ * the weighted time is only updated \em outside of IRQ context.
+ * - Of course, this divide is more likely to be a 'multiply by inverse of the
+ * weight', assuming that the weight (priority) doesn't change.
+ * - kbasep_js_policy_should_remove_ctx():
+ * - This is usually just a comparison of the stored time value against some
+ * maximum value.
+ *
+ * @note all deferred work can be wrapped up into one call - we usually need to
+ * indicate that a job/bag is done outside of IRQ context anyway.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit Submission path
+ *
+ * Start with a Context with no jobs present, and assume equal priority of all
+ * contexts in the system. The following work all happens outside of IRQ
+ * Context :
+ * - As soon as job is made 'ready to 'run', then is must be registerd with the Job
+ * Scheduler Policy:
+ * - 'Ready to run' means they've satisified their dependencies in the
+ * Kernel-side Job Dispatch system.
+ * - Call kbasep_js_policy_enqueue_job()
+ * - This indicates that the job should be scheduled (it is ready to run).
+ * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs
+ * 'ready to run', we enqueue the context on the policy queue:
+ * - Call kbasep_js_policy_enqueue_ctx()
+ * - This indicates that the \em ctx should be scheduled (it is ready to run)
+ *
+ * Next, we need to handle adding a context to the Run Pool - if it's sensible
+ * to do so. This can happen due to two reasons:
+ * -# A context is enqueued as above, and there are ASs free for it to run on
+ * (e.g. it is the first context to be run, in which case it can be added to
+ * the Run Pool immediately after enqueuing on the Policy Queue)
+ * -# A previous IRQ caused another ctx to be scheduled out, requiring that the
+ * context at the head of the queue be scheduled in. Such steps would happen in
+ * a work queue (work deferred from the IRQ context).
+ *
+ * In both cases, we'd handle it as follows:
+ * - Get the context at the Head of the Policy Queue:
+ * - Call kbasep_js_policy_dequeue_head_ctx()
+ * - Assign the Context an Address Space (Assert that there will be one free,
+ * given the above two reasons)
+ * - Add this context to the Run Pool:
+ * - Call kbasep_js_policy_runpool_add_ctx()
+ * - Now see if a job should be run:
+ * - Mostly, this will be done in the IRQ handler at the completion of a
+ * previous job.
+ * - However, there are two cases where this cannot be done: a) The first job
+ * enqueued to the system (there is no previous IRQ to act upon) b) When jobs
+ * are submitted at a low enough rate to not fill up all Job Slots (or, not to
+ * fill both the 'HEAD' and 'NEXT' registers in the job-slots)
+ * - Hence, on each ctx <b>and job</b> submission we should try to see if we
+ * can run a job:
+ * - For each job slot that has free space (in NEXT or HEAD+NEXT registers):
+ * - Call kbasep_js_policy_dequeue_job() with core_req set to that of the
+ * slot
+ * - if we got one, submit it to the job slot.
+ * - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *
+ * The above case shows that we should attempt to run jobs in cases where a) a ctx
+ * has been added to the Run Pool, and b) new jobs have been added to a context
+ * in the Run Pool:
+ * - In the latter case, the context is in the runpool because it's got a job
+ * ready to run, or is already running a job
+ * - We could just wait until the IRQ handler fires, but for certain types of
+ * jobs this can take comparatively a long time to complete, e.g. GLES FS jobs
+ * generally take much longer to run that GLES CS jobs, which are vertex shader
+ * jobs.
+ * - Therefore, when a new job appears in the ctx, we must check the job-slots
+ * to see if they're free, and run the jobs as before.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts
+ *
+ * For High Priority Contexts on Mali-T600, we can make sure that at least 1 of
+ * them can be scheduled in immediately to start high prioriy jobs. In general,
+ * (no. ASs) - (no JSs) high priority contexts may be started immediately. The
+ * following describes how this happens:
+ *
+ * Similar to the previous section, consider what happens with a high-priority
+ * context (a context with a priority higher than that of any in the Run Pool)
+ * that starts out with no jobs:
+ * - A job becomes ready to run on the context, and so we enqueue the context
+ * on the Policy's Queue.
+ * - However, we'd like to schedule in this context immediately, instead of
+ * waiting for one of the Run Pool contexts' timeslice to expire
+ * - The policy's Enqueue function must detect this (because it is the policy
+ * that embodies the concept of priority), and take appropriate action
+ * - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run
+ * Pool to see if a lower priority context should be scheduled out, and then
+ * schedule in the High Priority context.
+ * - For Mali-T600, we can always pick a context to schedule out immediately
+ * (because there are more ASs than JSs), and so scheduling out a victim context
+ * and scheduling in the high priority context can happen immediately.
+ * - If a policy implements fair-sharing, then this can still ensure the
+ * victim later on gets a fair share of the GPU.
+ * - As a note, consider whether the victim can be of equal/higher priority
+ * than the incoming context:
+ * - Usually, higher priority contexts will be the ones currently running
+ * jobs, and so the context with the lowest priority is usually not running
+ * jobs.
+ * - This makes it likely that the victim context is low priority, but
+ * it's not impossible for it to be a high priority one:
+ * - Suppose 3 high priority contexts are submitting only FS jobs, and one low
+ * priority context submitting CS jobs. Then, the context not running jobs will
+ * be one of the hi priority contexts (because only 2 FS jobs can be
+ * queued/running on the GPU HW for Mali-T600).
+ * - The problem can be mitigated by extra action, but it's questionable
+ * whether we need to: we already have a high likelihood that there's at least
+ * one high priority context - that should be good enough.
+ * - And so, this method makes sure that at least one high priority context
+ * can be started very quickly, but more than one high priority contexts could be
+ * delayed (up to one timeslice).
+ * - To improve this, use a GPU with a higher number of Address Spaces vs Job
+ * Slots.
+ * - At this point, let's assume this high priority context has been scheduled
+ * in immediately. The next step is to ensure it can start some jobs quickly.
+ * - It must do this by Soft-Stopping jobs on any of the Job Slots that it can
+ * submit to.
+ * - The rest of the logic for starting the jobs is taken care of by the IRQ
+ * handler. All the policy needs to do is ensure that
+ * kbasep_js_policy_dequeue_job() will return the jobs from the high priority
+ * context.
+ *
+ * @note in SS state, we currently only use 2 job-slots (even for T608, but
+ * this might change in future). In this case, it's always possible to schedule
+ * out 2 ASs quickly (their jobs won't be in the HEAD registers). At the same
+ * time, this maximizes usage of the job-slots (only 2 are in use), because you
+ * can guarantee starting of the jobs from the High Priority contexts immediately too.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_notes Notes
+ *
+ * - In this design, a separate 'init' is needed from dequeue/requeue, so that
+ * information can be retained between the dequeue/requeue calls. For example,
+ * the total time spent for a context/job could be logged between
+ * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just
+ * initializes that information to some known state.
+ *
+ *
+ *
+ */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_policy Job Scheduler Policy APIs
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy structure
+ */
+union kbasep_js_policy;
+
+/**
+ * @brief Initialize the Job Scheduler Policy
+ */
+int kbasep_js_policy_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler Policy
+ */
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy);
+
+/**
+ * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Ctx Info structure
+ *
+ * This structure is embedded in the struct kbase_context structure. It is used to:
+ * - track information needed for the policy to schedule the context (e.g. time
+ * used, OS priority etc.)
+ * - link together kbase_contexts into a queue, so that a struct kbase_context can be
+ * obtained as the container of the policy ctx info. This allows the API to
+ * return what "the next context" should be.
+ * - obtain other information already stored in the struct kbase_context for
+ * scheduling purposes (e.g process ID to get the priority of the originating
+ * process)
+ */
+union kbasep_js_policy_ctx_info;
+
+/**
+ * @brief Initialize a ctx for use with the Job Scheduler Policy
+ *
+ * This effectively initializes the union kbasep_js_policy_ctx_info structure within
+ * the struct kbase_context (itself located within the kctx->jctx.sched_info structure).
+ */
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Terminate resources associated with using a ctx in the Job Scheduler
+ * Policy.
+ */
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Enqueue a context onto the Job Scheduler Policy Queue
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out a low
+ * priority context from the Run Pool to allow the high priority context to be
+ * scheduled in.
+ *
+ * If the context has the privileged flag set, it will always be kept at the
+ * head of the queue.
+ *
+ * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ */
+void kbasep_js_policy_enqueue_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if a context was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no contexts were available.
+ */
+bool kbasep_js_policy_dequeue_head_ctx(union kbasep_js_policy *js_policy, struct kbase_context ** const kctx_ptr);
+
+/**
+ * @brief Evict a context from the Job Scheduler Policy Queue
+ *
+ * This is only called as part of destroying a kbase_context.
+ *
+ * There are many reasons why this might fail during the lifetime of a
+ * context. For example, the context is in the process of being scheduled. In
+ * that case a thread doing the scheduling might have a pointer to it, but the
+ * context is neither in the Policy Queue, nor is it in the Run
+ * Pool. Crucially, neither the Policy Queue, Run Pool, or the Context itself
+ * are locked.
+ *
+ * Hence to find out where in the system the context is, it is important to do
+ * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member.
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if the context was evicted from the Policy Queue
+ * @return false if the context was not found in the Policy Queue
+ */
+bool kbasep_js_policy_try_evict_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Call a function on all jobs belonging to a non-queued, non-running
+ * context, optionally detaching the jobs from the context as it goes.
+ *
+ * At the time of the call, the context is guarenteed to be not-currently
+ * scheduled on the Run Pool (is_scheduled == false), and not present in
+ * the Policy Queue. This is because one of the following functions was used
+ * recently on the context:
+ * - kbasep_js_policy_evict_ctx()
+ * - kbasep_js_policy_runpool_remove_ctx()
+ *
+ * In both cases, no subsequent call was made on the context to any of:
+ * - kbasep_js_policy_runpool_add_ctx()
+ * - kbasep_js_policy_enqueue_ctx()
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs)
+ *
+ * \a detach_jobs must only be set when cancelling jobs (which occurs as part
+ * of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx,
+ kbasep_js_policy_ctx_job_cb callback, bool detach_jobs);
+
+/**
+ * @brief Add a context to the Job Scheduler Policy's Run Pool
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out low
+ * priority jobs that are currently running on the GPU.
+ *
+ * The number of contexts present in the Run Pool will never be more than the
+ * number of Address Spaces.
+ *
+ * The following guarentees are made about the state of the system when this
+ * is called:
+ * - kctx->as_nr member is valid
+ * - the context has its submit_allowed flag set
+ * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid
+ * - The refcount of the context is guarenteed to be zero.
+ * - kbasep_js_kctx_info::ctx::is_scheduled will be true.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Remove a context from the Job Scheduler Policy's Run Pool
+ *
+ * The kctx->as_nr member is valid and the context has its submit_allowed flag
+ * set when this is called. The state of
+ * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also
+ * valid. The refcount of the context is guarenteed to be zero.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Indicate whether a context should be removed from the Run Pool
+ * (should be scheduled out).
+ *
+ * The kbasep_js_device_data::runpool_irq::lock will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ */
+bool kbasep_js_policy_should_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Synchronize with any timers acting upon the runpool
+ *
+ * The policy should check whether any timers it owns should be running. If
+ * they should not, the policy must cancel such timers and ensure they are not
+ * re-run by the time this function finishes.
+ *
+ * In particular, the timers must not be running when there are no more contexts
+ * on the runpool, because the GPU could be powered off soon after this call.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ */
+void kbasep_js_policy_runpool_timers_sync(union kbasep_js_policy *js_policy);
+
+
+/**
+ * @brief Indicate whether a new context has an higher priority than the current context.
+ *
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx
+ *
+ * This function must not sleep, because an IRQ spinlock might be held whilst
+ * this is called.
+ *
+ * @note There is nothing to stop the priority of \a current_ctx changing
+ * during or immediately after this function is called (because its jsctx_mutex
+ * cannot be held). Therefore, this function should only be seen as a heuristic
+ * guide as to whether \a new_ctx is higher priority than \a current_ctx
+ */
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx);
+
+ /** @} *//* end group kbase_js_policy_ctx */
+
+/**
+ * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Job Info structure
+ *
+ * This structure is embedded in the struct kbase_jd_atom structure. It is used to:
+ * - track information needed for the policy to schedule the job (e.g. time
+ * used, etc.)
+ * - link together jobs into a queue/buffer, so that a struct kbase_jd_atom can be
+ * obtained as the container of the policy job info. This allows the API to
+ * return what "the next job" should be.
+ */
+union kbasep_js_policy_job_info;
+
+/**
+ * @brief Initialize a job for use with the Job Scheduler Policy
+ *
+ * This function initializes the union kbasep_js_policy_job_info structure within the
+ * kbase_jd_atom. It will only initialize/allocate resources that are specific
+ * to the job.
+ *
+ * That is, this function makes \b no attempt to:
+ * - initialize any context/policy-wide information
+ * - enqueue the job on the policy.
+ *
+ * At some later point, the following functions must be called on the job, in this order:
+ * - kbasep_js_policy_register_job() to register the job and initialize policy/context wide data.
+ * - kbasep_js_policy_enqueue_job() to enqueue the job
+ *
+ * A job must only ever be initialized on the Policy once, and must be
+ * terminated on the Policy before the job is freed.
+ *
+ * The caller will not be holding any locks, and so this function will not
+ * modify any information in \a kctx or \a js_policy.
+ *
+ * @return 0 if initialization was correct.
+ */
+int kbasep_js_policy_init_job(const union kbasep_js_policy *js_policy, const struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * Registers the job with the policy. This is used to track the job before it
+ * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). Specifically,
+ * it is used to update information under a lock that could not be updated at
+ * kbasep_js_policy_init_job() time (such as context/policy-wide data).
+ *
+ * @note This function will not fail, and hence does not allocate any
+ * resources. Any failures that could occur on registration will be caught
+ * during kbasep_js_policy_init_job() instead.
+ *
+ * A job must only ever be registerd on the Policy once, and must be
+ * deregistered on the Policy on completion (whether or not that completion was
+ * success/failure).
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_register_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief De-register context/policy-wide information for a on the Job Scheduler Policy.
+ *
+ * This must be used before terminating the resources associated with using a
+ * job in the Job Scheduler Policy. This function does not itself terminate any
+ * resources, at most it just updates information in the policy and context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool
+ *
+ * The job returned by the policy will match at least one of the bits in the
+ * job slot's core requirements (but it may match more than one, or all @ref
+ * base_jd_core_req bits supported by the job slot).
+ *
+ * In addition, the requirements of the job returned will be a subset of those
+ * requested - the job returned will not have requirements that \a job_slot_idx
+ * cannot satisfy.
+ *
+ * The caller will submit the job to the GPU as soon as the GPU's NEXT register
+ * for the corresponding slot is empty. Of course, the GPU will then only run
+ * this new job when the currently executing job (in the jobslot's HEAD
+ * register) has completed.
+ *
+ * @return true if a job was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no jobs were available among all ctxs in the Run Pool.
+ *
+ * @note base_jd_core_req is currently a u8 - beware of type conversion.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_lock::irq will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex. will be held
+ */
+bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
+
+/**
+ * @brief Requeue a Job back into the the Job Scheduler Policy Run Pool
+ *
+ * This will be used to enqueue a job after its creation and also to requeue
+ * a job into the Run Pool that was previously dequeued (running). It notifies
+ * the policy that the job should be run again at some point later.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Log the result of a job: the time spent on a job/context, and whether
+ * the job failed or not.
+ *
+ * Since a struct kbase_jd_atom contains a pointer to the struct kbase_context owning it,
+ * then this can also be used to log time on either/both the job and the
+ * containing context.
+ *
+ * The completion state of the job can be found by examining \a katom->event.event_code
+ *
+ * If the Job failed and the policy is implementing fair-sharing, then the
+ * policy must penalize the failing job/context:
+ * - At the very least, it should penalize the time taken by the amount of
+ * time spent processing the IRQ in SW. This because a job in the NEXT slot
+ * waiting to run will be delayed until the failing job has had the IRQ
+ * cleared.
+ * - \b Optionally, the policy could apply other penalties. For example, based
+ * on a threshold of a number of failing jobs, after which a large penalty is
+ * applied.
+ *
+ * The kbasep_js_device_data::runpool_mutex will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
+ *
+ * @param js_policy job scheduler policy
+ * @param katom job dispatch atom
+ * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds).
+ */
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us);
+
+ /** @} *//* end group kbase_js_policy_job */
+
+ /** @} *//* end group kbase_js_policy */
+ /** @} *//* end group base_kbase_api */
+ /** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
new file mode 100644
index 00000000000..692460710ce
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
@@ -0,0 +1,297 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler: Completely Fair Policy Implementation
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_js_policy_cfs.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#include <linux/sched/rt.h>
+#endif
+
+/**
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level as whether dumping is enabled for a particular level is down to the integrator.
+ * However this is being used for now as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/* Fixed point constants used for runtime weight calculations */
+#define WEIGHT_FIXEDPOINT_SHIFT 10
+#define WEIGHT_TABLE_SIZE 40
+#define WEIGHT_0_NICE (WEIGHT_TABLE_SIZE/2)
+#define WEIGHT_0_VAL (1 << WEIGHT_FIXEDPOINT_SHIFT)
+
+#define PROCESS_PRIORITY_MIN (-20)
+#define PROCESS_PRIORITY_MAX (19)
+
+/* Defines for easy asserts 'is scheduled'/'is queued'/'is neither queued norscheduled' */
+#define KBASEP_JS_CHECKFLAG_QUEUED (1u << 0) /**< Check the queued state */
+#define KBASEP_JS_CHECKFLAG_SCHEDULED (1u << 1) /**< Check the scheduled state */
+#define KBASEP_JS_CHECKFLAG_IS_QUEUED (1u << 2) /**< Expect queued state to be set */
+#define KBASEP_JS_CHECKFLAG_IS_SCHEDULED (1u << 3) /**< Expect scheduled state to be set */
+
+enum {
+ KBASEP_JS_CHECK_NOTQUEUED = KBASEP_JS_CHECKFLAG_QUEUED,
+ KBASEP_JS_CHECK_NOTSCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED,
+ KBASEP_JS_CHECK_QUEUED = KBASEP_JS_CHECKFLAG_QUEUED | KBASEP_JS_CHECKFLAG_IS_QUEUED,
+ KBASEP_JS_CHECK_SCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED | KBASEP_JS_CHECKFLAG_IS_SCHEDULED
+};
+
+typedef u32 kbasep_js_check;
+
+/*
+ * Private Functions
+ */
+
+/* Table autogenerated using util built from: base/tools/gen_cfs_weight_of_prio/ */
+
+/* weight = 1.25 */
+static const int weight_of_priority[] = {
+ /* -20 */ 11, 14, 18, 23,
+ /* -16 */ 29, 36, 45, 56,
+ /* -12 */ 70, 88, 110, 137,
+ /* -8 */ 171, 214, 268, 335,
+ /* -4 */ 419, 524, 655, 819,
+ /* 0 */ 1024, 1280, 1600, 2000,
+ /* 4 */ 2500, 3125, 3906, 4883,
+ /* 8 */ 6104, 7630, 9538, 11923,
+ /* 12 */ 14904, 18630, 23288, 29110,
+ /* 16 */ 36388, 45485, 56856, 71070
+};
+
+/*
+ * Note: There is nothing to stop the priority of the ctx containing
+ * ctx_info changing during or immediately after this function is called
+ * (because its jsctx_mutex cannot be held during IRQ). Therefore, this
+ * function should only be seen as a heuristic guide as to the priority weight
+ * of the context.
+ */
+static u64 priority_weight(struct kbasep_js_policy_cfs_ctx *ctx_info, u64 time_us)
+{
+ u64 time_delta_us;
+ int priority;
+
+ priority = ctx_info->process_priority;
+
+ /* Adjust runtime_us using priority weight if required */
+ if (priority != 0 && time_us != 0) {
+ int clamped_priority;
+
+ /* Clamp values to min..max weights */
+ if (priority > PROCESS_PRIORITY_MAX)
+ clamped_priority = PROCESS_PRIORITY_MAX;
+ else if (priority < PROCESS_PRIORITY_MIN)
+ clamped_priority = PROCESS_PRIORITY_MIN;
+ else
+ clamped_priority = priority;
+
+ /* Fixed point multiplication */
+ time_delta_us = (time_us * weight_of_priority[WEIGHT_0_NICE + clamped_priority]);
+ /* Remove fraction */
+ time_delta_us = time_delta_us >> WEIGHT_FIXEDPOINT_SHIFT;
+ /* Make sure the time always increases */
+ if (0 == time_delta_us)
+ time_delta_us++;
+ } else {
+ time_delta_us = time_us;
+ }
+
+ return time_delta_us;
+}
+
+#if KBASE_TRACE_ENABLE
+static int kbasep_js_policy_trace_get_refcnt_nolock(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ int as_nr;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ as_nr = kctx->as_nr;
+ if (as_nr != KBASEP_AS_NR_INVALID) {
+ struct kbasep_js_per_as_data *js_per_as_data;
+
+ js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+ refcnt = js_per_as_data->as_busy_refcount;
+ }
+
+ return refcnt;
+}
+
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ unsigned long flags;
+ struct kbasep_js_device_data *js_devdata;
+ int refcnt = 0;
+
+ js_devdata = &kbdev->js_data;
+
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ return refcnt;
+}
+#else /* KBASE_TRACE_ENABLE */
+static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ CSTD_UNUSED(kbdev);
+ CSTD_UNUSED(kctx);
+ return 0;
+}
+#endif /* KBASE_TRACE_ENABLE */
+
+
+/*
+ * Non-private functions
+ */
+
+int kbasep_js_policy_init(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_policy_cfs *policy_info;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ js_devdata = &kbdev->js_data;
+ policy_info = &js_devdata->policy.cfs;
+
+ atomic64_set(&policy_info->least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+ atomic64_set(&policy_info->rt_least_runtime_us, KBASEP_JS_RUNTIME_EMPTY);
+
+ policy_info->head_runtime_us = 0;
+
+ return 0;
+}
+
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy)
+{
+ CSTD_UNUSED(js_policy);
+}
+
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+ struct kbasep_js_device_data *js_devdata;
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbasep_js_policy_cfs *policy_info;
+ int policy;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ js_devdata = &kbdev->js_data;
+ policy_info = &kbdev->js_data.policy.cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_INIT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+ policy = current->policy;
+ if (policy == SCHED_FIFO || policy == SCHED_RR) {
+ ctx_info->process_rt_policy = true;
+ ctx_info->process_priority = (((MAX_RT_PRIO - 1) - current->rt_priority) / 5) - 20;
+ } else {
+ ctx_info->process_rt_policy = false;
+ ctx_info->process_priority = (current->static_prio - MAX_RT_PRIO) - 20;
+ }
+
+ /* Initial runtime (relative to least-run context runtime)
+ *
+ * This uses the Policy Queue's most up-to-date head_runtime_us by using the
+ * queue mutex to issue memory barriers - also ensure future updates to
+ * head_runtime_us occur strictly after this context is initialized */
+ mutex_lock(&js_devdata->queue_mutex);
+
+ /* No need to hold the the runpool_irq.lock here, because we're initializing
+ * the value, and the context is definitely not being updated in the
+ * runpool at this point. The queue_mutex ensures the memory barrier. */
+ ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u));
+
+ mutex_unlock(&js_devdata->queue_mutex);
+
+ return 0;
+}
+
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbasep_js_policy_cfs *policy_info;
+ struct kbase_device *kbdev;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ policy_info = &js_policy->cfs;
+ ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+ /* No work to do */
+}
+
+/*
+ * Job Chain Management
+ */
+
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us)
+{
+ struct kbasep_js_policy_cfs_ctx *ctx_info;
+ struct kbase_context *parent_ctx;
+
+ KBASE_DEBUG_ASSERT(js_policy != NULL);
+ KBASE_DEBUG_ASSERT(katom != NULL);
+ CSTD_UNUSED(js_policy);
+
+ parent_ctx = katom->kctx;
+ KBASE_DEBUG_ASSERT(parent_ctx != NULL);
+
+ ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ ctx_info->runtime_us += priority_weight(ctx_info, time_spent_us);
+
+ katom->time_spent_us += time_spent_us;
+}
+
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx)
+{
+ struct kbasep_js_policy_cfs_ctx *current_ctx_info;
+ struct kbasep_js_policy_cfs_ctx *new_ctx_info;
+
+ KBASE_DEBUG_ASSERT(current_ctx != NULL);
+ KBASE_DEBUG_ASSERT(new_ctx != NULL);
+ CSTD_UNUSED(js_policy);
+
+ current_ctx_info = &current_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+ new_ctx_info = &new_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+ if (!current_ctx_info->process_rt_policy && new_ctx_info->process_rt_policy)
+ return true;
+
+ if (current_ctx_info->process_rt_policy ==
+ new_ctx_info->process_rt_policy)
+ return true;
+
+ return false;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
new file mode 100644
index 00000000000..b457d8215ab
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Completely Fair Job Scheduler Policy structure definitions
+ */
+
+#ifndef _KBASE_JS_POLICY_CFS_H_
+#define _KBASE_JS_POLICY_CFS_H_
+
+#define KBASEP_JS_RUNTIME_EMPTY ((u64)-1)
+
+/**
+ * struct kbasep_js_policy_cfs - Data for the CFS policy
+ * @head_runtime_us: Number of microseconds the least-run context has been
+ * running for. The kbasep_js_device_data.queue_mutex must
+ * be held whilst updating this.
+ * Reads are possible without this mutex, but an older value
+ * might be read if no memory barries are issued beforehand.
+ * @least_runtime_us: Number of microseconds the least-run context in the
+ * context queue has been running for.
+ * -1 if context queue is empty.
+ * @rt_least_runtime_us: Number of microseconds the least-run context in the
+ * realtime (priority) context queue has been running for.
+ * -1 if realtime context queue is empty
+ */
+struct kbasep_js_policy_cfs {
+ u64 head_runtime_us;
+ atomic64_t least_runtime_us;
+ atomic64_t rt_least_runtime_us;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_ctx - a single linked list of all contexts
+ * @runtime_us: microseconds this context has been running for
+ * @process_rt_policy: set if calling process policy scheme is a realtime
+ * scheduler and will use the priority queue. Non-mutable
+ * after ctx init
+ * @process_priority: calling process NICE priority, in the range -20..19
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when updating
+ * @runtime_us. Initializing will occur on context init and context enqueue
+ * (which can only occur in one thread at a time), but multi-thread access only
+ * occurs while the context is in the runpool.
+ *
+ * Reads are possible without the spinlock, but an older value might be read if
+ * no memory barries are issued beforehand.
+ */
+struct kbasep_js_policy_cfs_ctx {
+ u64 runtime_us;
+ bool process_rt_policy;
+ int process_priority;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_job - per job information for CFS
+ * @ticks: number of ticks that this job has been executing for
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when accessing @ticks.
+ */
+struct kbasep_js_policy_cfs_job {
+ u32 ticks;
+};
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_linux.h b/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100644
index 00000000000..6d1e61fd41e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port. */
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+
+#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API))
+ #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+#else
+ #define KBASE_EXPORT_TEST_API(func)
+#endif
+
+#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
+
+#endif /* _KBASE_LINUX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c
new file mode 100644
index 00000000000..2c7fb5a4032
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c
@@ -0,0 +1,1363 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/compat.h>
+
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_cache_policy.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_hwaccess_time.h>
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+
+/**
+ * @brief Check the zone compatibility of two regions.
+ */
+static int kbase_region_tracker_match_zone(struct kbase_va_region *reg1,
+ struct kbase_va_region *reg2)
+{
+ return ((reg1->flags & KBASE_REG_ZONE_MASK) ==
+ (reg2->flags & KBASE_REG_ZONE_MASK));
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_match_zone);
+
+/* This function inserts a region into the tree. */
+static void kbase_region_tracker_insert(struct kbase_context *kctx, struct kbase_va_region *new_reg)
+{
+ u64 start_pfn = new_reg->start_pfn;
+ struct rb_node **link = &(kctx->reg_rbtree.rb_node);
+ struct rb_node *parent = NULL;
+
+ /* Find the right place in the tree using tree search */
+ while (*link) {
+ struct kbase_va_region *old_reg;
+
+ parent = *link;
+ old_reg = rb_entry(parent, struct kbase_va_region, rblink);
+
+ /* RBTree requires no duplicate entries. */
+ KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);
+
+ if (old_reg->start_pfn > start_pfn)
+ link = &(*link)->rb_left;
+ else
+ link = &(*link)->rb_right;
+ }
+
+ /* Put the new node there, and rebalance tree */
+ rb_link_node(&(new_reg->rblink), parent, link);
+ rb_insert_color(&(new_reg->rblink), &(kctx->reg_rbtree));
+}
+
+/* Find allocated region enclosing free range. */
+static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free(
+ struct kbase_context *kctx, u64 start_pfn, size_t nr_pages)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ u64 end_pfn = start_pfn + nr_pages;
+
+ rbnode = kctx->reg_rbtree.rb_node;
+ while (rbnode) {
+ u64 tmp_start_pfn, tmp_end_pfn;
+
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ tmp_start_pfn = reg->start_pfn;
+ tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+ /* If start is lower than this, go left. */
+ if (start_pfn < tmp_start_pfn)
+ rbnode = rbnode->rb_left;
+ /* If end is higher than this, then go right. */
+ else if (end_pfn > tmp_end_pfn)
+ rbnode = rbnode->rb_right;
+ else /* Enclosing */
+ return reg;
+ }
+
+ return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ rbnode = kctx->reg_rbtree.rb_node;
+ while (rbnode) {
+ u64 tmp_start_pfn, tmp_end_pfn;
+
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ tmp_start_pfn = reg->start_pfn;
+ tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+ /* If start is lower than this, go left. */
+ if (gpu_pfn < tmp_start_pfn)
+ rbnode = rbnode->rb_left;
+ /* If end is higher than this, then go right. */
+ else if (gpu_pfn >= tmp_end_pfn)
+ rbnode = rbnode->rb_right;
+ else /* Enclosing */
+ return reg;
+ }
+
+ return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+ u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ rbnode = kctx->reg_rbtree.rb_node;
+ while (rbnode) {
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ if (reg->start_pfn > gpu_pfn)
+ rbnode = rbnode->rb_left;
+ else if (reg->start_pfn < gpu_pfn)
+ rbnode = rbnode->rb_right;
+ else
+ return reg;
+
+ }
+
+ return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ /* Note that this search is a linear search, as we do not have a target
+ address in mind, so does not benefit from the rbtree search */
+ rbnode = rb_first(&(kctx->reg_rbtree));
+ while (rbnode) {
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ if ((reg->nr_pages >= nr_pages) &&
+ (reg->flags & KBASE_REG_FREE) &&
+ kbase_region_tracker_match_zone(reg, reg_reqs)) {
+ /* Check alignment */
+ u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+ if ((start_pfn >= reg->start_pfn) &&
+ (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+ ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+ return reg;
+ }
+ rbnode = rb_next(rbnode);
+ }
+
+ return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions. It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ struct rb_node *rbprev;
+ struct kbase_va_region *prev = NULL;
+ struct rb_node *rbnext;
+ struct kbase_va_region *next = NULL;
+
+ int merged_front = 0;
+ int merged_back = 0;
+ int err = 0;
+
+ /* Try to merge with the previous block first */
+ rbprev = rb_prev(&(reg->rblink));
+ if (rbprev) {
+ prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+ if ((prev->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(prev, reg)) {
+ /* We're compatible with the previous VMA, merge with it */
+ prev->nr_pages += reg->nr_pages;
+ rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+ reg = prev;
+ merged_front = 1;
+ }
+ }
+
+ /* Try to merge with the next block second */
+ /* Note we do the lookup here as the tree may have been rebalanced. */
+ rbnext = rb_next(&(reg->rblink));
+ if (rbnext) {
+ /* We're compatible with the next VMA, merge with it */
+ next = rb_entry(rbnext, struct kbase_va_region, rblink);
+ if ((next->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(next, reg)) {
+ next->start_pfn = reg->start_pfn;
+ next->nr_pages += reg->nr_pages;
+ rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+ merged_back = 1;
+ if (merged_front) {
+ /* We already merged with prev, free it */
+ kbase_free_alloced_region(reg);
+ }
+ }
+ }
+
+ /* If we failed to merge then we need to add a new block */
+ if (!(merged_front || merged_back)) {
+ /*
+ * We didn't merge anything. Add a new free
+ * placeholder and remove the original one.
+ */
+ struct kbase_va_region *free_reg;
+
+ free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+ if (!free_reg) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rb_replace_node(&(reg->rblink), &(free_reg->rblink), &(kctx->reg_rbtree));
+ }
+
+ out:
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+ int err = 0;
+
+ /* Must be a free region */
+ KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+ /* start_pfn should be contained within at_reg */
+ KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+ /* at least nr_pages from start_pfn should be contained within at_reg */
+ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+ new_reg->start_pfn = start_pfn;
+ new_reg->nr_pages = nr_pages;
+
+ /* Regions are a whole use, so swap and delete old one. */
+ if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+ rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), &(kctx->reg_rbtree));
+ kbase_free_alloced_region(at_reg);
+ }
+ /* New region replaces the start of the old one, so insert before. */
+ else if (at_reg->start_pfn == start_pfn) {
+ at_reg->start_pfn += nr_pages;
+ KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+ at_reg->nr_pages -= nr_pages;
+
+ kbase_region_tracker_insert(kctx, new_reg);
+ }
+ /* New region replaces the end of the old one, so insert after. */
+ else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
+ at_reg->nr_pages -= nr_pages;
+
+ kbase_region_tracker_insert(kctx, new_reg);
+ }
+ /* New region splits the old one, so insert and create new */
+ else {
+ struct kbase_va_region *new_front_reg;
+
+ new_front_reg = kbase_alloc_free_region(kctx,
+ at_reg->start_pfn,
+ start_pfn - at_reg->start_pfn,
+ at_reg->flags & KBASE_REG_ZONE_MASK);
+
+ if (new_front_reg) {
+ at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
+ at_reg->start_pfn = start_pfn + nr_pages;
+
+ kbase_region_tracker_insert(kctx, new_front_reg);
+ kbase_region_tracker_insert(kctx, new_reg);
+ } else {
+ err = -ENOMEM;
+ }
+ }
+
+ return err;
+}
+
+/**
+ * @brief Add a VA region to the list.
+ */
+int kbase_add_va_region(struct kbase_context *kctx,
+ struct kbase_va_region *reg, u64 addr,
+ size_t nr_pages, size_t align)
+{
+ struct kbase_va_region *tmp;
+ u64 gpu_pfn = addr >> PAGE_SHIFT;
+ int err = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != reg);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (!align)
+ align = 1;
+
+ /* must be a power of 2 */
+ KBASE_DEBUG_ASSERT((align & (align - 1)) == 0);
+ KBASE_DEBUG_ASSERT(nr_pages > 0);
+
+ /* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */
+ if (gpu_pfn) {
+ struct device *dev = kctx->kbdev->dev;
+
+ KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));
+
+ tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages);
+ if (!tmp) {
+ dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ if ((!kbase_region_tracker_match_zone(tmp, reg)) ||
+ (!(tmp->flags & KBASE_REG_FREE))) {
+ dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK);
+ dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages);
+ dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align);
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages);
+ if (err) {
+ dev_warn(dev, "Failed to insert va region");
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ goto exit;
+ }
+
+ /* Path 2: Map any free address which meets the requirements. */
+ {
+ u64 start_pfn;
+
+ tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1);
+ err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages);
+ }
+
+ exit:
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_add_va_region);
+
+/**
+ * @brief Initialize the internal region tracker data structure.
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg)
+{
+ kctx->reg_rbtree = RB_ROOT;
+ kbase_region_tracker_insert(kctx, same_va_reg);
+
+ /* exec and custom_va_reg doesn't always exist */
+ if (exec_reg && custom_va_reg) {
+ kbase_region_tracker_insert(kctx, exec_reg);
+ kbase_region_tracker_insert(kctx, custom_va_reg);
+ }
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+ struct rb_node *rbnode;
+ struct kbase_va_region *reg;
+
+ do {
+ rbnode = rb_first(&(kctx->reg_rbtree));
+ if (rbnode) {
+ rb_erase(rbnode, &(kctx->reg_rbtree));
+ reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+ kbase_free_alloced_region(reg);
+ }
+ } while (rbnode);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+ struct kbase_va_region *same_va_reg;
+ struct kbase_va_region *exec_reg = NULL;
+ struct kbase_va_region *custom_va_reg = NULL;
+ size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+ u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+ u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+
+#if defined(CONFIG_ARM64)
+ same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+ same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+ if (kctx->is_compat)
+ same_va_bits = 32;
+ else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+ same_va_bits = 33;
+#endif
+
+ if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits)
+ return -EINVAL;
+
+ /* all have SAME_VA */
+ same_va_reg = kbase_alloc_free_region(kctx, 1,
+ (1ULL << (same_va_bits - PAGE_SHIFT)) - 2,
+ KBASE_REG_ZONE_SAME_VA);
+
+ if (!same_va_reg)
+ return -ENOMEM;
+
+#ifdef CONFIG_64BIT
+ /* only 32-bit clients have the other two zones */
+ if (kctx->is_compat) {
+#endif
+ if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+ kbase_free_alloced_region(same_va_reg);
+ return -EINVAL;
+ }
+ /* If the current size of TMEM is out of range of the
+ * virtual address space addressable by the MMU then
+ * we should shrink it to fit
+ */
+ if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+ custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+ exec_reg = kbase_alloc_free_region(kctx,
+ KBASE_REG_ZONE_EXEC_BASE,
+ KBASE_REG_ZONE_EXEC_SIZE,
+ KBASE_REG_ZONE_EXEC);
+
+ if (!exec_reg) {
+ kbase_free_alloced_region(same_va_reg);
+ return -ENOMEM;
+ }
+
+ custom_va_reg = kbase_alloc_free_region(kctx,
+ KBASE_REG_ZONE_CUSTOM_VA_BASE,
+ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+ if (!custom_va_reg) {
+ kbase_free_alloced_region(same_va_reg);
+ kbase_free_alloced_region(exec_reg);
+ return -ENOMEM;
+ }
+#ifdef CONFIG_64BIT
+ }
+#endif
+
+ kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+ return 0;
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+ struct kbasep_mem_device *memdev;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ memdev = &kbdev->memdev;
+
+ /* Initialize memory usage */
+ atomic_set(&memdev->used_pages, 0);
+
+ /* nothing to do, zero-inited when struct kbase_device was created */
+ return 0;
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+ struct kbasep_mem_device *memdev;
+ int pages;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ memdev = &kbdev->memdev;
+
+ pages = atomic_read(&memdev->used_pages);
+ if (pages != 0)
+ dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_term);
+
+
+
+
+/**
+ * @brief Allocate a free region object.
+ *
+ * The allocated object is not part of any list yet, and is flagged as
+ * KBASE_REG_FREE. No mapping is allocated yet.
+ *
+ * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC
+ *
+ */
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone)
+{
+ struct kbase_va_region *new_reg;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ /* zone argument should only contain zone related region flags */
+ KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
+ KBASE_DEBUG_ASSERT(nr_pages > 0);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
+
+ new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+
+ if (!new_reg)
+ return NULL;
+
+ new_reg->cpu_alloc = NULL; /* no alloc bound yet */
+ new_reg->gpu_alloc = NULL; /* no alloc bound yet */
+ new_reg->kctx = kctx;
+ new_reg->flags = zone | KBASE_REG_FREE;
+
+ new_reg->flags |= KBASE_REG_GROWABLE;
+
+ /* Set up default MEMATTR usage */
+ new_reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
+
+ new_reg->start_pfn = start_pfn;
+ new_reg->nr_pages = nr_pages;
+
+ return new_reg;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+
+/**
+ * @brief Free a region object.
+ *
+ * The described region must be freed of any mapping.
+ *
+ * If the region is not flagged as KBASE_REG_FREE, the region's
+ * alloc object will be released.
+ * It is a bug if no alloc object exists for non-free regions.
+ *
+ */
+void kbase_free_alloced_region(struct kbase_va_region *reg)
+{
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ if (!(reg->flags & KBASE_REG_FREE)) {
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+ /* To detect use-after-free in debug builds */
+ KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE);
+ }
+ kfree(reg);
+}
+
+KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
+
+void kbase_mmu_update(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock);
+ /* ASSERT that the context has a valid as_nr, which is only the case
+ * when it's scheduled in.
+ *
+ * as_nr won't change because the caller has the runpool_irq lock */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ kctx->kbdev->mmu_mode->update(kctx);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_update);
+
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ /* ASSERT that the context has a valid as_nr, which is only the case
+ * when it's scheduled in.
+ *
+ * as_nr won't change because the caller has the runpool_irq lock */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_disable);
+
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+ kbdev->mmu_mode->disable_as(kbdev, as_nr);
+}
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align)
+{
+ int err;
+ size_t i = 0;
+ unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
+#if defined(CONFIG_MALI_CACHE_COHERENT)
+ unsigned long attr =
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
+#else
+ unsigned long attr =
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
+#endif
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != reg);
+
+ err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
+ if (err)
+ return err;
+
+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+ u64 stride;
+ struct kbase_mem_phy_alloc *alloc;
+
+ alloc = reg->gpu_alloc;
+ stride = alloc->imported.alias.stride;
+ KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
+ for (i = 0; i < alloc->imported.alias.nents; i++) {
+ if (alloc->imported.alias.aliased[i].alloc) {
+ err = kbase_mmu_insert_pages(kctx,
+ reg->start_pfn + (i * stride),
+ alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset,
+ alloc->imported.alias.aliased[i].length,
+ reg->flags);
+ if (err)
+ goto bad_insert;
+
+ kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc);
+ } else {
+ err = kbase_mmu_insert_single_page(kctx,
+ reg->start_pfn + i * stride,
+ kctx->aliasing_sink_page,
+ alloc->imported.alias.aliased[i].length,
+ (reg->flags & mask) | attr);
+
+ if (err)
+ goto bad_insert;
+ }
+ }
+ } else {
+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags);
+ if (err)
+ goto bad_insert;
+ kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc);
+ }
+
+ return err;
+
+bad_insert:
+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+ u64 stride;
+
+ stride = reg->gpu_alloc->imported.alias.stride;
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+ while (i--)
+ if (reg->gpu_alloc->imported.alias.aliased[i].alloc) {
+ kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length);
+ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+ }
+ }
+
+ kbase_remove_va_region(kctx, reg);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
+
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ int err;
+
+ if (reg->start_pfn == 0)
+ return 0;
+
+ if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
+ size_t i;
+
+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages);
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased);
+ for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++)
+ if (reg->gpu_alloc->imported.alias.aliased[i].alloc)
+ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc);
+ } else {
+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg));
+ kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
+ }
+
+ if (err)
+ return err;
+
+ err = kbase_remove_va_region(kctx, reg);
+ return err;
+}
+
+static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping_of_region(const struct kbase_va_region *reg, unsigned long uaddr, size_t size)
+{
+ struct kbase_cpu_mapping *map;
+ struct list_head *pos;
+
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+
+ if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
+ return NULL;
+
+ list_for_each(pos, &reg->cpu_alloc->mappings) {
+ map = list_entry(pos, struct kbase_cpu_mapping, mappings_list);
+ if (map->vm_start <= uaddr && map->vm_end >= uaddr + size)
+ return map;
+ }
+
+ return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_of_region);
+
+int kbasep_find_enclosing_cpu_mapping_offset(
+ struct kbase_context *kctx, u64 gpu_addr,
+ unsigned long uaddr, size_t size, u64 * offset)
+{
+ struct kbase_cpu_mapping *map = NULL;
+ const struct kbase_va_region *reg;
+ int err = -EINVAL;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ kbase_gpu_vm_lock(kctx);
+
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+ if (reg && !(reg->flags & KBASE_REG_FREE)) {
+ map = kbasep_find_enclosing_cpu_mapping_of_region(reg, uaddr,
+ size);
+ if (map) {
+ *offset = (uaddr - PTR_TO_U64(map->vm_start)) +
+ (map->page_off << PAGE_SHIFT);
+ err = 0;
+ }
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
+
+void kbase_sync_single(struct kbase_context *kctx,
+ phys_addr_t cpu_pa, phys_addr_t gpu_pa,
+ off_t offset, size_t size, enum kbase_sync_type sync_fn)
+{
+ struct page *cpu_page;
+
+ cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
+
+ if (likely(cpu_pa == gpu_pa)) {
+ dma_addr_t dma_addr;
+
+ BUG_ON(!cpu_page);
+ BUG_ON(offset + size > PAGE_SIZE);
+
+ dma_addr = kbase_dma_addr(cpu_page) + offset;
+ if (sync_fn == KBASE_SYNC_TO_CPU)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
+ size, DMA_BIDIRECTIONAL);
+ else if (sync_fn == KBASE_SYNC_TO_DEVICE)
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ size, DMA_BIDIRECTIONAL);
+ } else {
+ void *src = NULL;
+ void *dst = NULL;
+ struct page *gpu_page;
+
+ if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
+ return;
+
+ gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+
+ if (sync_fn == KBASE_SYNC_TO_DEVICE) {
+ src = ((unsigned char *)kmap(cpu_page)) + offset;
+ dst = ((unsigned char *)kmap(gpu_page)) + offset;
+ } else if (sync_fn == KBASE_SYNC_TO_CPU) {
+ dma_sync_single_for_cpu(kctx->kbdev->dev,
+ kbase_dma_addr(gpu_page) + offset,
+ size, DMA_BIDIRECTIONAL);
+ src = ((unsigned char *)kmap(gpu_page)) + offset;
+ dst = ((unsigned char *)kmap(cpu_page)) + offset;
+ }
+ memcpy(dst, src, size);
+ kunmap(gpu_page);
+ kunmap(cpu_page);
+ if (sync_fn == KBASE_SYNC_TO_DEVICE)
+ dma_sync_single_for_device(kctx->kbdev->dev,
+ kbase_dma_addr(gpu_page) + offset,
+ size, DMA_BIDIRECTIONAL);
+ }
+}
+
+static int kbase_do_syncset(struct kbase_context *kctx,
+ struct base_syncset *set, enum kbase_sync_type sync_fn)
+{
+ int err = 0;
+ struct basep_syncset *sset = &set->basep_sset;
+ struct kbase_va_region *reg;
+ struct kbase_cpu_mapping *map;
+ unsigned long start;
+ size_t size;
+ phys_addr_t *cpu_pa;
+ phys_addr_t *gpu_pa;
+ u64 page_off, page_count;
+ u64 i;
+ off_t offset;
+
+ kbase_os_mem_map_lock(kctx);
+ kbase_gpu_vm_lock(kctx);
+
+ /* find the region where the virtual address is contained */
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx,
+ sset->mem_handle);
+ if (!reg) {
+ dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX",
+ sset->mem_handle);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (!(reg->flags & KBASE_REG_CPU_CACHED))
+ goto out_unlock;
+
+ start = (uintptr_t)sset->user_addr;
+ size = (size_t)sset->size;
+
+ map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size);
+ if (!map) {
+ dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
+ start, sset->mem_handle);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ offset = start & (PAGE_SIZE - 1);
+ page_off = map->page_off + ((start - map->vm_start) >> PAGE_SHIFT);
+ page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+ cpu_pa = kbase_get_cpu_phy_pages(reg);
+ gpu_pa = kbase_get_gpu_phy_pages(reg);
+
+ /* Sync first page */
+ if (cpu_pa[page_off]) {
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+
+ kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
+ offset, sz, sync_fn);
+ }
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ /* we grow upwards, so bail on first non-present page */
+ if (!cpu_pa[page_off + i])
+ break;
+
+ kbase_sync_single(kctx, cpu_pa[page_off + i],
+ gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1 && cpu_pa[page_off + page_count - 1]) {
+ size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
+
+ kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
+ gpu_pa[page_off + page_count - 1], 0, sz,
+ sync_fn);
+ }
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ kbase_os_mem_map_unlock(kctx);
+ return err;
+}
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset)
+{
+ int err = -EINVAL;
+ struct basep_syncset *sset;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != syncset);
+
+ sset = &syncset->basep_sset;
+
+ switch (sset->type) {
+ case BASE_SYNCSET_OP_MSYNC:
+ err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_DEVICE);
+ break;
+
+ case BASE_SYNCSET_OP_CSYNC:
+ err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_CPU);
+ break;
+
+ default:
+ dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
+ break;
+ }
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_sync_now);
+
+/* vm lock must be held */
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+ int err;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ BUG_ON(!mutex_is_locked(&kctx->reg_lock));
+ err = kbase_gpu_munmap(kctx, reg);
+ if (err) {
+ dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n");
+ goto out;
+ }
+#ifndef CONFIG_MALI_NO_MALI
+ if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
+ /* Wait for GPU to flush write buffer before freeing physical pages */
+ kbase_wait_write_flush(kctx);
+ }
+#endif
+ /* This will also free the physical pages */
+ kbase_free_alloced_region(reg);
+
+ out:
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free_region);
+
+/**
+ * @brief Free the region from the GPU and unregister it.
+ *
+ * This function implements the free operation on a memory segment.
+ * It will loudly fail if called with outstanding mappings.
+ */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
+{
+ int err = 0;
+ struct kbase_va_region *reg;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ if (0 == gpu_addr) {
+ dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+ return -EINVAL;
+ }
+ kbase_gpu_vm_lock(kctx);
+
+ if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
+ gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
+ int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
+
+ reg = kctx->pending_regions[cookie];
+ if (!reg) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* ask to unlink the cookie as we'll free it */
+
+ kctx->pending_regions[cookie] = NULL;
+ kctx->cookies |= (1UL << cookie);
+
+ kbase_free_alloced_region(reg);
+ } else {
+ /* A real GPU va */
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE)) {
+ dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
+ gpu_addr);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+ /* SAME_VA must be freed through munmap */
+ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
+ gpu_addr);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ err = kbase_mem_free_region(kctx, reg);
+ }
+
+ out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_free);
+
+void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags)
+{
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
+
+ reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
+ /* all memory is now growable */
+ reg->flags |= KBASE_REG_GROWABLE;
+
+ if (flags & BASE_MEM_GROW_ON_GPF)
+ reg->flags |= KBASE_REG_PF_GROW;
+
+ if (flags & BASE_MEM_PROT_CPU_WR)
+ reg->flags |= KBASE_REG_CPU_WR;
+
+ if (flags & BASE_MEM_PROT_CPU_RD)
+ reg->flags |= KBASE_REG_CPU_RD;
+
+ if (flags & BASE_MEM_PROT_GPU_WR)
+ reg->flags |= KBASE_REG_GPU_WR;
+
+ if (flags & BASE_MEM_PROT_GPU_RD)
+ reg->flags |= KBASE_REG_GPU_RD;
+
+ if (0 == (flags & BASE_MEM_PROT_GPU_EX))
+ reg->flags |= KBASE_REG_GPU_NX;
+
+ if (flags & BASE_MEM_COHERENT_SYSTEM ||
+ flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED)
+ reg->flags |= KBASE_REG_SHARE_BOTH;
+ else if (flags & BASE_MEM_COHERENT_LOCAL)
+ reg->flags |= KBASE_REG_SHARE_IN;
+}
+KBASE_EXPORT_TEST_API(kbase_update_region_flags);
+
+int kbase_alloc_phy_pages_helper(
+ struct kbase_mem_phy_alloc *alloc,
+ size_t nr_pages_requested)
+{
+ KBASE_DEBUG_ASSERT(alloc);
+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+
+ if (nr_pages_requested == 0)
+ goto done; /*nothing to do*/
+
+ kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+ kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+ /* Increase mm counters before we allocate pages so that this
+ * allocation is visible to the OOM killer */
+ kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested);
+
+ if (kbase_mem_allocator_alloc(&alloc->imported.kctx->osalloc,
+ nr_pages_requested, alloc->pages + alloc->nents) != 0)
+ goto no_alloc;
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_aux_pagesalloc((s64)nr_pages_requested);
+#endif
+
+ alloc->nents += nr_pages_requested;
+done:
+ return 0;
+
+no_alloc:
+ kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested);
+ kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages);
+ kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+ return -ENOMEM;
+}
+
+int kbase_free_phy_pages_helper(
+ struct kbase_mem_phy_alloc *alloc,
+ size_t nr_pages_to_free)
+{
+ bool syncback;
+ phys_addr_t *start_free;
+
+ KBASE_DEBUG_ASSERT(alloc);
+ KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
+
+ /* early out if nothing to do */
+ if (0 == nr_pages_to_free)
+ return 0;
+
+ start_free = alloc->pages + alloc->nents - nr_pages_to_free;
+
+ syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+ kbase_mem_allocator_free(&alloc->imported.kctx->osalloc,
+ nr_pages_to_free,
+ start_free,
+ syncback);
+
+ alloc->nents -= nr_pages_to_free;
+ kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free);
+ kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->used_pages);
+ kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages);
+
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_aux_pagesalloc(-(s64)nr_pages_to_free);
+#endif
+
+ return 0;
+}
+
+void kbase_mem_kref_free(struct kref *kref)
+{
+ struct kbase_mem_phy_alloc *alloc;
+
+ alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
+
+ switch (alloc->type) {
+ case KBASE_MEM_TYPE_NATIVE: {
+ KBASE_DEBUG_ASSERT(alloc->imported.kctx);
+ kbase_free_phy_pages_helper(alloc, alloc->nents);
+ break;
+ }
+ case KBASE_MEM_TYPE_ALIAS: {
+ /* just call put on the underlying phy allocs */
+ size_t i;
+ struct kbase_aliased *aliased;
+
+ aliased = alloc->imported.alias.aliased;
+ if (aliased) {
+ for (i = 0; i < alloc->imported.alias.nents; i++)
+ if (aliased[i].alloc)
+ kbase_mem_phy_alloc_put(aliased[i].alloc);
+ vfree(aliased);
+ }
+ break;
+ }
+ case KBASE_MEM_TYPE_RAW:
+ /* raw pages, external cleanup */
+ break;
+ #ifdef CONFIG_UMP
+ case KBASE_MEM_TYPE_IMPORTED_UMP:
+ ump_dd_release(alloc->imported.ump_handle);
+ break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case KBASE_MEM_TYPE_IMPORTED_UMM:
+ dma_buf_detach(alloc->imported.umm.dma_buf,
+ alloc->imported.umm.dma_attachment);
+ dma_buf_put(alloc->imported.umm.dma_buf);
+ break;
+#endif
+ case KBASE_MEM_TYPE_TB:{
+ void *tb;
+
+ tb = alloc->imported.kctx->jctx.tb;
+ kbase_device_trace_buffer_uninstall(alloc->imported.kctx);
+ vfree(tb);
+ break;
+ }
+ default:
+ WARN(1, "Unexecpted free of type %d\n", alloc->type);
+ break;
+ }
+
+ /* Free based on allocation type */
+ if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
+ vfree(alloc);
+ else
+ kfree(alloc);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
+{
+ KBASE_DEBUG_ASSERT(NULL != reg);
+ KBASE_DEBUG_ASSERT(vsize > 0);
+
+ /* validate user provided arguments */
+ if (size > vsize || vsize > reg->nr_pages)
+ goto out_term;
+
+ /* Prevent vsize*sizeof from wrapping around.
+ * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
+ */
+ if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
+ goto out_term;
+
+ KBASE_DEBUG_ASSERT(0 != vsize);
+
+ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
+ goto out_term;
+
+ if (reg->cpu_alloc != reg->gpu_alloc) {
+ if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
+ goto out_rollback;
+ }
+
+ return 0;
+
+out_rollback:
+ kbase_free_phy_pages_helper(reg->cpu_alloc, size);
+out_term:
+ return -1;
+}
+
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+
+bool kbase_check_alloc_flags(unsigned long flags)
+{
+ /* Only known input flags should be set. */
+ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+ return false;
+
+ /* At least one flag should be set */
+ if (flags == 0)
+ return false;
+
+ /* Either the GPU or CPU must be reading from the allocated memory */
+ if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
+ return false;
+
+ /* Either the GPU or CPU must be writing to the allocated memory */
+ if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
+ return false;
+
+ /* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */
+ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
+ return false;
+
+ /* GPU should have at least read or write access otherwise there is no
+ reason for allocating. */
+ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+ return false;
+
+ return true;
+}
+
+bool kbase_check_import_flags(unsigned long flags)
+{
+ /* Only known input flags should be set. */
+ if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+ return false;
+
+ /* At least one flag should be set */
+ if (flags == 0)
+ return false;
+
+ /* Imported memory cannot be GPU executable */
+ if (flags & BASE_MEM_PROT_GPU_EX)
+ return false;
+
+ /* Imported memory cannot grow on page fault */
+ if (flags & BASE_MEM_GROW_ON_GPF)
+ return false;
+
+ /* GPU should have at least read or write access otherwise there is no
+ reason for importing. */
+ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
+ return false;
+
+ /* Secure memory cannot be read by the CPU */
+ if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD))
+ return false;
+
+ return true;
+}
+
+/**
+ * @brief Acquire the per-context region list lock
+ */
+void kbase_gpu_vm_lock(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ mutex_lock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
+
+/**
+ * @brief Release the per-context region list lock
+ */
+void kbase_gpu_vm_unlock(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ mutex_unlock(&kctx->reg_lock);
+}
+
+KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h
new file mode 100644
index 00000000000..d8a03526a64
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h
@@ -0,0 +1,774 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.h
+ * Base kernel memory APIs
+ */
+
+#ifndef _KBASE_MEM_H_
+#define _KBASE_MEM_H_
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/kref.h>
+
+#ifdef CONFIG_UMP
+#include <linux/ump.h>
+#endif /* CONFIG_UMP */
+#include "mali_base_kernel.h"
+#include <mali_kbase_hw.h>
+#include "mali_kbase_pm.h"
+#include "mali_kbase_defs.h"
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include "mali_kbase_gator.h"
+#endif
+
+/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */
+
+/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages.
+The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and
+page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table
+updates and generates duplicate page faults as the page table information used by the MMU is not valid. */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */
+
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */
+
+/* This must always be a power of 2 */
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
+#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/**
+ * A CPU mapping
+ */
+struct kbase_cpu_mapping {
+ struct list_head mappings_list;
+ struct kbase_mem_phy_alloc *alloc;
+ struct kbase_context *kctx;
+ struct kbase_va_region *region;
+ pgoff_t page_off;
+ int count;
+ unsigned long vm_start;
+ unsigned long vm_end;
+};
+
+enum kbase_memory_type {
+ KBASE_MEM_TYPE_NATIVE,
+ KBASE_MEM_TYPE_IMPORTED_UMP,
+ KBASE_MEM_TYPE_IMPORTED_UMM,
+ KBASE_MEM_TYPE_ALIAS,
+ KBASE_MEM_TYPE_TB,
+ KBASE_MEM_TYPE_RAW
+};
+
+/* internal structure, mirroring base_mem_aliasing_info,
+ * but with alloc instead of a gpu va (handle) */
+struct kbase_aliased {
+ struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */
+ u64 offset; /* in pages */
+ u64 length; /* in pages */
+};
+
+/**
+ * @brief Physical pages tracking object properties
+ */
+#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1ul << 0)
+#define KBASE_MEM_PHY_ALLOC_LARGE (1ul << 1)
+
+/* physical pages tracking object.
+ * Set up to track N pages.
+ * N not stored here, the creator holds that info.
+ * This object only tracks how many elements are actually valid (present).
+ * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not
+ * shared with another region or client. CPU mappings are OK to exist when changing, as
+ * long as the tracked mappings objects are updated as part of the change.
+ */
+struct kbase_mem_phy_alloc {
+ struct kref kref; /* number of users of this alloc */
+ atomic_t gpu_mappings;
+ size_t nents; /* 0..N */
+ phys_addr_t *pages; /* N elements, only 0..nents are valid */
+
+ /* kbase_cpu_mappings */
+ struct list_head mappings;
+
+ /* type of buffer */
+ enum kbase_memory_type type;
+
+ unsigned long properties;
+
+ /* member in union valid based on @a type */
+ union {
+#ifdef CONFIG_UMP
+ ump_dd_handle ump_handle;
+#endif /* CONFIG_UMP */
+#if defined(CONFIG_DMA_SHARED_BUFFER)
+ struct {
+ struct dma_buf *dma_buf;
+ struct dma_buf_attachment *dma_attachment;
+ unsigned int current_mapping_usage_count;
+ struct sg_table *sgt;
+ } umm;
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+ struct {
+ u64 stride;
+ size_t nents;
+ struct kbase_aliased *aliased;
+ } alias;
+ /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */
+ struct kbase_context *kctx;
+ } imported;
+};
+
+static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc)
+{
+ KBASE_DEBUG_ASSERT(alloc);
+ /* we only track mappings of NATIVE buffers */
+ if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+ atomic_inc(&alloc->gpu_mappings);
+}
+
+static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc)
+{
+ KBASE_DEBUG_ASSERT(alloc);
+ /* we only track mappings of NATIVE buffers */
+ if (alloc->type == KBASE_MEM_TYPE_NATIVE)
+ if (0 > atomic_dec_return(&alloc->gpu_mappings)) {
+ pr_err("Mismatched %s:\n", __func__);
+ dump_stack();
+ }
+}
+
+void kbase_mem_kref_free(struct kref *kref);
+
+int kbase_mem_init(struct kbase_device *kbdev);
+void kbase_mem_halt(struct kbase_device *kbdev);
+void kbase_mem_term(struct kbase_device *kbdev);
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc)
+{
+ kref_get(&alloc->kref);
+ return alloc;
+}
+
+static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc)
+{
+ kref_put(&alloc->kref, kbase_mem_kref_free);
+ return NULL;
+}
+
+/**
+ * A GPU memory region, and attributes for CPU mappings.
+ */
+struct kbase_va_region {
+ struct rb_node rblink;
+ struct list_head link;
+
+ struct kbase_context *kctx; /* Backlink to base context */
+
+ u64 start_pfn; /* The PFN in GPU space */
+ size_t nr_pages;
+
+/* Free region */
+#define KBASE_REG_FREE (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED (1ul << 4)
+/* Is GPU cached? */
+#define KBASE_REG_GPU_CACHED (1ul << 5)
+
+#define KBASE_REG_GROWABLE (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW (1ul << 7)
+
+/* VA managed by us */
+#define KBASE_REG_CUSTOM_VA (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH (1ul << 10)
+
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_MASK (3ul << 11)
+#define KBASE_REG_ZONE(x) (((x) & 3) << 11)
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD (1ul<<13)
+/* CPU read access */
+#define KBASE_REG_CPU_RD (1ul<<14)
+
+/* Aligned for GPU EX in SAME_VA */
+#define KBASE_REG_ALIGNED (1ul<<15)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_SECURE (1ul << 19)
+
+#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0)
+
+/* only used with 32-bit clients */
+/*
+ * On a 32bit platform, custom VA should be wired from (4GB + shader region)
+ * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
+ * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
+ * So we put the default limit to the maximum possible on Linux and shrink
+ * it down, if required by the GPU, during initialization.
+ */
+#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1) /* Dedicated 16MB region for shader code */
+#define KBASE_REG_ZONE_EXEC_BASE ((1ULL << 32) >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT)
+
+#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2)
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+
+ unsigned long flags;
+
+ size_t extent; /* nr of pages alloc'd on PF */
+
+ struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */
+ struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */
+
+ /* non-NULL if this memory object is a kds_resource */
+ struct kds_resource *kds_res;
+
+};
+
+/* Common functions */
+static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+ return reg->cpu_alloc->pages;
+}
+
+static inline phys_addr_t *kbase_get_gpu_phy_pages(struct kbase_va_region *reg)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+ return reg->gpu_alloc->pages;
+}
+
+static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ /* if no alloc object the backed size naturally is 0 */
+ if (!reg->cpu_alloc)
+ return 0;
+
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents);
+
+ return reg->cpu_alloc->nents;
+}
+
+#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */
+
+static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type)
+{
+ struct kbase_mem_phy_alloc *alloc;
+ const size_t alloc_size =
+ sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages;
+
+ /* Prevent nr_pages*sizeof + sizeof(*alloc) from wrapping around. */
+ if (nr_pages > ((((size_t) -1) - sizeof(*alloc))
+ / sizeof(*alloc->pages)))
+ return ERR_PTR(-ENOMEM);
+
+ /* Allocate based on the size to reduce internal fragmentation of vmem */
+ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+ alloc = vzalloc(alloc_size);
+ else
+ alloc = kzalloc(alloc_size, GFP_KERNEL);
+
+ if (!alloc)
+ return ERR_PTR(-ENOMEM);
+
+ /* Store allocation method */
+ if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD)
+ alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE;
+
+ kref_init(&alloc->kref);
+ atomic_set(&alloc->gpu_mappings, 0);
+ alloc->nents = 0;
+ alloc->pages = (void *)(alloc + 1);
+ INIT_LIST_HEAD(&alloc->mappings);
+ alloc->type = type;
+
+ return alloc;
+}
+
+static inline int kbase_reg_prepare_native(struct kbase_va_region *reg,
+ struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(reg);
+ KBASE_DEBUG_ASSERT(!reg->cpu_alloc);
+ KBASE_DEBUG_ASSERT(!reg->gpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE);
+
+ reg->cpu_alloc = kbase_alloc_create(reg->nr_pages,
+ KBASE_MEM_TYPE_NATIVE);
+ if (IS_ERR(reg->cpu_alloc))
+ return PTR_ERR(reg->cpu_alloc);
+ else if (!reg->cpu_alloc)
+ return -ENOMEM;
+ reg->cpu_alloc->imported.kctx = kctx;
+ if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) {
+ reg->gpu_alloc = kbase_alloc_create(reg->nr_pages,
+ KBASE_MEM_TYPE_NATIVE);
+ reg->gpu_alloc->imported.kctx = kctx;
+ } else {
+ reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+ }
+
+ reg->flags &= ~KBASE_REG_FREE;
+ return 0;
+}
+
+static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages)
+{
+ int new_val = atomic_add_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+ return new_val;
+}
+
+static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages)
+{
+ int new_val = atomic_sub_return(num_pages, used_pages);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_total_alloc_pages_change((long long int)new_val);
+#endif
+ return new_val;
+}
+
+/**
+ * @brief Initialize low-level memory access for a kbase device
+ *
+ * Performs any low-level setup needed for a kbase device to access memory on
+ * the device.
+ *
+ * @param kbdev kbase device to initialize memory access for
+ * @return 0 on success, Linux error code on failure
+ */
+int kbase_mem_lowlevel_init(struct kbase_device *kbdev);
+
+
+/**
+ * @brief Terminate low-level memory access for a kbase device
+ *
+ * Perform any low-level cleanup needed to clean
+ * after @ref kbase_mem_lowlevel_init
+ *
+ * @param kbdev kbase device to clean up for
+ */
+void kbase_mem_lowlevel_term(struct kbase_device *kbdev);
+
+/**
+ * @brief Initialize an OS based memory allocator.
+ *
+ * Initializes a allocator.
+ * Must be called before any allocation is attempted.
+ * \a kbase_mem_allocator_alloc and \a kbase_mem_allocator_free is used
+ * to allocate and free memory.
+ * \a kbase_mem_allocator_term must be called to clean up the allocator.
+ * All memory obtained via \a kbase_mem_allocator_alloc must have been
+ * \a kbase_mem_allocator_free before \a kbase_mem_allocator_term is called.
+ *
+ * @param allocator Allocator object to initialize
+ * @param max_size Maximum number of pages to keep on the freelist.
+ * @param kbdev The kbase device this allocator is used with
+ * @return 0 on success, an error code indicating what failed on
+ * error.
+ */
+int kbase_mem_allocator_init(struct kbase_mem_allocator *allocator,
+ unsigned int max_size,
+ struct kbase_device *kbdev);
+
+/**
+ * @brief Allocate memory via an OS based memory allocator.
+ *
+ * @param[in] allocator Allocator to obtain the memory from
+ * @param nr_pages Number of pages to allocate
+ * @param[out] pages Pointer to an array where the physical address of the allocated pages will be stored
+ * @return 0 if the pages were allocated, an error code indicating what failed on error
+ */
+int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages);
+
+/**
+ * @brief Free memory obtained for an OS based memory allocator.
+ *
+ * @param[in] allocator Allocator to free the memory back to
+ * @param nr_pages Number of pages to free
+ * @param[in] pages Pointer to an array holding the physical address of the paghes to free.
+ * @param[in] sync_back true case the memory should be synced back
+ */
+void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, bool sync_back);
+
+/**
+ * @brief Terminate an OS based memory allocator.
+ *
+ * Frees all cached allocations and clean up internal state.
+ * All allocate pages must have been \a kbase_mem_allocator_free before
+ * this function is called.
+ *
+ * @param[in] allocator Allocator to terminate
+ */
+void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator);
+
+
+
+int kbase_region_tracker_init(struct kbase_context *kctx);
+void kbase_region_tracker_term(struct kbase_context *kctx);
+
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr);
+
+/**
+ * @brief Check that a pointer is actually a valid region.
+ *
+ * Must be called with context lock held.
+ */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr);
+
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone);
+void kbase_free_alloced_region(struct kbase_va_region *reg);
+int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+bool kbase_check_alloc_flags(unsigned long flags);
+bool kbase_check_import_flags(unsigned long flags);
+void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags);
+
+void kbase_gpu_vm_lock(struct kbase_context *kctx);
+void kbase_gpu_vm_unlock(struct kbase_context *kctx);
+
+int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
+
+int kbase_mmu_init(struct kbase_context *kctx);
+void kbase_mmu_term(struct kbase_context *kctx);
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx);
+void kbase_mmu_free_pgd(struct kbase_context *kctx);
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t *phys, size_t nr,
+ unsigned long flags);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t phys, size_t nr,
+ unsigned long flags);
+
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr);
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags);
+
+/**
+ * @brief Register region and map it on the GPU.
+ *
+ * Call kbase_add_va_region() and map the region on the GPU.
+ */
+int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align);
+
+/**
+ * @brief Remove the region from the GPU and unregister it.
+ *
+ * Must be called with context lock held.
+ */
+int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_update(struct kbase_context *kctx);
+
+/**
+ * The caller has the following locking conditions:
+ * - It must hold kbase_as::transaction_mutex on kctx's address space
+ * - It must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_mmu_disable(struct kbase_context *kctx);
+
+/**
+ * kbase_mmu_disable_as() - set the MMU in unmapped mode for an address space.
+ *
+ * @kbdev: Kbase device
+ * @as_nr: Number of the address space for which the MMU
+ * should be set in unmapped mode.
+ *
+ * The caller must hold kbdev->as[as_nr].transaction_mutex.
+ */
+void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr);
+
+void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
+
+/** Dump the MMU tables to a buffer
+ *
+ * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the
+ * buffer is too small then the return value will be NULL.
+ *
+ * The GPU vm lock must be held when calling this function.
+ *
+ * The buffer returned should be freed with @ref vfree when it is no longer required.
+ *
+ * @param[in] kctx The kbase context to dump
+ * @param[in] nr_pages The number of pages to allocate for the buffer.
+ *
+ * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too
+ * small)
+ */
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages);
+
+int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset);
+void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa,
+ phys_addr_t gpu_pa, off_t offset, size_t size,
+ enum kbase_sync_type sync_fn);
+void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr);
+
+/**
+ * Set attributes for imported tmem region
+ *
+ * This function sets (extends with) requested attributes for given region
+ * of imported external memory
+ *
+ * @param[in] kctx The kbase context which the tmem belongs to
+ * @param[in] gpu_addr The base address of the tmem region
+ * @param[in] attributes The attributes of tmem region to be set
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_tmem_set_attributes(struct kbase_context *kctx, u64 gpu_addr, u32 attributes);
+
+/**
+ * Get attributes of imported tmem region
+ *
+ * This function retrieves the attributes of imported external memory
+ *
+ * @param[in] kctx The kbase context which the tmem belongs to
+ * @param[in] gpu_addr The base address of the tmem region
+ * @param[out] attributes The actual attributes of tmem region
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_tmem_get_attributes(struct kbase_context *kctx, u64 gpu_addr, u32 *const attributes);
+
+/* OS specific functions */
+int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr);
+int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg);
+void kbase_os_mem_map_lock(struct kbase_context *kctx);
+void kbase_os_mem_map_unlock(struct kbase_context *kctx);
+
+/**
+ * @brief Update the memory allocation counters for the current process
+ *
+ * OS specific call to updates the current memory allocation counters for the current process with
+ * the supplied delta.
+ *
+ * @param[in] kctx The kbase context
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
+
+/**
+ * @brief Add to the memory allocation counters for the current process
+ *
+ * OS specific call to add to the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
+{
+ kbasep_os_process_page_usage_update(kctx, pages);
+}
+
+/**
+ * @brief Subtract from the memory allocation counters for the current process
+ *
+ * OS specific call to subtract from the current memory allocation counters for the current process by
+ * the supplied amount.
+ *
+ * @param[in] kctx The kernel base context used for the allocation.
+ * @param[in] pages The desired delta to apply to the memory usage counters.
+ */
+
+static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
+{
+ kbasep_os_process_page_usage_update(kctx, 0 - pages);
+}
+
+/**
+ * @brief Find the offset of the CPU mapping of a memory allocation containing
+ * a given address range
+ *
+ * Searches for a CPU mapping of any part of the region starting at @p gpu_addr
+ * that fully encloses the CPU virtual address range specified by @p uaddr and
+ * @p size. Returns a failure indication if only part of the address range lies
+ * within a CPU mapping, or the address range lies within a CPU mapping of a
+ * different region.
+ *
+ * @param[in,out] kctx The kernel base context used for the allocation.
+ * @param[in] gpu_addr GPU address of the start of the allocated region
+ * within which to search.
+ * @param[in] uaddr Start of the CPU virtual address range.
+ * @param[in] size Size of the CPU virtual address range (in bytes).
+ * @param[out] offset The offset from the start of the allocation to the
+ * specified CPU virtual address.
+ *
+ * @return 0 if offset was obtained successfully. Error code
+ * otherwise.
+ */
+int kbasep_find_enclosing_cpu_mapping_offset(struct kbase_context *kctx,
+ u64 gpu_addr,
+ unsigned long uaddr,
+ size_t size,
+ u64 *offset);
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer);
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+* @brief Allocates physical pages.
+*
+* Allocates \a nr_pages_requested and updates the alloc object.
+*
+* @param[in] alloc allocation object to add pages to
+* @param[in] nr_pages_requested number of physical pages to allocate
+*
+* @return 0 if all pages have been successfully allocated. Error code otherwise
+*/
+int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested);
+
+/**
+* @brief Free physical pages.
+*
+* Frees \a nr_pages and updates the alloc object.
+*
+* @param[in] alloc allocation object to free pages from
+* @param[in] nr_pages_to_free number of physical pages to free
+*/
+int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
+
+static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr)
+{
+ SetPagePrivate(p);
+ if (sizeof(dma_addr_t) > sizeof(p->private)) {
+ /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the
+ * private filed stays the same. So we have to be clever and
+ * use the fact that we only store DMA addresses of whole pages,
+ * so the low bits should be zero */
+ KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1)));
+ set_page_private(p, dma_addr >> PAGE_SHIFT);
+ } else {
+ set_page_private(p, dma_addr);
+ }
+}
+
+static inline dma_addr_t kbase_dma_addr(struct page *p)
+{
+ if (sizeof(dma_addr_t) > sizeof(p->private))
+ return ((dma_addr_t)page_private(p)) << PAGE_SHIFT;
+
+ return (dma_addr_t)page_private(p);
+}
+
+/**
+* @brief Process a bus or page fault.
+*
+* This function will process a fault on a specific address space
+*
+* @param[in] kbdev The @ref kbase_device the fault happened on
+* @param[in] kctx The @ref kbase_context for the faulting address space if
+* one was found.
+* @param[in] as The address space that has the fault
+*/
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
+ struct kbase_context *kctx, struct kbase_as *as);
+
+/**
+ * @brief Process a page fault.
+ *
+ * @param[in] data work_struct passed by queue_work()
+ */
+void page_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Process a bus fault.
+ *
+ * @param[in] data work_struct passed by queue_work()
+ */
+void bus_fault_worker(struct work_struct *data);
+
+/**
+ * @brief Flush MMU workqueues.
+ *
+ * This function will cause any outstanding page or bus faults to be processed.
+ * It should be called prior to powering off the GPU.
+ *
+ * @param[in] kbdev Device pointer
+ */
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev);
+
+/**
+ * kbase_sync_single_for_device - update physical memory and give GPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir: DMA data direction
+ */
+
+void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir);
+
+/**
+ * kbase_sync_single_for_cpu - update physical memory and give CPU ownership
+ * @kbdev: Device pointer
+ * @handle: DMA address of region
+ * @size: Size of region to sync
+ * @dir: DMA data direction
+ */
+
+void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle,
+ size_t size, enum dma_data_direction dir);
+
+#endif /* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c
new file mode 100644
index 00000000000..642fa44bf35
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c
@@ -0,0 +1,292 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#include <mali_kbase.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/mempool.h>
+#include <linux/mm.h>
+#include <linux/atomic.h>
+#include <linux/version.h>
+
+int kbase_mem_lowlevel_init(struct kbase_device *kbdev)
+{
+ return 0;
+}
+
+void kbase_mem_lowlevel_term(struct kbase_device *kbdev)
+{
+}
+
+static unsigned long kbase_mem_allocator_count(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ struct kbase_mem_allocator *allocator;
+
+ allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer);
+ return atomic_read(&allocator->free_list_size);
+}
+
+static unsigned long kbase_mem_allocator_scan(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ struct kbase_mem_allocator *allocator;
+ int i;
+ int freed;
+
+ allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer);
+
+ might_sleep();
+
+ mutex_lock(&allocator->free_list_lock);
+ i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan);
+ freed = i;
+
+ atomic_sub(i, &allocator->free_list_size);
+
+ while (i--) {
+ struct page *p;
+
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head,
+ struct page, lru);
+ list_del(&p->lru);
+ ClearPagePrivate(p);
+ __free_page(p);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ return atomic_read(&allocator->free_list_size);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+static int kbase_mem_allocator_shrink(struct shrinker *s,
+ struct shrink_control *sc)
+{
+ if (sc->nr_to_scan == 0)
+ return kbase_mem_allocator_count(s, sc);
+
+ return kbase_mem_allocator_scan(s, sc);
+}
+#endif
+
+int kbase_mem_allocator_init(struct kbase_mem_allocator *const allocator,
+ unsigned int max_size, struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ INIT_LIST_HEAD(&allocator->free_list_head);
+
+ allocator->kbdev = kbdev;
+
+ mutex_init(&allocator->free_list_lock);
+
+ atomic_set(&allocator->free_list_size, 0);
+
+ allocator->free_list_max_size = max_size;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
+ allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink;
+#else
+ allocator->free_list_reclaimer.count_objects =
+ kbase_mem_allocator_count;
+ allocator->free_list_reclaimer.scan_objects = kbase_mem_allocator_scan;
+#endif
+ allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS;
+ /* Kernel versions prior to 3.1 :
+ * struct shrinker does not define batch */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)
+ allocator->free_list_reclaimer.batch = 0;
+#endif
+
+ register_shrinker(&allocator->free_list_reclaimer);
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_init);
+
+void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ unregister_shrinker(&allocator->free_list_reclaimer);
+ mutex_lock(&allocator->free_list_lock);
+ while (!list_empty(&allocator->free_list_head)) {
+ struct page *p;
+
+ p = list_first_entry(&allocator->free_list_head, struct page,
+ lru);
+ list_del(&p->lru);
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ __free_page(p);
+ }
+ atomic_set(&allocator->free_list_size, 0);
+ mutex_unlock(&allocator->free_list_lock);
+ mutex_destroy(&allocator->free_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_term);
+
+int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages)
+{
+ struct page *p;
+ int i;
+ int num_from_free_list;
+ struct list_head from_free_list = LIST_HEAD_INIT(from_free_list);
+ gfp_t gfp;
+
+ might_sleep();
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ /* take from the free list first */
+ mutex_lock(&allocator->free_list_lock);
+ num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size));
+ atomic_sub(num_from_free_list, &allocator->free_list_size);
+ for (i = 0; i < num_from_free_list; i++) {
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head, struct page, lru);
+ list_move(&p->lru, &from_free_list);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ i = 0;
+
+ /* Allocate as many pages from the pool of already allocated pages. */
+ list_for_each_entry(p, &from_free_list, lru) {
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ i++;
+ }
+
+ if (i == nr_pages)
+ return 0;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+ gfp = GFP_USER | __GFP_ZERO;
+#else
+ gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+ if (current->flags & PF_KTHREAD) {
+ /* Don't trigger OOM killer from kernel threads, e.g. when
+ * growing memory on GPU page fault */
+ gfp |= __GFP_NORETRY;
+ }
+
+ /* If not all pages were sourced from the pool, request new ones. */
+ for (; i < nr_pages; i++) {
+ dma_addr_t dma_addr;
+
+ p = alloc_page(gfp);
+ if (NULL == p)
+ goto err_out_roll_back;
+
+ dma_addr = dma_map_page(allocator->kbdev->dev, p, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(allocator->kbdev->dev, dma_addr)) {
+ __free_page(p);
+ goto err_out_roll_back;
+ }
+
+ kbase_set_dma_addr(p, dma_addr);
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ BUG_ON(dma_addr != pages[i]);
+ }
+
+ return 0;
+
+err_out_roll_back:
+ while (i--) {
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ __free_page(p);
+ }
+
+ return -ENOMEM;
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_alloc);
+
+void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, bool sync_back)
+{
+ int i = 0;
+ int page_count = 0;
+ int tofree;
+
+ LIST_HEAD(new_free_list_items);
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ might_sleep();
+
+ /* Starting by just freeing the overspill.
+ * As we do this outside of the lock we might spill too many pages
+ * or get too many on the free list, but the max_size is just a ballpark so it is ok
+ * providing that tofree doesn't exceed nr_pages
+ */
+ tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size), 0);
+ tofree = nr_pages - MIN(tofree, nr_pages);
+ for (; i < tofree; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ pages[i] = (phys_addr_t)0;
+ __free_page(p);
+ }
+ }
+
+ for (; i < nr_pages; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ /* Sync back the memory to ensure that future cache
+ * invalidations don't trample on memory.
+ */
+ if (sync_back)
+ dma_sync_single_for_cpu(allocator->kbdev->dev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ list_add(&p->lru, &new_free_list_items);
+ page_count++;
+ }
+ }
+ mutex_lock(&allocator->free_list_lock);
+ list_splice(&new_free_list_items, &allocator->free_list_head);
+ atomic_add(page_count, &allocator->free_list_size);
+ mutex_unlock(&allocator->free_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_free);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h
new file mode 100644
index 00000000000..ab7b25953f2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h
@@ -0,0 +1,32 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+
+/* raw page handling */
+struct kbase_mem_allocator {
+ struct kbase_device *kbdev;
+ atomic_t free_list_size;
+ unsigned int free_list_max_size;
+ struct mutex free_list_lock;
+ struct list_head free_list_head;
+ struct shrinker free_list_reclaimer;
+};
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c
new file mode 100644
index 00000000000..df9761b1e2e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c
@@ -0,0 +1,402 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem.c
+ * Base kernel memory APIs
+ */
+#include <mali_kbase.h>
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
+#include <linux/mempool.h>
+#include <linux/mm.h>
+#include <linux/atomic.h>
+#include <linux/debugfs.h>
+#include <linux/memblock.h>
+#include <linux/seq_file.h>
+#include <linux/version.h>
+
+
+/* This code does not support having multiple kbase devices, or rmmod/insmod */
+
+static unsigned long kbase_carveout_start_pfn = ~0UL;
+static unsigned long kbase_carveout_end_pfn;
+static LIST_HEAD(kbase_carveout_free_list);
+static DEFINE_MUTEX(kbase_carveout_free_list_lock);
+static unsigned int kbase_carveout_pages;
+static atomic_t kbase_carveout_used_pages;
+static atomic_t kbase_carveout_system_pages;
+
+static struct page *kbase_carveout_get_page(struct kbase_mem_allocator *allocator)
+{
+ struct page *p = NULL;
+ gfp_t gfp;
+
+ mutex_lock(&kbase_carveout_free_list_lock);
+ if (!list_empty(&kbase_carveout_free_list)) {
+ p = list_first_entry(&kbase_carveout_free_list, struct page, lru);
+ list_del(&p->lru);
+ atomic_inc(&kbase_carveout_used_pages);
+ }
+ mutex_unlock(&kbase_carveout_free_list_lock);
+
+ if (!p) {
+ dma_addr_t dma_addr;
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+ gfp = GFP_USER | __GFP_ZERO;
+#else
+ gfp = GFP_HIGHUSER | __GFP_ZERO;
+#endif
+
+ if (current->flags & PF_KTHREAD) {
+ /* Don't trigger OOM killer from kernel threads, e.g.
+ * when growing memory on GPU page fault */
+ gfp |= __GFP_NORETRY;
+ }
+
+ p = alloc_page(gfp);
+ if (!p)
+ goto out;
+
+ dma_addr = dma_map_page(allocator->kbdev->dev, p, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(allocator->kbdev->dev, dma_addr)) {
+ __free_page(p);
+ p = NULL;
+ goto out;
+ }
+
+ kbase_set_dma_addr(p, dma_addr);
+ BUG_ON(dma_addr != PFN_PHYS(page_to_pfn(p)));
+ atomic_inc(&kbase_carveout_system_pages);
+ }
+out:
+ return p;
+}
+
+static void kbase_carveout_put_page(struct page *p,
+ struct kbase_mem_allocator *allocator)
+{
+ if (page_to_pfn(p) >= kbase_carveout_start_pfn &&
+ page_to_pfn(p) <= kbase_carveout_end_pfn) {
+ mutex_lock(&kbase_carveout_free_list_lock);
+ list_add(&p->lru, &kbase_carveout_free_list);
+ atomic_dec(&kbase_carveout_used_pages);
+ mutex_unlock(&kbase_carveout_free_list_lock);
+ } else {
+ dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ __free_page(p);
+ atomic_dec(&kbase_carveout_system_pages);
+ }
+}
+
+static int kbase_carveout_seq_show(struct seq_file *s, void *data)
+{
+ seq_printf(s, "carveout pages: %u\n", kbase_carveout_pages);
+ seq_printf(s, "used carveout pages: %u\n",
+ atomic_read(&kbase_carveout_used_pages));
+ seq_printf(s, "used system pages: %u\n",
+ atomic_read(&kbase_carveout_system_pages));
+ return 0;
+}
+
+static int kbasep_carveout_debugfs_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, kbase_carveout_seq_show, NULL);
+}
+
+static const struct file_operations kbase_carveout_debugfs_fops = {
+ .open = kbasep_carveout_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+static int kbase_carveout_init(struct device *dev)
+{
+ unsigned long pfn;
+ static int once;
+
+ mutex_lock(&kbase_carveout_free_list_lock);
+ BUG_ON(once);
+ once = 1;
+
+ for (pfn = kbase_carveout_start_pfn; pfn <= kbase_carveout_end_pfn; pfn++) {
+ struct page *p = pfn_to_page(pfn);
+ dma_addr_t dma_addr;
+
+ dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(dev, dma_addr))
+ goto out_rollback;
+
+ kbase_set_dma_addr(p, dma_addr);
+ BUG_ON(dma_addr != PFN_PHYS(page_to_pfn(p)));
+
+ list_add_tail(&p->lru, &kbase_carveout_free_list);
+ }
+
+ mutex_unlock(&kbase_carveout_free_list_lock);
+
+ debugfs_create_file("kbase_carveout", S_IRUGO, NULL, NULL,
+ &kbase_carveout_debugfs_fops);
+
+ return 0;
+
+out_rollback:
+ while (!list_empty(&kbase_carveout_free_list)) {
+ struct page *p;
+
+ p = list_first_entry(&kbase_carveout_free_list, struct page, lru);
+ dma_unmap_page(dev, kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ ClearPagePrivate(p);
+ list_del(&p->lru);
+ }
+
+ mutex_unlock(&kbase_carveout_free_list_lock);
+ return -ENOMEM;
+}
+
+int __init kbase_carveout_mem_reserve(phys_addr_t size)
+{
+ phys_addr_t mem;
+
+#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) \
+ && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+ /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */
+ mem = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ACCESSIBLE);
+#else
+ mem = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
+#endif
+ if (mem == 0) {
+ pr_warn("%s: Failed to allocate %d for kbase carveout\n",
+ __func__, size);
+ return -ENOMEM;
+ }
+
+ kbase_carveout_start_pfn = PFN_DOWN(mem);
+ kbase_carveout_end_pfn = PFN_DOWN(mem + size - 1);
+ kbase_carveout_pages = kbase_carveout_end_pfn - kbase_carveout_start_pfn + 1;
+
+ return 0;
+}
+
+int kbase_mem_lowlevel_init(struct kbase_device *kbdev)
+{
+ return kbase_carveout_init(kbdev->dev);
+}
+
+void kbase_mem_lowlevel_term(struct kbase_device *kbdev)
+{
+}
+
+static int kbase_mem_allocator_shrink(struct shrinker *s, struct shrink_control *sc)
+{
+ struct kbase_mem_allocator *allocator;
+ int i;
+ int freed;
+
+ allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer);
+
+ if (sc->nr_to_scan == 0)
+ return atomic_read(&allocator->free_list_size);
+
+ might_sleep();
+
+ mutex_lock(&allocator->free_list_lock);
+ i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan);
+ freed = i;
+
+ atomic_sub(i, &allocator->free_list_size);
+
+ while (i--) {
+ struct page *p;
+
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head, struct page, lru);
+ list_del(&p->lru);
+ kbase_carveout_put_page(p, allocator);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ return atomic_read(&allocator->free_list_size);
+}
+
+int kbase_mem_allocator_init(struct kbase_mem_allocator * const allocator,
+ unsigned int max_size, struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ INIT_LIST_HEAD(&allocator->free_list_head);
+
+ allocator->kbdev = kbdev;
+
+ mutex_init(&allocator->free_list_lock);
+
+ atomic_set(&allocator->free_list_size, 0);
+
+ allocator->free_list_max_size = max_size;
+ allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink;
+ allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) /* Kernel versions prior to 3.1 : struct shrinker does not define batch */
+ allocator->free_list_reclaimer.batch = 0;
+#endif
+
+ register_shrinker(&allocator->free_list_reclaimer);
+
+ return 0;
+}
+
+void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator)
+{
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ unregister_shrinker(&allocator->free_list_reclaimer);
+
+ while (!list_empty(&allocator->free_list_head)) {
+ struct page *p;
+
+ p = list_first_entry(&allocator->free_list_head, struct page,
+ lru);
+ list_del(&p->lru);
+
+ kbase_carveout_put_page(p, allocator);
+ }
+ mutex_destroy(&allocator->free_list_lock);
+}
+
+
+int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages)
+{
+ struct page *p;
+ int i;
+ int num_from_free_list;
+ struct list_head from_free_list = LIST_HEAD_INIT(from_free_list);
+
+ might_sleep();
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ /* take from the free list first */
+ mutex_lock(&allocator->free_list_lock);
+ num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size));
+ atomic_sub(num_from_free_list, &allocator->free_list_size);
+ for (i = 0; i < num_from_free_list; i++) {
+ BUG_ON(list_empty(&allocator->free_list_head));
+ p = list_first_entry(&allocator->free_list_head, struct page, lru);
+ list_move(&p->lru, &from_free_list);
+ }
+ mutex_unlock(&allocator->free_list_lock);
+ i = 0;
+
+ /* Allocate as many pages from the pool of already allocated pages. */
+ list_for_each_entry(p, &from_free_list, lru) {
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ i++;
+ }
+
+ if (i == nr_pages)
+ return 0;
+
+ /* If not all pages were sourced from the pool, request new ones. */
+ for (; i < nr_pages; i++) {
+ p = kbase_carveout_get_page(allocator);
+ if (NULL == p)
+ goto err_out_roll_back;
+
+ kbase_sync_single_for_device(allocator->kbdev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ pages[i] = PFN_PHYS(page_to_pfn(p));
+ }
+
+ return 0;
+
+err_out_roll_back:
+ while (i--) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ kbase_carveout_put_page(p, allocator);
+ }
+
+ return -ENOMEM;
+}
+
+void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, u32 nr_pages, phys_addr_t *pages, bool sync_back)
+{
+ int i = 0;
+ int page_count = 0;
+ int tofree;
+
+ LIST_HEAD(new_free_list_items);
+
+ KBASE_DEBUG_ASSERT(NULL != allocator);
+
+ might_sleep();
+
+ /* Starting by just freeing the overspill.
+ * As we do this outside of the lock we might spill too many pages
+ * or get too many on the free list, but the max_size is just a ballpark so it is ok
+ * providing that tofree doesn't exceed nr_pages
+ */
+ tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size), 0);
+ tofree = nr_pages - MIN(tofree, nr_pages);
+ for (; i < tofree; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ kbase_carveout_put_page(p, allocator);
+ }
+ }
+
+ for (; i < nr_pages; i++) {
+ if (likely(0 != pages[i])) {
+ struct page *p;
+
+ p = pfn_to_page(PFN_DOWN(pages[i]));
+ pages[i] = (phys_addr_t)0;
+ /* Sync back the memory to ensure that future cache
+ * invalidations don't trample on memory.
+ */
+ if (sync_back)
+ kbase_sync_single_for_cpu(allocator->kbdev,
+ kbase_dma_addr(p),
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ list_add(&p->lru, &new_free_list_items);
+ page_count++;
+ }
+ }
+ mutex_lock(&allocator->free_list_lock);
+ list_splice(&new_free_list_items, &allocator->free_list_head);
+ atomic_add(page_count, &allocator->free_list_size);
+ mutex_unlock(&allocator->free_list_lock);
+}
+KBASE_EXPORT_TEST_API(kbase_mem_allocator_free);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
new file mode 100644
index 00000000000..1ddfe5c9737
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c
@@ -0,0 +1,1909 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.c
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#include <linux/compat.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/dma-mapping.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ #include <linux/dma-attrs.h>
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+#include <linux/dma-buf.h>
+#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_hwaccess_time.h>
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma);
+static const struct vm_operations_struct kbase_vm_ops;
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment)
+{
+ int zone;
+ int gpu_pc_bits;
+ int cpu_va_bits;
+ struct kbase_va_region *reg;
+ struct device *dev;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(flags);
+ KBASE_DEBUG_ASSERT(gpu_va);
+ KBASE_DEBUG_ASSERT(va_alignment);
+
+ dev = kctx->kbdev->dev;
+ *va_alignment = 0; /* no alignment by default */
+ *gpu_va = 0; /* return 0 on failure */
+
+ gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+ cpu_va_bits = BITS_PER_LONG;
+
+ if (0 == va_pages) {
+ dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!");
+ goto bad_size;
+ }
+
+ if (va_pages > (U64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+#if defined(CONFIG_64BIT)
+ if (kctx->is_compat)
+ cpu_va_bits = 32;
+ else
+ /* force SAME_VA if a 64-bit client */
+ *flags |= BASE_MEM_SAME_VA;
+#endif
+
+ if (!kbase_check_alloc_flags(*flags)) {
+ dev_warn(dev,
+ "kbase_mem_alloc called with bad flags (%llx)",
+ (unsigned long long)*flags);
+ goto bad_flags;
+ }
+
+ if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
+ kctx->kbdev->system_coherency != COHERENCY_ACE) {
+ dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+ goto bad_flags;
+ }
+ if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
+ kctx->kbdev->system_coherency != COHERENCY_ACE) {
+ /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
+ *flags &= ~BASE_MEM_COHERENT_SYSTEM;
+ }
+
+ /* Limit GPU executable allocs to GPU PC size */
+ if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+ (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT)))
+ goto bad_ex_size;
+
+ /* find out which VA zone to use */
+ if (*flags & BASE_MEM_SAME_VA)
+ zone = KBASE_REG_ZONE_SAME_VA;
+ else if (*flags & BASE_MEM_PROT_GPU_EX)
+ zone = KBASE_REG_ZONE_EXEC;
+ else
+ zone = KBASE_REG_ZONE_CUSTOM_VA;
+
+ reg = kbase_alloc_free_region(kctx, 0, va_pages, zone);
+ if (!reg) {
+ dev_err(dev, "Failed to allocate free region");
+ goto no_region;
+ }
+
+ kbase_update_region_flags(reg, *flags);
+
+ if (kbase_reg_prepare_native(reg, kctx) != 0) {
+ dev_err(dev, "Failed to prepare region");
+ goto prepare_failed;
+ }
+
+ if (*flags & BASE_MEM_GROW_ON_GPF)
+ reg->extent = extent;
+ else
+ reg->extent = 0;
+
+ if (kbase_alloc_phy_pages(reg, va_pages, commit_pages)) {
+ dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)",
+ (unsigned long long)commit_pages,
+ (unsigned long long)va_pages);
+ goto no_mem;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* mmap needed to setup VA? */
+ if (*flags & BASE_MEM_SAME_VA) {
+ /* Bind to a cookie */
+ if (!kctx->cookies) {
+ dev_err(dev, "No cookies available for allocation!");
+ goto no_cookie;
+ }
+ /* return a cookie */
+ *gpu_va = __ffs(kctx->cookies);
+ kctx->cookies &= ~(1UL << *gpu_va);
+ BUG_ON(kctx->pending_regions[*gpu_va]);
+ kctx->pending_regions[*gpu_va] = reg;
+
+ /* relocate to correct base */
+ *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ *gpu_va <<= PAGE_SHIFT;
+
+ /* See if we must align memory due to GPU PC bits vs CPU VA */
+ if ((*flags & BASE_MEM_PROT_GPU_EX) &&
+ (cpu_va_bits > gpu_pc_bits)) {
+ *va_alignment = gpu_pc_bits;
+ reg->flags |= KBASE_REG_ALIGNED;
+ }
+ } else /* we control the VA */ {
+ if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) {
+ dev_warn(dev, "Failed to map memory on GPU");
+ goto no_mmap;
+ }
+ /* return real GPU VA */
+ *gpu_va = reg->start_pfn << PAGE_SHIFT;
+ }
+
+ kbase_gpu_vm_unlock(kctx);
+ return reg;
+
+no_mmap:
+no_cookie:
+ kbase_gpu_vm_unlock(kctx);
+no_mem:
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+prepare_failed:
+ kfree(reg);
+no_region:
+bad_ex_size:
+bad_flags:
+bad_size:
+ return NULL;
+}
+
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out)
+{
+ struct kbase_va_region *reg;
+ int ret = -EINVAL;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(out);
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ switch (query) {
+ case KBASE_MEM_QUERY_COMMIT_SIZE:
+ if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) {
+ *out = kbase_reg_current_backed_size(reg);
+ } else {
+ size_t i;
+ struct kbase_aliased *aliased;
+ *out = 0;
+ aliased = reg->cpu_alloc->imported.alias.aliased;
+ for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++)
+ *out += aliased[i].length;
+ }
+ break;
+ case KBASE_MEM_QUERY_VA_SIZE:
+ *out = reg->nr_pages;
+ break;
+ case KBASE_MEM_QUERY_FLAGS:
+ {
+ *out = 0;
+ if (KBASE_REG_CPU_WR & reg->flags)
+ *out |= BASE_MEM_PROT_CPU_WR;
+ if (KBASE_REG_CPU_RD & reg->flags)
+ *out |= BASE_MEM_PROT_CPU_RD;
+ if (KBASE_REG_CPU_CACHED & reg->flags)
+ *out |= BASE_MEM_CACHED_CPU;
+ if (KBASE_REG_GPU_WR & reg->flags)
+ *out |= BASE_MEM_PROT_GPU_WR;
+ if (KBASE_REG_GPU_RD & reg->flags)
+ *out |= BASE_MEM_PROT_GPU_RD;
+ if (!(KBASE_REG_GPU_NX & reg->flags))
+ *out |= BASE_MEM_PROT_GPU_EX;
+ if (KBASE_REG_SHARE_BOTH & reg->flags)
+ *out |= BASE_MEM_COHERENT_SYSTEM;
+ if (KBASE_REG_SHARE_IN & reg->flags)
+ *out |= BASE_MEM_COHERENT_LOCAL;
+ break;
+ }
+ default:
+ *out = 0;
+ goto out_unlock;
+ }
+
+ ret = 0;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return ret;
+}
+
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask)
+{
+ struct kbase_va_region *reg;
+ int ret = -EINVAL;
+ unsigned int real_flags = 0;
+ unsigned int prev_flags = 0;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ if (!gpu_addr)
+ return -EINVAL;
+
+ /* nuke other bits */
+ flags &= mask;
+
+ /* check for only supported flags */
+ if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL))
+ goto out;
+
+ /* mask covers bits we don't support? */
+ if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL))
+ goto out;
+
+ /* convert flags */
+ if (BASE_MEM_COHERENT_SYSTEM & flags)
+ real_flags |= KBASE_REG_SHARE_BOTH;
+ else if (BASE_MEM_COHERENT_LOCAL & flags)
+ real_flags |= KBASE_REG_SHARE_IN;
+
+ /* now we can lock down the context, and find the region */
+ kbase_gpu_vm_lock(kctx);
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ /* limit to imported memory */
+ if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) &&
+ (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM))
+ goto out_unlock;
+
+ /* no change? */
+ if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) {
+ ret = 0;
+ goto out_unlock;
+ }
+
+ /* save for roll back */
+ prev_flags = reg->flags;
+ reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
+ reg->flags |= real_flags;
+
+ /* Currently supporting only imported memory */
+ switch (reg->gpu_alloc->type) {
+#ifdef CONFIG_UMP
+ case KBASE_MEM_TYPE_IMPORTED_UMP:
+ ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags);
+ break;
+#endif
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case KBASE_MEM_TYPE_IMPORTED_UMM:
+ /* Future use will use the new flags, existing mapping will NOT be updated
+ * as memory should not be in use by the GPU when updating the flags.
+ */
+ ret = 0;
+ WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ /* roll back on error, i.e. not UMP */
+ if (ret)
+ reg->flags = prev_flags;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+out:
+ return ret;
+}
+
+#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS)
+
+#ifdef CONFIG_UMP
+static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags)
+{
+ struct kbase_va_region *reg;
+ ump_dd_handle umph;
+ u64 block_count;
+ const ump_dd_physical_block_64 *block_array;
+ u64 i, j;
+ int page = 0;
+ ump_alloc_flags ump_flags;
+ ump_alloc_flags cpu_flags;
+ ump_alloc_flags gpu_flags;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(va_pages);
+ KBASE_DEBUG_ASSERT(flags);
+
+ if (*flags & BASE_MEM_SECURE)
+ goto bad_flags;
+
+ umph = ump_dd_from_secure_id(id);
+ if (UMP_DD_INVALID_MEMORY_HANDLE == umph)
+ goto bad_id;
+
+ ump_flags = ump_dd_allocation_flags_get(umph);
+ cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK;
+ gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) &
+ UMP_DEVICE_MASK;
+
+ *va_pages = ump_dd_size_get_64(umph);
+ *va_pages >>= PAGE_SHIFT;
+
+ if (!*va_pages)
+ goto bad_size;
+
+ if (*va_pages > (U64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+ if (*flags & BASE_MEM_SAME_VA)
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ else
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+
+ if (!reg)
+ goto no_region;
+
+ /* we've got pages to map now, and support SAME_VA */
+ *flags |= KBASE_MEM_IMPORT_HAVE_PAGES;
+
+ reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP);
+ if (IS_ERR_OR_NULL(reg->gpu_alloc))
+ goto no_alloc_obj;
+
+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+ reg->gpu_alloc->imported.ump_handle = umph;
+
+ reg->flags &= ~KBASE_REG_FREE;
+ reg->flags |= KBASE_REG_GPU_NX; /* UMP is always No eXecute */
+ reg->flags &= ~KBASE_REG_GROWABLE; /* UMP cannot be grown */
+
+ /* Override import flags based on UMP flags */
+ *flags &= ~(BASE_MEM_CACHED_CPU);
+ *flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR);
+ *flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR);
+
+ if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+ (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) {
+ reg->flags |= KBASE_REG_CPU_CACHED;
+ *flags |= BASE_MEM_CACHED_CPU;
+ }
+
+ if (cpu_flags & UMP_PROT_CPU_WR) {
+ reg->flags |= KBASE_REG_CPU_WR;
+ *flags |= BASE_MEM_PROT_CPU_WR;
+ }
+
+ if (cpu_flags & UMP_PROT_CPU_RD) {
+ reg->flags |= KBASE_REG_CPU_RD;
+ *flags |= BASE_MEM_PROT_CPU_RD;
+ }
+
+ if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) ==
+ (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR))
+ reg->flags |= KBASE_REG_GPU_CACHED;
+
+ if (gpu_flags & UMP_PROT_DEVICE_WR) {
+ reg->flags |= KBASE_REG_GPU_WR;
+ *flags |= BASE_MEM_PROT_GPU_WR;
+ }
+
+ if (gpu_flags & UMP_PROT_DEVICE_RD) {
+ reg->flags |= KBASE_REG_GPU_RD;
+ *flags |= BASE_MEM_PROT_GPU_RD;
+ }
+
+ /* ump phys block query */
+ ump_dd_phys_blocks_get_64(umph, &block_count, &block_array);
+
+ for (i = 0; i < block_count; i++) {
+ for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) {
+ reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT);
+ page++;
+ }
+ }
+ reg->gpu_alloc->nents = *va_pages;
+ reg->extent = 0;
+
+ return reg;
+
+no_alloc_obj:
+ kfree(reg);
+no_region:
+bad_size:
+ ump_dd_release(umph);
+bad_id:
+bad_flags:
+ return NULL;
+}
+#endif /* CONFIG_UMP */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, int fd, u64 *va_pages, u64 *flags)
+{
+ struct kbase_va_region *reg;
+ struct dma_buf *dma_buf;
+ struct dma_buf_attachment *dma_attachment;
+
+ dma_buf = dma_buf_get(fd);
+ if (IS_ERR_OR_NULL(dma_buf))
+ goto no_buf;
+
+ dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev);
+ if (!dma_attachment)
+ goto no_attachment;
+
+ *va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT;
+ if (!*va_pages)
+ goto bad_size;
+
+ if (*va_pages > (U64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+ /* ignore SAME_VA */
+ *flags &= ~BASE_MEM_SAME_VA;
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat) {
+ /* 64-bit tasks must MMAP anyway, but not expose this address to clients */
+ *flags |= BASE_MEM_NEED_MMAP;
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ } else {
+#else
+ if (1) {
+#endif
+ reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ }
+
+ if (!reg)
+ goto no_region;
+
+ reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM);
+ if (IS_ERR_OR_NULL(reg->gpu_alloc))
+ goto no_alloc_obj;
+
+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+ /* No pages to map yet */
+ reg->gpu_alloc->nents = 0;
+
+ reg->flags &= ~KBASE_REG_FREE;
+ reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */
+ reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */
+ reg->flags |= KBASE_REG_GPU_CACHED;
+
+ if (*flags & BASE_MEM_PROT_CPU_WR)
+ reg->flags |= KBASE_REG_CPU_WR;
+
+ if (*flags & BASE_MEM_PROT_CPU_RD)
+ reg->flags |= KBASE_REG_CPU_RD;
+
+ if (*flags & BASE_MEM_PROT_GPU_WR)
+ reg->flags |= KBASE_REG_GPU_WR;
+
+ if (*flags & BASE_MEM_PROT_GPU_RD)
+ reg->flags |= KBASE_REG_GPU_RD;
+
+ if (*flags & BASE_MEM_SECURE)
+ reg->flags |= KBASE_REG_SECURE;
+
+ /* no read or write permission given on import, only on run do we give the right permissions */
+
+ reg->gpu_alloc->type = BASE_TMEM_IMPORT_TYPE_UMM;
+ reg->gpu_alloc->imported.umm.sgt = NULL;
+ reg->gpu_alloc->imported.umm.dma_buf = dma_buf;
+ reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment;
+ reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
+ reg->extent = 0;
+
+ return reg;
+
+no_alloc_obj:
+ kfree(reg);
+no_region:
+bad_size:
+ dma_buf_detach(dma_buf, dma_attachment);
+no_attachment:
+ dma_buf_put(dma_buf);
+no_buf:
+ return NULL;
+}
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
+ u64 nents, struct base_mem_aliasing_info *ai,
+ u64 *num_pages)
+{
+ struct kbase_va_region *reg;
+ u64 gpu_va;
+ size_t i;
+ bool coherent;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(flags);
+ KBASE_DEBUG_ASSERT(ai);
+ KBASE_DEBUG_ASSERT(num_pages);
+
+ /* mask to only allowed flags */
+ *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
+ BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL |
+ BASE_MEM_COHERENT_SYSTEM_REQUIRED);
+
+ if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
+ dev_warn(kctx->kbdev->dev,
+ "kbase_mem_alias called with bad flags (%llx)",
+ (unsigned long long)*flags);
+ goto bad_flags;
+ }
+ coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 ||
+ (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0;
+
+ if (!stride)
+ goto bad_stride;
+
+ if (!nents)
+ goto bad_nents;
+
+ if ((nents * stride) > (U64_MAX / PAGE_SIZE))
+ /* 64-bit address range is the max */
+ goto bad_size;
+
+ /* calculate the number of pages this alias will cover */
+ *num_pages = nents * stride;
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat) {
+ /* 64-bit tasks must MMAP anyway, but not expose this address to
+ * clients */
+ *flags |= BASE_MEM_NEED_MMAP;
+ reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+ KBASE_REG_ZONE_SAME_VA);
+ } else {
+#else
+ if (1) {
+#endif
+ reg = kbase_alloc_free_region(kctx, 0, *num_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
+ }
+
+ if (!reg)
+ goto no_reg;
+
+ /* zero-sized page array, as we don't need one/can support one */
+ reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS);
+ if (IS_ERR_OR_NULL(reg->gpu_alloc))
+ goto no_alloc_obj;
+
+ reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+
+ kbase_update_region_flags(reg, *flags);
+
+ reg->gpu_alloc->imported.alias.nents = nents;
+ reg->gpu_alloc->imported.alias.stride = stride;
+ reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents);
+ if (!reg->gpu_alloc->imported.alias.aliased)
+ goto no_aliased_array;
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* validate and add src handles */
+ for (i = 0; i < nents; i++) {
+ if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) {
+ if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE)
+ goto bad_handle; /* unsupported magic handle */
+ if (!ai[i].length)
+ goto bad_handle; /* must be > 0 */
+ if (ai[i].length > stride)
+ goto bad_handle; /* can't be larger than the
+ stride */
+ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+ } else {
+ struct kbase_va_region *aliasing_reg;
+ struct kbase_mem_phy_alloc *alloc;
+
+ aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT);
+
+ /* validate found region */
+ if (!aliasing_reg)
+ goto bad_handle; /* Not found */
+ if (aliasing_reg->flags & KBASE_REG_FREE)
+ goto bad_handle; /* Free region */
+ if (!aliasing_reg->gpu_alloc)
+ goto bad_handle; /* No alloc */
+ if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE)
+ goto bad_handle; /* Not a native alloc */
+ if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0))
+ goto bad_handle;
+ /* Non-coherent memory cannot alias
+ coherent memory, and vice versa.*/
+
+ /* check size against stride */
+ if (!ai[i].length)
+ goto bad_handle; /* must be > 0 */
+ if (ai[i].length > stride)
+ goto bad_handle; /* can't be larger than the
+ stride */
+
+ alloc = aliasing_reg->gpu_alloc;
+
+ /* check against the alloc's size */
+ if (ai[i].offset > alloc->nents)
+ goto bad_handle; /* beyond end */
+ if (ai[i].offset + ai[i].length > alloc->nents)
+ goto bad_handle; /* beyond end */
+
+ reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc);
+ reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length;
+ reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset;
+ }
+ }
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat) {
+ /* Bind to a cookie */
+ if (!kctx->cookies) {
+ dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
+ goto no_cookie;
+ }
+ /* return a cookie */
+ gpu_va = __ffs(kctx->cookies);
+ kctx->cookies &= ~(1UL << gpu_va);
+ BUG_ON(kctx->pending_regions[gpu_va]);
+ kctx->pending_regions[gpu_va] = reg;
+
+ /* relocate to correct base */
+ gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ gpu_va <<= PAGE_SHIFT;
+ } else /* we control the VA */ {
+#else
+ if (1) {
+#endif
+ if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) {
+ dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
+ goto no_mmap;
+ }
+ /* return real GPU VA */
+ gpu_va = reg->start_pfn << PAGE_SHIFT;
+ }
+
+ reg->flags &= ~KBASE_REG_FREE;
+ reg->flags &= ~KBASE_REG_GROWABLE;
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return gpu_va;
+
+#ifdef CONFIG_64BIT
+no_cookie:
+#endif
+no_mmap:
+bad_handle:
+ kbase_gpu_vm_unlock(kctx);
+no_aliased_array:
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc_obj:
+ kfree(reg);
+no_reg:
+bad_size:
+bad_nents:
+bad_stride:
+bad_flags:
+ return 0;
+}
+
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags)
+{
+ struct kbase_va_region *reg;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(gpu_va);
+ KBASE_DEBUG_ASSERT(va_pages);
+ KBASE_DEBUG_ASSERT(flags);
+
+#ifdef CONFIG_64BIT
+ if (!kctx->is_compat)
+ *flags |= BASE_MEM_SAME_VA;
+#endif
+
+ if (!kbase_check_import_flags(*flags)) {
+ dev_warn(kctx->kbdev->dev,
+ "kbase_mem_import called with bad flags (%llx)",
+ (unsigned long long)*flags);
+ goto bad_flags;
+ }
+
+ switch (type) {
+#ifdef CONFIG_UMP
+ case BASE_MEM_IMPORT_TYPE_UMP:
+ reg = kbase_mem_from_ump(kctx, (ump_secure_id)handle, va_pages, flags);
+ break;
+#endif /* CONFIG_UMP */
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ case BASE_MEM_IMPORT_TYPE_UMM:
+ reg = kbase_mem_from_umm(kctx, handle, va_pages, flags);
+ break;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+ default:
+ reg = NULL;
+ break;
+ }
+
+ if (!reg)
+ goto no_reg;
+
+ kbase_gpu_vm_lock(kctx);
+
+ /* mmap needed to setup VA? */
+ if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) {
+ /* Bind to a cookie */
+ if (!kctx->cookies)
+ goto no_cookie;
+ /* return a cookie */
+ *gpu_va = __ffs(kctx->cookies);
+ kctx->cookies &= ~(1UL << *gpu_va);
+ BUG_ON(kctx->pending_regions[*gpu_va]);
+ kctx->pending_regions[*gpu_va] = reg;
+
+ /* relocate to correct base */
+ *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ *gpu_va <<= PAGE_SHIFT;
+
+ } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) {
+ /* we control the VA, mmap now to the GPU */
+ if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0)
+ goto no_gpu_va;
+ /* return real GPU VA */
+ *gpu_va = reg->start_pfn << PAGE_SHIFT;
+ } else {
+ /* we control the VA, but nothing to mmap yet */
+ if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0)
+ goto no_gpu_va;
+ /* return real GPU VA */
+ *gpu_va = reg->start_pfn << PAGE_SHIFT;
+ }
+
+ /* clear out private flags */
+ *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1);
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return 0;
+
+no_gpu_va:
+no_cookie:
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+ kfree(reg);
+no_reg:
+bad_flags:
+ *gpu_va = 0;
+ *va_pages = 0;
+ *flags = 0;
+ return -ENOMEM;
+}
+
+
+static int zap_range_nolock(struct mm_struct *mm,
+ const struct vm_operations_struct *vm_ops,
+ unsigned long start, unsigned long end)
+{
+ struct vm_area_struct *vma;
+ int err = -EINVAL; /* in case end < start */
+
+ while (start < end) {
+ unsigned long local_end;
+
+ vma = find_vma_intersection(mm, start, end);
+ if (!vma)
+ break;
+
+ /* is it ours? */
+ if (vma->vm_ops != vm_ops)
+ goto try_next;
+
+ local_end = vma->vm_end;
+
+ if (end < local_end)
+ local_end = end;
+
+ err = zap_vma_ptes(vma, start, local_end - start);
+ if (unlikely(err))
+ break;
+
+try_next:
+ /* go to next vma, if any */
+ start = vma->vm_end;
+ }
+
+ return err;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
+{
+ u64 old_pages;
+ u64 delta;
+ int res = -EINVAL;
+ struct kbase_va_region *reg;
+ phys_addr_t *phy_pages;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(failure_reason);
+ KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+ down_read(&current->mm->mmap_sem);
+ kbase_gpu_vm_lock(kctx);
+
+ /* Validate the region */
+ reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE)) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+ goto out_unlock;
+ }
+
+ KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+ KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+ if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+ goto out_unlock;
+ }
+
+ if (0 == (reg->flags & KBASE_REG_GROWABLE)) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+ goto out_unlock;
+ }
+
+ if (new_pages > reg->nr_pages) {
+ /* Would overflow the VA region */
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+ goto out_unlock;
+ }
+
+ /* can't be mapped more than once on the GPU */
+ if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+ goto out_unlock;
+ }
+
+ if (new_pages == reg->gpu_alloc->nents) {
+ /* no change */
+ res = 0;
+ goto out_unlock;
+ }
+
+ phy_pages = kbase_get_gpu_phy_pages(reg);
+ old_pages = kbase_reg_current_backed_size(reg);
+
+ if (new_pages > old_pages) {
+ /* growing */
+ int err;
+
+ delta = new_pages - old_pages;
+ /* Allocate some more pages */
+ if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ goto out_unlock;
+ }
+ if (reg->cpu_alloc != reg->gpu_alloc) {
+ if (kbase_alloc_phy_pages_helper(
+ reg->gpu_alloc, delta) != 0) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ kbase_free_phy_pages_helper(reg->cpu_alloc,
+ delta);
+ goto out_unlock;
+ }
+ }
+ err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+ phy_pages + old_pages, delta, reg->flags);
+ if (err) {
+ kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+ if (reg->cpu_alloc != reg->gpu_alloc)
+ kbase_free_phy_pages_helper(reg->gpu_alloc,
+ delta);
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ goto out_unlock;
+ }
+ } else {
+ /* shrinking */
+ struct kbase_cpu_mapping *mapping;
+ int err;
+
+ /* first, unmap from any mappings affected */
+ list_for_each_entry(mapping, &reg->cpu_alloc->mappings, mappings_list) {
+ unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT;
+
+ /* is this mapping affected ?*/
+ if ((mapping->page_off + mapping_size) > new_pages) {
+ unsigned long first_bad = 0;
+ int zap_res;
+
+ if (new_pages > mapping->page_off)
+ first_bad = new_pages - mapping->page_off;
+
+ zap_res = zap_range_nolock(current->mm,
+ &kbase_vm_ops,
+ mapping->vm_start +
+ (first_bad << PAGE_SHIFT),
+ mapping->vm_end);
+ WARN(zap_res,
+ "Failed to zap VA range (0x%lx - 0x%lx);\n",
+ mapping->vm_start +
+ (first_bad << PAGE_SHIFT),
+ mapping->vm_end
+ );
+ }
+ }
+
+ /* Free some pages */
+ delta = old_pages - new_pages;
+ err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages,
+ delta);
+ if (err) {
+ *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+ goto out_unlock;
+ }
+#ifndef CONFIG_MALI_NO_MALI
+ if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) {
+ /* Wait for GPU to flush write buffer before freeing physical pages */
+ kbase_wait_write_flush(kctx);
+ }
+#endif
+ kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+ if (reg->cpu_alloc != reg->gpu_alloc)
+ kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+ }
+
+ res = 0;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ up_read(&current->mm->mmap_sem);
+
+ return res;
+}
+
+static void kbase_cpu_vm_open(struct vm_area_struct *vma)
+{
+ struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+ KBASE_DEBUG_ASSERT(map);
+ KBASE_DEBUG_ASSERT(map->count > 0);
+ /* non-atomic as we're under Linux' mm lock */
+ map->count++;
+}
+
+static void kbase_cpu_vm_close(struct vm_area_struct *vma)
+{
+ struct kbase_cpu_mapping *map = vma->vm_private_data;
+
+ KBASE_DEBUG_ASSERT(map);
+ KBASE_DEBUG_ASSERT(map->count > 0);
+
+ /* non-atomic as we're under Linux' mm lock */
+ if (--map->count)
+ return;
+
+ KBASE_DEBUG_ASSERT(map->kctx);
+ KBASE_DEBUG_ASSERT(map->alloc);
+
+ kbase_gpu_vm_lock(map->kctx);
+
+ if (map->region) {
+ KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
+ KBASE_REG_ZONE_SAME_VA);
+ /* Avoid freeing memory on the process death which results in
+ * GPU Page Fault. Memory will be freed in kbase_destroy_context
+ */
+ if (!(current->flags & PF_EXITING))
+ kbase_mem_free_region(map->kctx, map->region);
+ }
+
+ list_del(&map->mappings_list);
+
+ kbase_gpu_vm_unlock(map->kctx);
+
+ kbase_mem_phy_alloc_put(map->alloc);
+ kfree(map);
+}
+
+KBASE_EXPORT_TEST_API(kbase_cpu_vm_close);
+
+
+static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct kbase_cpu_mapping *map = vma->vm_private_data;
+ pgoff_t rel_pgoff;
+ size_t i;
+
+ KBASE_DEBUG_ASSERT(map);
+ KBASE_DEBUG_ASSERT(map->count > 0);
+ KBASE_DEBUG_ASSERT(map->kctx);
+ KBASE_DEBUG_ASSERT(map->alloc);
+
+ /* we don't use vmf->pgoff as it's affected by our mmap with
+ * offset being a GPU VA or a cookie */
+ rel_pgoff = ((unsigned long)vmf->virtual_address - map->vm_start)
+ >> PAGE_SHIFT;
+
+ kbase_gpu_vm_lock(map->kctx);
+ if (map->page_off + rel_pgoff >= map->alloc->nents)
+ goto locked_bad_fault;
+
+ /* insert all valid pages from the fault location */
+ for (i = rel_pgoff;
+ i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
+ map->alloc->nents - map->page_off); i++) {
+ int ret = vm_insert_pfn(vma, map->vm_start + (i << PAGE_SHIFT),
+ PFN_DOWN(map->alloc->pages[map->page_off + i]));
+ if (ret < 0 && ret != -EBUSY)
+ goto locked_bad_fault;
+ }
+
+ kbase_gpu_vm_unlock(map->kctx);
+ /* we resolved it, nothing for VM to do */
+ return VM_FAULT_NOPAGE;
+
+locked_bad_fault:
+ kbase_gpu_vm_unlock(map->kctx);
+ send_sig(SIGSEGV, current, 1);
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct kbase_vm_ops = {
+ .open = kbase_cpu_vm_open,
+ .close = kbase_cpu_vm_close,
+ .fault = kbase_cpu_vm_fault
+};
+
+static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close)
+{
+ struct kbase_cpu_mapping *map;
+ u64 start_off = vma->vm_pgoff - reg->start_pfn;
+ phys_addr_t *page_array;
+ int err = 0;
+ int i;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+
+ if (!map) {
+ WARN_ON(1);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+ * VM_DONTEXPAND - disable mremap on this region
+ * VM_IO - disables paging
+ * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+ * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+ * This is needed to support using the dedicated and
+ * the OS based memory backends together.
+ */
+ /*
+ * This will need updating to propagate coherency flags
+ * See MIDBASE-1057
+ */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+ vma->vm_ops = &kbase_vm_ops;
+ vma->vm_private_data = map;
+
+ page_array = kbase_get_cpu_phy_pages(reg);
+
+ if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+ (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+ /* We can't map vmalloc'd memory uncached.
+ * Other memory will have been returned from
+ * kbase_mem_allocator_alloc which would be
+ * suitable for mapping uncached.
+ */
+ BUG_ON(kaddr);
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ }
+
+ if (!kaddr) {
+ vma->vm_flags |= VM_PFNMAP;
+ for (i = 0; i < nr_pages; i++) {
+ err = vm_insert_pfn(vma, vma->vm_start + (i << PAGE_SHIFT), page_array[i + start_off] >> PAGE_SHIFT);
+ if (WARN_ON(err))
+ break;
+ }
+ } else {
+ /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+ vma->vm_flags |= VM_MIXEDMAP;
+ /* vmalloc remaping is easy... */
+ err = remap_vmalloc_range(vma, kaddr, 0);
+ WARN_ON(err);
+ }
+
+ if (err) {
+ kfree(map);
+ goto out;
+ }
+
+ map->page_off = start_off;
+ map->region = free_on_close ? reg : NULL;
+ map->kctx = reg->kctx;
+ map->vm_start = vma->vm_start + aligned_offset;
+ if (aligned_offset) {
+ KBASE_DEBUG_ASSERT(!start_off);
+ map->vm_end = map->vm_start + (reg->nr_pages << PAGE_SHIFT);
+ } else {
+ map->vm_end = vma->vm_end;
+ }
+ map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+ map->count = 1; /* start with one ref */
+
+ if (reg->flags & KBASE_REG_CPU_CACHED)
+ map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+ list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+ return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+ struct kbase_va_region *new_reg;
+ u32 nr_pages;
+ size_t size;
+ int err = 0;
+ u32 *tb;
+ int owns_tb = 1;
+
+ dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+ size = (vma->vm_end - vma->vm_start);
+ nr_pages = size >> PAGE_SHIFT;
+
+ if (!kctx->jctx.tb) {
+ KBASE_DEBUG_ASSERT(0 != size);
+ tb = vmalloc_user(size);
+
+ if (NULL == tb) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ kbase_device_trace_buffer_install(kctx, tb, size);
+ } else {
+ err = -EINVAL;
+ goto out;
+ }
+
+ *kaddr = kctx->jctx.tb;
+
+ new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+ if (!new_reg) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out_no_region;
+ }
+
+ new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+ if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+ err = -ENOMEM;
+ new_reg->cpu_alloc = NULL;
+ WARN_ON(1);
+ goto out_no_alloc;
+ }
+
+ new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+ new_reg->cpu_alloc->imported.kctx = kctx;
+ new_reg->flags &= ~KBASE_REG_FREE;
+ new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+ /* alloc now owns the tb */
+ owns_tb = 0;
+
+ if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out_no_va_region;
+ }
+
+ *reg = new_reg;
+
+ /* map read only, noexec */
+ vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+ /* the rest of the flags is added by the cpu_mmap handler */
+
+ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
+ return 0;
+
+out_no_va_region:
+out_no_alloc:
+ kbase_free_alloced_region(new_reg);
+out_no_region:
+ if (owns_tb) {
+ kbase_device_trace_buffer_uninstall(kctx);
+ vfree(tb);
+ }
+out:
+ return err;
+}
+
+static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr)
+{
+ struct kbase_va_region *new_reg;
+ void *kaddr;
+ u32 nr_pages;
+ size_t size;
+ int err = 0;
+
+ dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+ size = (vma->vm_end - vma->vm_start);
+ nr_pages = size >> PAGE_SHIFT;
+
+ kaddr = kbase_mmu_dump(kctx, nr_pages);
+
+ if (!kaddr) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+ if (!new_reg) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out;
+ }
+
+ new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW);
+ if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+ err = -ENOMEM;
+ new_reg->cpu_alloc = NULL;
+ WARN_ON(1);
+ goto out_no_alloc;
+ }
+
+ new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+ new_reg->flags &= ~KBASE_REG_FREE;
+ new_reg->flags |= KBASE_REG_CPU_CACHED;
+ if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) {
+ err = -ENOMEM;
+ WARN_ON(1);
+ goto out_va_region;
+ }
+
+ *kmap_addr = kaddr;
+ *reg = new_reg;
+
+ dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+ return 0;
+
+out_no_alloc:
+out_va_region:
+ kbase_free_alloced_region(new_reg);
+out:
+ return err;
+}
+
+
+void kbase_os_mem_map_lock(struct kbase_context *kctx)
+{
+ struct mm_struct *mm = current->mm;
+ (void)kctx;
+ down_read(&mm->mmap_sem);
+}
+
+void kbase_os_mem_map_unlock(struct kbase_context *kctx)
+{
+ struct mm_struct *mm = current->mm;
+ (void)kctx;
+ up_read(&mm->mmap_sem);
+}
+
+int kbase_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct kbase_context *kctx = file->private_data;
+ struct kbase_va_region *reg;
+ void *kaddr = NULL;
+ size_t nr_pages;
+ int err = 0;
+ int free_on_close = 0;
+ struct device *dev = kctx->kbdev->dev;
+ size_t aligned_offset = 0;
+
+ dev_dbg(dev, "kbase_mmap\n");
+ nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+ /* strip away corresponding VM_MAY% flags to the VM_% flags requested */
+ vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4);
+
+ if (0 == nr_pages) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (!(vma->vm_flags & VM_SHARED)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+
+ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) {
+ /* The non-mapped tracking helper page */
+ err = kbase_tracking_page_setup(kctx, vma);
+ goto out_unlock;
+ }
+
+ /* if not the MTP, verify that the MTP has been mapped */
+ rcu_read_lock();
+ /* catches both when the special page isn't present or
+ * when we've forked */
+ if (rcu_dereference(kctx->process_mm) != current->mm) {
+ err = -EINVAL;
+ rcu_read_unlock();
+ goto out_unlock;
+ }
+ rcu_read_unlock();
+
+ switch (vma->vm_pgoff) {
+ case PFN_DOWN(BASE_MEM_INVALID_HANDLE):
+ case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE):
+ /* Illegal handle for direct map */
+ err = -EINVAL;
+ goto out_unlock;
+ case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE):
+ err = kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+ if (0 != err)
+ goto out_unlock;
+ dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+ /* free the region on munmap */
+ free_on_close = 1;
+ goto map;
+ case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+ /* MMU dump */
+ err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+ if (0 != err)
+ goto out_unlock;
+ /* free the region on munmap */
+ free_on_close = 1;
+ goto map;
+ case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+ PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+ /* SAME_VA stuff, fetch the right region */
+ int gpu_pc_bits;
+ int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+ gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+ reg = kctx->pending_regions[cookie];
+ if (NULL != reg) {
+ if (reg->flags & KBASE_REG_ALIGNED) {
+ /* nr_pages must be able to hold alignment pages
+ * plus actual pages */
+ if (nr_pages != ((1UL << gpu_pc_bits >>
+ PAGE_SHIFT) +
+ reg->nr_pages)) {
+ /* incorrect mmap size */
+ /* leave the cookie for a potential
+ * later mapping, or to be reclaimed
+ * later when the context is freed */
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ aligned_offset = (vma->vm_start +
+ (1UL << gpu_pc_bits) - 1) &
+ ~((1UL << gpu_pc_bits) - 1);
+ aligned_offset -= vma->vm_start;
+ } else if (reg->nr_pages != nr_pages) {
+ /* incorrect mmap size */
+ /* leave the cookie for a potential later
+ * mapping, or to be reclaimed later when the
+ * context is freed */
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ if ((vma->vm_flags & VM_READ &&
+ !(reg->flags & KBASE_REG_CPU_RD)) ||
+ (vma->vm_flags & VM_WRITE &&
+ !(reg->flags & KBASE_REG_CPU_WR))) {
+ /* VM flags inconsistent with region flags */
+ err = -EPERM;
+ dev_err(dev, "%s:%d inconsistent VM flags\n",
+ __FILE__, __LINE__);
+ goto out_unlock;
+ }
+
+ /* adjust down nr_pages to what we have physically */
+ nr_pages = kbase_reg_current_backed_size(reg);
+
+ if (kbase_gpu_mmap(kctx, reg,
+ vma->vm_start + aligned_offset,
+ reg->nr_pages, 1) != 0) {
+ dev_err(dev, "%s:%d\n", __FILE__, __LINE__);
+ /* Unable to map in GPU space. */
+ WARN_ON(1);
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ /* no need for the cookie anymore */
+ kctx->pending_regions[cookie] = NULL;
+ kctx->cookies |= (1UL << cookie);
+
+ /*
+ * Overwrite the offset with the
+ * region start_pfn, so we effectively
+ * map from offset 0 in the region.
+ */
+ vma->vm_pgoff = reg->start_pfn;
+
+ /* free the region on munmap */
+ free_on_close = 1;
+ goto map;
+ }
+
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+ default: {
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT);
+
+ if (reg && !(reg->flags & KBASE_REG_FREE)) {
+ /* will this mapping overflow the size of the region? */
+ if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn)))
+ goto overflow;
+
+ if ((vma->vm_flags & VM_READ &&
+ !(reg->flags & KBASE_REG_CPU_RD)) ||
+ (vma->vm_flags & VM_WRITE &&
+ !(reg->flags & KBASE_REG_CPU_WR))) {
+ /* VM flags inconsistent with region flags */
+ err = -EPERM;
+ dev_err(dev, "%s:%d inconsistent VM flags\n",
+ __FILE__, __LINE__);
+ goto out_unlock;
+ }
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+ if (reg->cpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM)
+ goto dma_map;
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+
+ /* limit what we map to the amount currently backed */
+ if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) {
+ if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
+ nr_pages = 0;
+ else
+ nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn);
+ }
+
+ goto map;
+ }
+
+overflow:
+ err = -ENOMEM;
+ goto out_unlock;
+ } /* default */
+ } /* switch */
+map:
+ err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close);
+
+ if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) {
+ /* MMU dump - userspace should now have a reference on
+ * the pages, so we can now free the kernel mapping */
+ vfree(kaddr);
+ }
+ goto out_unlock;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+dma_map:
+ err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn);
+#endif /* CONFIG_DMA_SHARED_BUFFER */
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+out:
+ if (err)
+ dev_err(dev, "mmap failed %d\n", err);
+
+ return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmap);
+
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+ struct kbase_vmap_struct *map)
+{
+ struct kbase_va_region *reg;
+ unsigned long page_index;
+ unsigned int offset = gpu_addr & ~PAGE_MASK;
+ size_t page_count = PFN_UP(offset + size);
+ phys_addr_t *page_array;
+ struct page **pages;
+ void *cpu_addr = NULL;
+ pgprot_t prot;
+ size_t i;
+ bool sync_needed;
+
+ if (!size || !map)
+ return NULL;
+
+ /* check if page_count calculation will wrap */
+ if (size > ((size_t)-1 / PAGE_SIZE))
+ return NULL;
+
+ kbase_gpu_vm_lock(kctx);
+
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
+ if (!reg || (reg->flags & KBASE_REG_FREE))
+ goto out_unlock;
+
+ page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn;
+
+ /* check if page_index + page_count will wrap */
+ if (-1UL - page_count < page_index)
+ goto out_unlock;
+
+ if (page_index + page_count > kbase_reg_current_backed_size(reg))
+ goto out_unlock;
+
+ page_array = kbase_get_cpu_phy_pages(reg);
+ if (!page_array)
+ goto out_unlock;
+
+ pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+ if (!pages)
+ goto out_unlock;
+
+ for (i = 0; i < page_count; i++)
+ pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i]));
+
+ prot = PAGE_KERNEL;
+ if (!(reg->flags & KBASE_REG_CPU_CACHED)) {
+ /* Map uncached */
+ prot = pgprot_writecombine(prot);
+ }
+
+ cpu_addr = vmap(pages, page_count, VM_MAP, prot);
+
+ kfree(pages);
+
+ if (!cpu_addr)
+ goto out_unlock;
+
+ map->gpu_addr = gpu_addr;
+ map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+ map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index];
+ map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+ map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index];
+ map->addr = (void *)((uintptr_t)cpu_addr + offset);
+ map->size = size;
+ map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0;
+ sync_needed = map->is_cached;
+
+ if (sync_needed) {
+ /* Sync first page */
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+ phys_addr_t cpu_pa = map->cpu_pages[0];
+ phys_addr_t gpu_pa = map->gpu_pages[0];
+
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+ KBASE_SYNC_TO_CPU);
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ cpu_pa = map->cpu_pages[i];
+ gpu_pa = map->gpu_pages[i];
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+ KBASE_SYNC_TO_CPU);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1) {
+ cpu_pa = map->cpu_pages[page_count - 1];
+ gpu_pa = map->gpu_pages[page_count - 1];
+ sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+ KBASE_SYNC_TO_CPU);
+ }
+ }
+ kbase_gpu_vm_unlock(kctx);
+
+ return map->addr;
+
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return NULL;
+}
+
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map)
+{
+ void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+ bool sync_needed = map->is_cached;
+ vunmap(addr);
+ if (sync_needed) {
+ off_t offset = (uintptr_t)map->addr & ~PAGE_MASK;
+ size_t size = map->size;
+ size_t page_count = PFN_UP(offset + size);
+ size_t i;
+
+ /* Sync first page */
+ size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
+ phys_addr_t cpu_pa = map->cpu_pages[0];
+ phys_addr_t gpu_pa = map->gpu_pages[0];
+
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz,
+ KBASE_SYNC_TO_DEVICE);
+
+ /* Sync middle pages (if any) */
+ for (i = 1; page_count > 2 && i < page_count - 1; i++) {
+ cpu_pa = map->cpu_pages[i];
+ gpu_pa = map->gpu_pages[i];
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE,
+ KBASE_SYNC_TO_DEVICE);
+ }
+
+ /* Sync last page (if any) */
+ if (page_count > 1) {
+ cpu_pa = map->cpu_pages[page_count - 1];
+ gpu_pa = map->gpu_pages[page_count - 1];
+ sz = ((offset + size - 1) & ~PAGE_MASK) + 1;
+ kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz,
+ KBASE_SYNC_TO_DEVICE);
+ }
+ }
+ map->gpu_addr = 0;
+ map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc);
+ map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc);
+ map->cpu_pages = NULL;
+ map->gpu_pages = NULL;
+ map->addr = NULL;
+ map->size = 0;
+ map->is_cached = false;
+}
+
+void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
+{
+ struct mm_struct *mm;
+
+ rcu_read_lock();
+ mm = rcu_dereference(kctx->process_mm);
+ if (mm) {
+ atomic_add(pages, &kctx->nonmapped_pages);
+#ifdef SPLIT_RSS_COUNTING
+ add_mm_counter(mm, MM_FILEPAGES, pages);
+#else
+ spin_lock(&mm->page_table_lock);
+ add_mm_counter(mm, MM_FILEPAGES, pages);
+ spin_unlock(&mm->page_table_lock);
+#endif
+ }
+ rcu_read_unlock();
+}
+
+static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
+{
+ int pages;
+ struct mm_struct *mm;
+
+ spin_lock(&kctx->mm_update_lock);
+ mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
+ if (!mm) {
+ spin_unlock(&kctx->mm_update_lock);
+ return;
+ }
+
+ rcu_assign_pointer(kctx->process_mm, NULL);
+ spin_unlock(&kctx->mm_update_lock);
+ synchronize_rcu();
+
+ pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+#ifdef SPLIT_RSS_COUNTING
+ add_mm_counter(mm, MM_FILEPAGES, -pages);
+#else
+ spin_lock(&mm->page_table_lock);
+ add_mm_counter(mm, MM_FILEPAGES, -pages);
+ spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+ struct kbase_context *kctx;
+
+ kctx = vma->vm_private_data;
+ kbasep_os_process_page_usage_drain(kctx);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+ .close = kbase_special_vm_close,
+};
+
+static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
+{
+ /* check that this is the only tracking page */
+ spin_lock(&kctx->mm_update_lock);
+ if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
+ spin_unlock(&kctx->mm_update_lock);
+ return -EFAULT;
+ }
+
+ rcu_assign_pointer(kctx->process_mm, current->mm);
+
+ spin_unlock(&kctx->mm_update_lock);
+
+ /* no real access */
+ vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+#else
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+ vma->vm_ops = &kbase_vm_special_ops;
+ vma->vm_private_data = kctx;
+
+ return 0;
+}
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle)
+{
+ int i;
+ int res;
+ void *va;
+ dma_addr_t dma_pa;
+ struct kbase_va_region *reg;
+ phys_addr_t *page_array;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+#endif
+
+ u32 pages = ((size - 1) >> PAGE_SHIFT) + 1;
+ u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
+ BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(0 != size);
+ KBASE_DEBUG_ASSERT(0 != pages);
+
+ if (size == 0)
+ goto err;
+
+ /* All the alloc calls return zeroed memory */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs);
+#else
+ va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL);
+#endif
+ if (!va)
+ goto err;
+
+ /* Store the state so we can free it later. */
+ handle->cpu_va = va;
+ handle->dma_pa = dma_pa;
+ handle->size = size;
+
+
+ reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA);
+ if (!reg)
+ goto no_reg;
+
+ reg->flags &= ~KBASE_REG_FREE;
+ kbase_update_region_flags(reg, flags);
+
+ reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW);
+ if (IS_ERR_OR_NULL(reg->cpu_alloc))
+ goto no_alloc;
+
+ reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+
+ page_array = kbase_get_cpu_phy_pages(reg);
+
+ for (i = 0; i < pages; i++)
+ page_array[i] = dma_pa + (i << PAGE_SHIFT);
+
+ reg->cpu_alloc->nents = pages;
+
+ kbase_gpu_vm_lock(kctx);
+ res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1);
+ kbase_gpu_vm_unlock(kctx);
+ if (res)
+ goto no_mmap;
+
+ return va;
+
+no_mmap:
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+no_alloc:
+ kfree(reg);
+no_reg:
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs);
+#else
+ dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa);
+#endif
+err:
+ return NULL;
+}
+KBASE_EXPORT_SYMBOL(kbase_va_alloc);
+
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle)
+{
+ struct kbase_va_region *reg;
+ int err;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+#endif
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+ KBASE_DEBUG_ASSERT(handle->cpu_va != NULL);
+
+ kbase_gpu_vm_lock(kctx);
+ reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va);
+ KBASE_DEBUG_ASSERT(reg);
+ err = kbase_gpu_munmap(kctx, reg);
+ kbase_gpu_vm_unlock(kctx);
+ KBASE_DEBUG_ASSERT(!err);
+
+ kbase_mem_phy_alloc_put(reg->cpu_alloc);
+ kbase_mem_phy_alloc_put(reg->gpu_alloc);
+ kfree(reg);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ dma_free_attrs(kctx->kbdev->dev, handle->size,
+ handle->cpu_va, handle->dma_pa, &attrs);
+#else
+ dma_free_writecombine(kctx->kbdev->dev, handle->size,
+ handle->cpu_va, handle->dma_pa);
+#endif
+}
+KBASE_EXPORT_SYMBOL(kbase_va_free);
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
new file mode 100644
index 00000000000..1d854152704
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_linux.h
+ * Base kernel memory APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_MEM_LINUX_H_
+#define _KBASE_MEM_LINUX_H_
+
+/** A HWC dump mapping */
+struct kbase_hwc_dma_mapping {
+ void *cpu_va;
+ dma_addr_t dma_pa;
+ size_t size;
+};
+
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment);
+int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages);
+int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags);
+u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages);
+int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask);
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason);
+int kbase_mmap(struct file *file, struct vm_area_struct *vma);
+
+struct kbase_vmap_struct {
+ u64 gpu_addr;
+ struct kbase_mem_phy_alloc *cpu_alloc;
+ struct kbase_mem_phy_alloc *gpu_alloc;
+ phys_addr_t *cpu_pages;
+ phys_addr_t *gpu_pages;
+ void *addr;
+ size_t size;
+ bool is_cached;
+};
+void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size,
+ struct kbase_vmap_struct *map);
+void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map);
+
+/** @brief Allocate memory from kernel space and map it onto the GPU
+ *
+ * @param kctx The context used for the allocation/mapping
+ * @param size The size of the allocation in bytes
+ * @param handle An opaque structure used to contain the state needed to free the memory
+ * @return the VA for kernel space and GPU MMU
+ */
+void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle);
+
+/** @brief Free/unmap memory allocated by kbase_va_alloc
+ *
+ * @param kctx The context used for the allocation/mapping
+ * @param handle An opaque structure returned by the kbase_va_alloc function.
+ */
+void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle);
+
+#endif /* _KBASE_MEM_LINUX_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
new file mode 100644
index 00000000000..441180b64d5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_MEM_LOWLEVEL_H
+#define _KBASE_MEM_LOWLEVEL_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+#include <linux/dma-mapping.h>
+
+/**
+ * @brief Flags for kbase_phy_allocator_pages_alloc
+ */
+#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */
+#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */
+#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */
+
+#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON)
+
+#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */
+
+enum kbase_sync_type {
+ KBASE_SYNC_TO_CPU,
+ KBASE_SYNC_TO_DEVICE
+};
+
+#endif /* _KBASE_LOWLEVEL_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
new file mode 100644
index 00000000000..bf60c192029
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c
@@ -0,0 +1,105 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase_gpu_memory_debugfs.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/* mam_profile file name max length 22 based on format <int>_<int>\0 */
+#define KBASEP_DEBUGFS_FNAME_SIZE_MAX (10+1+10+1)
+
+void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+ size_t size)
+{
+ spin_lock(&kctx->mem_profile_lock);
+ kfree(kctx->mem_profile_data);
+ kctx->mem_profile_data = data;
+ kctx->mem_profile_size = size;
+ spin_unlock(&kctx->mem_profile_lock);
+}
+
+/** Show callback for the @c mem_profile debugfs file.
+ *
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
+ * @param sfile The debugfs entry
+ * @param data Data associated with the entry
+ *
+ * @return 0 if successfully prints data in debugfs entry file
+ * -1 if it encountered an error
+ */
+static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data)
+{
+ struct kbase_context *kctx = sfile->private;
+
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ spin_lock(&kctx->mem_profile_lock);
+ seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size);
+ seq_putc(sfile, '\n');
+ spin_unlock(&kctx->mem_profile_lock);
+
+ return 0;
+}
+
+/*
+ * File operations related to debugfs entry for mem_profile
+ */
+static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_mem_profile_seq_show, in->i_private);
+}
+
+static const struct file_operations kbasep_mem_profile_debugfs_fops = {
+ .open = kbasep_mem_profile_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ spin_lock_init(&kctx->mem_profile_lock);
+
+ debugfs_create_file("mem_profile", S_IRUGO, kctx->kctx_dentry, kctx,
+ &kbasep_mem_profile_debugfs_fops);
+}
+
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(kctx != NULL);
+
+ spin_lock(&kctx->mem_profile_lock);
+ kfree(kctx->mem_profile_data);
+ kctx->mem_profile_data = NULL;
+ spin_unlock(&kctx->mem_profile_lock);
+}
+
+#else /* CONFIG_DEBUG_FS */
+
+/**
+ * @brief Stub function for when debugfs is disabled
+ */
+void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+ size_t size)
+{
+ kfree(data);
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
new file mode 100644
index 00000000000..205bd378c8e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs.h
+ * Header file for mem profiles entries in debugfs
+ *
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H
+#define _KBASE_MEM_PROFILE_DEBUGFS_H
+
+#include <mali_kbase.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+/**
+ * @brief Add new entry to Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx);
+
+/**
+ * @brief Remove entry from Mali memory profile debugfs
+ */
+void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
+
+/**
+ * @brief Insert data to debugfs file, so it can be read by userspce
+ *
+ * Function takes ownership of @c data and frees it later when new data
+ * are inserted.
+ *
+ * @param kctx Context to which file data should be inserted
+ * @param data NULL-terminated string to be inserted to mem_profile file,
+ without trailing new line character
+ * @param size @c buf length
+ */
+void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data,
+ size_t size);
+
+#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
new file mode 100644
index 00000000000..82f0702974c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_mem_profile_debugfs_buf_size.h
+ * Header file for the size of the buffer to accumulate the histogram report text in
+ */
+
+#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
+
+/**
+ * The size of the buffer to accumulate the histogram report text in
+ * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ */
+#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56))
+
+#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
new file mode 100644
index 00000000000..325b79082eb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c
@@ -0,0 +1,1653 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_mmu.c
+ * Base kernel MMU management.
+ */
+
+/* #define DEBUG 1 */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+#include <mali_kbase_gator.h>
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+#include <mali_kbase_tlstream.h>
+#endif
+#include <mali_kbase_debug.h>
+
+#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
+
+#include <mali_kbase_defs.h>
+#include <mali_kbase_hw.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define KBASE_MMU_PAGE_ENTRIES 512
+
+/**
+ * kbase_mmu_sync_pgd - sync page directory to memory
+ * @dev: Device pointer.
+ * @handle: Address of DMA region.
+ * @size: Size of the region to sync.
+ *
+ * This should be called after each page directory update.
+ */
+
+static void kbase_mmu_sync_pgd(struct device *dev,
+ dma_addr_t handle, size_t size)
+{
+ dma_sync_single_for_device(dev, handle, size, DMA_TO_DEVICE);
+}
+
+/*
+ * Definitions:
+ * - PGD: Page Directory.
+ * - PTE: Page Table Entry. A 64bit value pointing to the next
+ * level of translation
+ * - ATE: Address Transation Entry. A 64bit value pointing to
+ * a 4kB physical page.
+ */
+
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+ struct kbase_as *as, const char *reason_str);
+
+
+static size_t make_multiple(size_t minimum, size_t multiple)
+{
+ size_t remainder = minimum % multiple;
+
+ if (remainder == 0)
+ return minimum;
+
+ return minimum + multiple - remainder;
+}
+
+void page_fault_worker(struct work_struct *data)
+{
+ u64 fault_pfn;
+ u32 fault_status;
+ size_t new_pages;
+ size_t fault_rel_pfn;
+ struct kbase_as *faulting_as;
+ int as_no;
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+ struct kbase_va_region *region;
+ int err;
+ bool grown = false;
+
+ faulting_as = container_of(data, struct kbase_as, work_pagefault);
+ fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT;
+ as_no = faulting_as->number;
+
+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+ /* Grab the context that was already refcounted in kbase_mmu_interrupt().
+ * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+ *
+ * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+
+ if (kctx == NULL) {
+ /* Only handle this if not already suspended */
+ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ /* Address space has no context, terminate the work */
+
+ /* AS transaction begin */
+ mutex_lock(&faulting_as->transaction_mutex);
+
+ kbase_mmu_disable_as(kbdev, as_no);
+
+ mutex_unlock(&faulting_as->transaction_mutex);
+ /* AS transaction end */
+
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_pm_context_idle(kbdev);
+ }
+ atomic_dec(&kbdev->faults_pending);
+ return;
+ }
+
+ KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
+
+ fault_status = faulting_as->fault_status;
+ switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
+
+ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
+ /* need to check against the region to handle this one */
+ break;
+
+ case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Permission failure");
+ goto fault_done;
+
+ case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Tranlation table bus fault");
+ goto fault_done;
+
+ case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
+ /* nothing to do, but we don't expect this fault currently */
+ dev_warn(kbdev->dev, "Access flag unexpectedly set");
+ goto fault_done;
+
+
+ default:
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Unknown fault code");
+ goto fault_done;
+ }
+
+ /* so we have a translation fault, let's see if it is for growable
+ * memory */
+ kbase_gpu_vm_lock(kctx);
+
+ region = kbase_region_tracker_find_region_enclosing_address(kctx,
+ faulting_as->fault_addr);
+ if (!region || region->flags & KBASE_REG_FREE) {
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Memory is not mapped on the GPU");
+ goto fault_done;
+ }
+
+ if ((region->flags & GROWABLE_FLAGS_REQUIRED)
+ != GROWABLE_FLAGS_REQUIRED) {
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Memory is not growable");
+ goto fault_done;
+ }
+
+ /* find the size we need to grow it by */
+ /* we know the result fit in a size_t due to kbase_region_tracker_find_region_enclosing_address
+ * validating the fault_adress to be within a size_t from the start_pfn */
+ fault_rel_pfn = fault_pfn - region->start_pfn;
+
+ if (fault_rel_pfn < kbase_reg_current_backed_size(region)) {
+ dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
+ faulting_as->fault_addr, region->start_pfn,
+ region->start_pfn +
+ kbase_reg_current_backed_size(region));
+
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ /* [1] in case another page fault occurred while we were
+ * handling the (duplicate) page fault we need to ensure we
+ * don't loose the other page fault as result of us clearing
+ * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
+ * an UNLOCK command that will retry any stalled memory
+ * transaction (which should cause the other page fault to be
+ * raised again).
+ */
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+ AS_COMMAND_UNLOCK, 1);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_gpu_vm_unlock(kctx);
+
+ goto fault_done;
+ }
+
+ new_pages = make_multiple(fault_rel_pfn -
+ kbase_reg_current_backed_size(region) + 1,
+ region->extent);
+
+ /* cap to max vsize */
+ if (new_pages + kbase_reg_current_backed_size(region) >
+ region->nr_pages)
+ new_pages = region->nr_pages -
+ kbase_reg_current_backed_size(region);
+
+ if (0 == new_pages) {
+ /* Duplicate of a fault we've already handled, nothing to do */
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ /* See comment [1] about UNLOCK usage */
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0,
+ AS_COMMAND_UNLOCK, 1);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_gpu_vm_unlock(kctx);
+ goto fault_done;
+ }
+
+ if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) {
+ if (region->gpu_alloc != region->cpu_alloc) {
+ if (kbase_alloc_phy_pages_helper(
+ region->cpu_alloc, new_pages) == 0) {
+ grown = true;
+ } else {
+ kbase_free_phy_pages_helper(region->gpu_alloc,
+ new_pages);
+ }
+ } else {
+ grown = true;
+ }
+ }
+
+
+ if (grown) {
+ u32 op;
+
+ /* alloc success */
+ KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages);
+
+ /* AS transaction begin */
+ mutex_lock(&faulting_as->transaction_mutex);
+
+ /* set up the new pages */
+ err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags);
+ if (err) {
+ /* failed to insert pages, handle as a normal PF */
+ mutex_unlock(&faulting_as->transaction_mutex);
+ kbase_free_phy_pages_helper(region->gpu_alloc, new_pages);
+ if (region->gpu_alloc != region->cpu_alloc)
+ kbase_free_phy_pages_helper(region->cpu_alloc,
+ new_pages);
+ kbase_gpu_vm_unlock(kctx);
+ /* The locked VA region will be unlocked and the cache invalidated in here */
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Page table update failure");
+ goto fault_done;
+ }
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+ kbase_trace_mali_page_fault_insert_pages(as_no, new_pages);
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+ kbase_tlstream_aux_pagefault(
+ as_no,
+ atomic_read(&kctx->used_pages));
+#endif
+
+ /* flush L2 and unlock the VA (resumes the MMU) */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367))
+ op = AS_COMMAND_FLUSH;
+ else
+ op = AS_COMMAND_FLUSH_PT;
+
+ /* clear MMU interrupt - this needs to be done after updating
+ * the page tables but before issuing a FLUSH command. The
+ * FLUSH cmd has a side effect that it restarts stalled memory
+ * transactions in other address spaces which may cause
+ * another fault to occur. If we didn't clear the interrupt at
+ * this stage a new IRQ might not be raised when the GPU finds
+ * a MMU IRQ is already pending.
+ */
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+
+ kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx,
+ faulting_as->fault_addr >> PAGE_SHIFT,
+ new_pages,
+ op, 1);
+
+ mutex_unlock(&faulting_as->transaction_mutex);
+ /* AS transaction end */
+
+ /* reenable this in the mask */
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_gpu_vm_unlock(kctx);
+ } else {
+ /* failed to extend, handle as a normal PF */
+ kbase_gpu_vm_unlock(kctx);
+ kbase_mmu_report_fault_and_kill(kctx, faulting_as,
+ "Page allocation failure");
+ }
+
+fault_done:
+ /*
+ * By this point, the fault was handled in some way,
+ * so release the ctx refcount
+ */
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+ atomic_dec(&kbdev->faults_pending);
+}
+
+phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx)
+{
+ phys_addr_t pgd;
+ u64 *page;
+ int i;
+ struct page *p;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ kbase_atomic_add_pages(1, &kctx->used_pages);
+ kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages);
+
+ if (kbase_mem_allocator_alloc(kctx->pgd_allocator, 1, &pgd) != 0)
+ goto sub_pages;
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ page = kmap(p);
+ if (NULL == page)
+ goto alloc_free;
+
+ kbase_process_page_usage_inc(kctx, 1);
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
+ kctx->kbdev->mmu_mode->entry_invalidate(&page[i]);
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev, kbase_dma_addr(p), PAGE_SIZE);
+
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ return pgd;
+
+alloc_free:
+ kbase_mem_allocator_free(kctx->pgd_allocator, 1, &pgd, false);
+sub_pages:
+ kbase_atomic_sub_pages(1, &kctx->used_pages);
+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd);
+
+/* Given PGD PFN for level N, return PGD PFN for level N+1 */
+static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+ u64 *page;
+ phys_addr_t target_pgd;
+ struct page *p;
+
+ KBASE_DEBUG_ASSERT(pgd);
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ /*
+ * Architecture spec defines level-0 as being the top-most.
+ * This is a bit unfortunate here, but we keep the same convention.
+ */
+ vpfn >>= (3 - level) * 9;
+ vpfn &= 0x1FF;
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ page = kmap(p);
+ if (NULL == page) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n");
+ return 0;
+ }
+
+ target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+
+ if (!target_pgd) {
+ target_pgd = kbase_mmu_alloc_pgd(kctx);
+ if (!target_pgd) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n");
+ kunmap(p);
+ return 0;
+ }
+
+ kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd);
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p), PAGE_SIZE);
+ /* Rely on the caller to update the address space flags. */
+ }
+
+ kunmap(p);
+ return target_pgd;
+}
+
+static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+ phys_addr_t pgd;
+ int l;
+
+ pgd = kctx->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
+ pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l);
+ /* Handle failure condition */
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n");
+ return 0;
+ }
+ }
+
+ return pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level)
+{
+ u64 *page;
+ phys_addr_t target_pgd;
+
+ KBASE_DEBUG_ASSERT(pgd);
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ /*
+ * Architecture spec defines level-0 as being the top-most.
+ * This is a bit unfortunate here, but we keep the same convention.
+ */
+ vpfn >>= (3 - level) * 9;
+ vpfn &= 0x1FF;
+
+ page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+ /* kmap_atomic should NEVER fail */
+ KBASE_DEBUG_ASSERT(NULL != page);
+
+ target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+ /* As we are recovering from what has already been set up, we should have a target_pgd */
+ KBASE_DEBUG_ASSERT(0 != target_pgd);
+ kunmap_atomic(page);
+ return target_pgd;
+}
+
+static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn)
+{
+ phys_addr_t pgd;
+ int l;
+
+ pgd = kctx->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) {
+ pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l);
+ /* Should never fail */
+ KBASE_DEBUG_ASSERT(0 != pgd);
+ }
+
+ return pgd;
+}
+
+static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn,
+ size_t nr)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_mode = kctx->kbdev->mmu_mode;
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn);
+ KBASE_DEBUG_ASSERT(0 != pgd);
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+
+ pgd_page = kmap_atomic(p);
+ KBASE_DEBUG_ASSERT(NULL != pgd_page);
+
+ /* Invalidate the entries we added */
+ for (i = 0; i < count; i++)
+ mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+ vpfn += count;
+ nr -= count;
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p),
+ PAGE_SIZE);
+
+ kunmap_atomic(pgd_page);
+ }
+}
+
+/*
+ * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t phys, size_t nr,
+ unsigned long flags)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ /* In case the insert_single_page only partially completes we need to be
+ * able to recover */
+ bool recover_required = false;
+ u64 recover_vpfn = vpfn;
+ size_t recover_count = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ /*
+ * Repeatedly calling mmu_get_bottom_pte() is clearly
+ * suboptimal. We don't have to re-parse the whole tree
+ * each time (just cache the l0-l2 sequence).
+ * On the other hand, it's only a gain when we map more than
+ * 256 pages at once (on average). Do we really care?
+ */
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return -EINVAL;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned int ofs = index + i;
+
+ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+ kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+ phys, flags);
+ }
+
+ vpfn += count;
+ nr -= count;
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64));
+
+ kunmap(p);
+ /* We have started modifying the page table.
+ * If further pages need inserting and fail we need to undo what
+ * has already taken place */
+ recover_required = true;
+ recover_count += count;
+ }
+ return 0;
+}
+
+/*
+ * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn'
+ */
+int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn,
+ phys_addr_t *phys, size_t nr,
+ unsigned long flags)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ /* In case the insert_pages only partially completes we need to be able
+ * to recover */
+ bool recover_required = false;
+ u64 recover_vpfn = vpfn;
+ size_t recover_count = 0;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ /* 64-bit address range is the max */
+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ /*
+ * Repeatedly calling mmu_get_bottom_pte() is clearly
+ * suboptimal. We don't have to re-parse the whole tree
+ * each time (just cache the l0-l2 sequence).
+ * On the other hand, it's only a gain when we map more than
+ * 256 pages at once (on average). Do we really care?
+ */
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return -EINVAL;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n");
+ if (recover_required) {
+ /* Invalidate the pages we have partially
+ * completed */
+ mmu_insert_pages_failure_recovery(kctx,
+ recover_vpfn,
+ recover_count);
+ }
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++) {
+ unsigned int ofs = index + i;
+
+ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
+ kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs],
+ phys[i], flags);
+ }
+
+ phys += count;
+ vpfn += count;
+ nr -= count;
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64));
+
+ kunmap(p);
+ /* We have started modifying the page table. If further pages
+ * need inserting and fail we need to undo what has already
+ * taken place */
+ recover_required = true;
+ recover_count += count;
+ }
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+
+/**
+ * This function is responsible for validating the MMU PTs
+ * triggering reguired flushes.
+ *
+ * * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+ struct kbase_device *kbdev;
+ bool ctx_is_in_runpool;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ kbdev = kctx->kbdev;
+
+ /* We must flush if we're currently running jobs. At the very least, we need to retain the
+ * context to ensure it doesn't schedule out whilst we're trying to flush it */
+ ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx);
+
+ if (ctx_is_in_runpool) {
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ /* Second level check is to try to only do this when jobs are running. The refcount is
+ * a heuristic for this. */
+ if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) {
+ if (!kbase_pm_context_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ int ret;
+ u32 op;
+
+ /* AS transaction begin */
+ mutex_lock(&kbdev->as[
+ kctx->as_nr].transaction_mutex);
+
+ if (kbase_hw_has_issue(kbdev,
+ BASE_HW_ISSUE_6367))
+ op = AS_COMMAND_FLUSH;
+ else
+ op = AS_COMMAND_FLUSH_MEM;
+
+ ret = kbase_mmu_hw_do_operation(kbdev,
+ &kbdev->as[kctx->as_nr],
+ kctx, vpfn, nr,
+ op, 0);
+#if KBASE_GPU_RESET_EN
+ if (ret) {
+ /* Flush failed to complete, assume the
+ * GPU has hung and perform a reset to
+ * recover */
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issueing GPU soft-reset to recover\n");
+ if (kbase_prepare_to_reset_gpu(kbdev))
+ kbase_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+
+ mutex_unlock(&kbdev->as[
+ kctx->as_nr].transaction_mutex);
+ /* AS transaction end */
+
+ kbase_pm_context_idle(kbdev);
+ }
+ }
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+ }
+}
+
+/*
+ * We actually only discard the ATE, and not the page table
+ * pages. There is a potential DoS here, as we'll leak memory by
+ * having PTEs that are potentially unused. Will require physical
+ * page accounting, so MMU pages are part of the process allocation.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ struct kbase_device *kbdev;
+ size_t requested_nr = nr;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (0 == nr) {
+ /* early out if nothing to do */
+ return 0;
+ }
+
+ kbdev = kctx->kbdev;
+ mmu_mode = kbdev->mmu_mode;
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n");
+ return -EINVAL;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++)
+ mmu_mode->entry_invalidate(&pgd_page[index + i]);
+
+ vpfn += count;
+ nr -= count;
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64));
+
+ kunmap(p);
+ }
+
+ kbase_mmu_flush(kctx, vpfn, requested_nr);
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+/**
+ * Update the entries for specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'.
+ * This call is being triggered as a response to the changes of the mem attributes
+ *
+ * @pre : The caller is responsible for validating the memory attributes
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ */
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags)
+{
+ phys_addr_t pgd;
+ u64 *pgd_page;
+ size_t requested_nr = nr;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(0 != vpfn);
+ KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_mode = kctx->kbdev->mmu_mode;
+
+ dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages",
+ vpfn, phys, nr);
+
+ while (nr) {
+ unsigned int i;
+ unsigned int index = vpfn & 0x1FF;
+ size_t count = KBASE_MMU_PAGE_ENTRIES - index;
+ struct page *p;
+
+ if (count > nr)
+ count = nr;
+
+ pgd = mmu_get_bottom_pgd(kctx, vpfn);
+ if (!pgd) {
+ dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n");
+ return -EINVAL;
+ }
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ pgd_page = kmap(p);
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev, "kmap failure\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < count; i++)
+ mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i],
+ flags);
+
+ phys += count;
+ vpfn += count;
+ nr -= count;
+
+ kbase_mmu_sync_pgd(kctx->kbdev->dev,
+ kbase_dma_addr(p) +
+ (index * sizeof(u64)),
+ count * sizeof(u64));
+
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ }
+
+ kbase_mmu_flush(kctx, vpfn, requested_nr);
+
+ return 0;
+}
+
+/* This is a debug feature only */
+static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd)
+{
+ u64 *page;
+ int i;
+
+ page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+ /* kmap_atomic should NEVER fail. */
+ KBASE_DEBUG_ASSERT(NULL != page);
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+ if (kctx->kbdev->mmu_mode->ate_is_valid(page[i]))
+ beenthere(kctx, "live pte %016lx", (unsigned long)page[i]);
+ }
+ kunmap_atomic(page);
+}
+
+static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer)
+{
+ phys_addr_t target_pgd;
+ u64 *pgd_page;
+ int i;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
+ /* kmap_atomic should NEVER fail. */
+ KBASE_DEBUG_ASSERT(NULL != pgd_page);
+ /* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */
+ memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
+ kunmap_atomic(pgd_page);
+ pgd_page = pgd_page_buffer;
+
+ mmu_mode = kctx->kbdev->mmu_mode;
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+ target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+ if (target_pgd) {
+ if (level < 2) {
+ mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64)));
+ } else {
+ /*
+ * So target_pte is a level-3 page.
+ * As a leaf, it is safe to free it.
+ * Unless we have live pages attached to it!
+ */
+ mmu_check_unused(kctx, target_pgd);
+ }
+
+ beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1);
+ if (zap) {
+ kbase_mem_allocator_free(kctx->pgd_allocator, 1, &target_pgd, true);
+ kbase_process_page_usage_dec(kctx, 1);
+ kbase_atomic_sub_pages(1, &kctx->used_pages);
+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+ }
+ }
+ }
+}
+
+int kbase_mmu_init(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages);
+
+ /* Preallocate MMU depth of four pages for mmu_teardown_level to use */
+ kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+
+ if (NULL == kctx->mmu_teardown_pages)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void kbase_mmu_term(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+ kfree(kctx->mmu_teardown_pages);
+ kctx->mmu_teardown_pages = NULL;
+}
+
+void kbase_mmu_free_pgd(struct kbase_context *kctx)
+{
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages);
+
+ beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd);
+ kbase_mem_allocator_free(kctx->pgd_allocator, 1, &kctx->pgd, true);
+ kbase_process_page_usage_dec(kctx, 1);
+ kbase_atomic_sub_pages(1, &kctx->used_pages);
+ kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages);
+}
+
+KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd);
+
+static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left)
+{
+ phys_addr_t target_pgd;
+ u64 *pgd_page;
+ int i;
+ size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
+ size_t dump_size;
+ struct kbase_mmu_mode const *mmu_mode;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mmu_mode = kctx->kbdev->mmu_mode;
+
+ pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+ if (!pgd_page) {
+ dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n");
+ return 0;
+ }
+
+ if (*size_left >= size) {
+ /* A modified physical address that contains the page table level */
+ u64 m_pgd = pgd | level;
+
+ /* Put the modified physical address in the output buffer */
+ memcpy(*buffer, &m_pgd, sizeof(m_pgd));
+ *buffer += sizeof(m_pgd);
+
+ /* Followed by the page table itself */
+ memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
+ *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
+
+ *size_left -= size;
+ }
+
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
+ if (mmu_mode->pte_is_valid(pgd_page[i])) {
+ target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
+
+ dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left);
+ if (!dump_size) {
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ return 0;
+ }
+ size += dump_size;
+ }
+ }
+
+ kunmap(pfn_to_page(PFN_DOWN(pgd)));
+
+ return size;
+}
+
+void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
+{
+ void *kaddr;
+ size_t size_left;
+
+ KBASE_DEBUG_ASSERT(kctx);
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ if (0 == nr_pages) {
+ /* can't find in a 0 sized buffer, early out */
+ return NULL;
+ }
+
+ size_left = nr_pages * PAGE_SIZE;
+
+ KBASE_DEBUG_ASSERT(0 != size_left);
+ kaddr = vmalloc_user(size_left);
+
+ if (kaddr) {
+ u64 end_marker = 0xFFULL;
+ char *buffer = (char *)kaddr;
+ size_t size = kbasep_mmu_dump_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, &buffer, &size_left);
+
+ if (!size) {
+ vfree(kaddr);
+ return NULL;
+ }
+
+ /* Add on the size for the end marker */
+ size += sizeof(u64);
+
+ if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) {
+ /* The buffer isn't big enough - free the memory and return failure */
+ vfree(kaddr);
+ return NULL;
+ }
+
+ /* Add the end marker */
+ memcpy(buffer, &end_marker, sizeof(u64));
+ }
+
+ return kaddr;
+}
+KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+
+void bus_fault_worker(struct work_struct *data)
+{
+ struct kbase_as *faulting_as;
+ int as_no;
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+#if KBASE_GPU_RESET_EN
+ bool reset_status = false;
+#endif /* KBASE_GPU_RESET_EN */
+
+ faulting_as = container_of(data, struct kbase_as, work_busfault);
+
+ as_no = faulting_as->number;
+
+ kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
+
+ /* Grab the context that was already refcounted in kbase_mmu_interrupt().
+ * Therefore, it cannot be scheduled out of this AS until we explicitly release it
+ *
+ * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */
+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no);
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+ /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+ * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+ * are evicted from the GPU before the switch.
+ */
+ dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n");
+ reset_status = kbase_prepare_to_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */
+ if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+
+ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+ /* AS transaction begin */
+ mutex_lock(&kbdev->as[as_no].transaction_mutex);
+
+ /* Set the MMU into unmapped mode */
+ kbase_mmu_disable_as(kbdev, as_no);
+
+ mutex_unlock(&kbdev->as[as_no].transaction_mutex);
+ /* AS transaction end */
+
+ kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_BUS);
+ kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx,
+ KBASE_MMU_FAULT_TYPE_BUS);
+
+ kbase_pm_context_idle(kbdev);
+ }
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+ kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+ /* By this point, the fault was handled in some way, so release the ctx refcount */
+ if (kctx != NULL)
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+
+ atomic_dec(&kbdev->faults_pending);
+}
+
+const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code)
+{
+ const char *e;
+
+ switch (exception_code) {
+ /* Non-Fault Status code */
+ case 0x00:
+ e = "NOT_STARTED/IDLE/OK";
+ break;
+ case 0x01:
+ e = "DONE";
+ break;
+ case 0x02:
+ e = "INTERRUPTED";
+ break;
+ case 0x03:
+ e = "STOPPED";
+ break;
+ case 0x04:
+ e = "TERMINATED";
+ break;
+ case 0x08:
+ e = "ACTIVE";
+ break;
+ /* Job exceptions */
+ case 0x40:
+ e = "JOB_CONFIG_FAULT";
+ break;
+ case 0x41:
+ e = "JOB_POWER_FAULT";
+ break;
+ case 0x42:
+ e = "JOB_READ_FAULT";
+ break;
+ case 0x43:
+ e = "JOB_WRITE_FAULT";
+ break;
+ case 0x44:
+ e = "JOB_AFFINITY_FAULT";
+ break;
+ case 0x48:
+ e = "JOB_BUS_FAULT";
+ break;
+ case 0x50:
+ e = "INSTR_INVALID_PC";
+ break;
+ case 0x51:
+ e = "INSTR_INVALID_ENC";
+ break;
+ case 0x52:
+ e = "INSTR_TYPE_MISMATCH";
+ break;
+ case 0x53:
+ e = "INSTR_OPERAND_FAULT";
+ break;
+ case 0x54:
+ e = "INSTR_TLS_FAULT";
+ break;
+ case 0x55:
+ e = "INSTR_BARRIER_FAULT";
+ break;
+ case 0x56:
+ e = "INSTR_ALIGN_FAULT";
+ break;
+ case 0x58:
+ e = "DATA_INVALID_FAULT";
+ break;
+ case 0x59:
+ e = "TILE_RANGE_FAULT";
+ break;
+ case 0x5A:
+ e = "ADDR_RANGE_FAULT";
+ break;
+ case 0x60:
+ e = "OUT_OF_MEMORY";
+ break;
+ /* GPU exceptions */
+ case 0x80:
+ e = "DELAYED_BUS_FAULT";
+ break;
+ case 0x88:
+ e = "SHAREABILITY_FAULT";
+ break;
+ /* MMU exceptions */
+ case 0xC0:
+ case 0xC1:
+ case 0xC2:
+ case 0xC3:
+ case 0xC4:
+ case 0xC5:
+ case 0xC6:
+ case 0xC7:
+ e = "TRANSLATION_FAULT";
+ break;
+ case 0xC8:
+ e = "PERMISSION_FAULT";
+ break;
+ case 0xD0:
+ case 0xD1:
+ case 0xD2:
+ case 0xD3:
+ case 0xD4:
+ case 0xD5:
+ case 0xD6:
+ case 0xD7:
+ e = "TRANSTAB_BUS_FAULT";
+ break;
+ case 0xD8:
+ e = "ACCESS_FLAG";
+ break;
+ break;
+ default:
+ e = "UNKNOWN";
+ break;
+ };
+
+ return e;
+}
+
+static const char *access_type_name(struct kbase_device *kbdev,
+ u32 fault_status)
+{
+ switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+ return "UNKNOWN";
+ case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+ return "READ";
+ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+ return "WRITE";
+ case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+ return "EXECUTE";
+ default:
+ KBASE_DEBUG_ASSERT(0);
+ return NULL;
+ }
+}
+
+/**
+ * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on.
+ */
+static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
+ struct kbase_as *as, const char *reason_str)
+{
+ unsigned long flags;
+ int exception_type;
+ int access_type;
+ int source_id;
+ int as_no;
+ struct kbase_device *kbdev;
+ struct kbasep_js_device_data *js_devdata;
+
+#if KBASE_GPU_RESET_EN
+ bool reset_status = false;
+#endif
+
+ as_no = as->number;
+ kbdev = kctx->kbdev;
+ js_devdata = &kbdev->js_data;
+
+ /* ASSERT that the context won't leave the runpool */
+ KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0);
+
+ /* decode the fault status */
+ exception_type = as->fault_status & 0xFF;
+ access_type = (as->fault_status >> 8) & 0x3;
+ source_id = (as->fault_status >> 16);
+
+ /* terminal fault, print info about the fault */
+ dev_err(kbdev->dev,
+ "Unhandled Page fault in AS%d at VA 0x%016llX\n"
+ "Reason: %s\n"
+ "raw fault status 0x%X\n"
+ "decoded fault status: %s\n"
+ "exception type 0x%X: %s\n"
+ "access type 0x%X: %s\n"
+ "source id 0x%X\n"
+ "pid: %d\n",
+ as_no, as->fault_addr,
+ reason_str,
+ as->fault_status,
+ (as->fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+ exception_type, kbase_exception_name(kbdev, exception_type),
+ access_type, access_type_name(kbdev, as->fault_status),
+ source_id,
+ kctx->pid);
+
+ /* hardware counters dump fault handling */
+ if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) &&
+ (kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_DUMPING)) {
+ unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
+
+ if ((as->fault_addr >= kbdev->hwcnt.addr) &&
+ (as->fault_addr < (kbdev->hwcnt.addr +
+ (num_core_groups * 2048))))
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+ }
+
+ /* Stop the kctx from submitting more jobs and cause it to be scheduled
+ * out/rescheduled - this will occur on releasing the context's refcount */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+ spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+ /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this
+ * context can appear in the job slots from this point on */
+ kbase_backend_jm_kill_jobs_from_kctx(kctx);
+ /* AS transaction begin */
+ mutex_lock(&as->transaction_mutex);
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) {
+ /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode.
+ * We start the reset before switching to UNMAPPED to ensure that unrelated jobs
+ * are evicted from the GPU before the switch.
+ */
+ dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery.");
+ reset_status = kbase_prepare_to_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */
+ kbase_mmu_disable_as(kbdev, as_no);
+
+ mutex_unlock(&as->transaction_mutex);
+ /* AS transaction end */
+ /* Clear down the fault */
+ kbase_mmu_hw_clear_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE);
+ kbase_mmu_hw_enable_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE);
+
+#if KBASE_GPU_RESET_EN
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status)
+ kbase_reset_gpu(kbdev);
+#endif /* KBASE_GPU_RESET_EN */
+}
+
+void kbasep_as_do_poke(struct work_struct *work)
+{
+ struct kbase_as *as;
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(work);
+ as = container_of(work, struct kbase_as, poke_work);
+ kbdev = container_of(as, struct kbase_device, as[as->number]);
+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+ /* GPU power will already be active by virtue of the caller holding a JS
+ * reference on the address space, and will not release it until this worker
+ * has finished */
+
+ /* Further to the comment above, we know that while this function is running
+ * the AS will not be released as before the atom is released this workqueue
+ * is flushed (in kbase_as_poking_timer_release_atom)
+ */
+ kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number);
+
+ /* AS transaction begin */
+ mutex_lock(&as->transaction_mutex);
+ /* Force a uTLB invalidate */
+ kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0,
+ AS_COMMAND_UNLOCK, 0);
+ mutex_unlock(&as->transaction_mutex);
+ /* AS transaction end */
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ if (as->poke_refcount &&
+ !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) {
+ /* Only queue up the timer if we need it, and we're not trying to kill it */
+ hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL);
+ }
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+}
+
+enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer)
+{
+ struct kbase_as *as;
+ int queue_work_ret;
+
+ KBASE_DEBUG_ASSERT(NULL != timer);
+ as = container_of(timer, struct kbase_as, poke_timer);
+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+ queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+ KBASE_DEBUG_ASSERT(queue_work_ret);
+ return HRTIMER_NORESTART;
+}
+
+/**
+ * Retain the poking timer on an atom's context (if the atom hasn't already
+ * done so), and start the timer (if it's not already started).
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that's running on the GPU.
+ *
+ * The caller must hold kbasep_js_device_data::runpool_irq::lock
+ *
+ * This can be called safely from atomic context
+ */
+void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct kbase_as *as;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (katom->poking)
+ return;
+
+ katom->poking = 1;
+
+ /* It's safe to work on the as/as_nr without an explicit reference,
+ * because the caller holds the runpool_irq lock, and the atom itself
+ * was also running and had already taken a reference */
+ as = &kbdev->as[kctx->as_nr];
+
+ if (++(as->poke_refcount) == 1) {
+ /* First refcount for poke needed: check if not already in flight */
+ if (!as->poke_state) {
+ /* need to start poking */
+ as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT;
+ queue_work(as->poke_wq, &as->poke_work);
+ }
+ }
+}
+
+/**
+ * If an atom holds a poking timer, release it and wait for it to finish
+ *
+ * This must only be called on a context that's scheduled in, and an atom
+ * that still has a JS reference on the context
+ *
+ * This must \b not be called from atomic context, since it can sleep.
+ */
+void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ struct kbase_as *as;
+ unsigned long flags;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ if (!katom->poking)
+ return;
+
+ as = &kbdev->as[kctx->as_nr];
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+ KBASE_DEBUG_ASSERT(as->poke_refcount > 0);
+ KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT);
+
+ if (--(as->poke_refcount) == 0) {
+ as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE;
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+ hrtimer_cancel(&as->poke_timer);
+ flush_workqueue(as->poke_wq);
+
+ spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags);
+
+ /* Re-check whether it's still needed */
+ if (as->poke_refcount) {
+ int queue_work_ret;
+ /* Poking still needed:
+ * - Another retain will not be starting the timer or queueing work,
+ * because it's still marked as in-flight
+ * - The hrtimer has finished, and has not started a new timer or
+ * queued work because it's been marked as killing
+ *
+ * So whatever happens now, just queue the work again */
+ as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE);
+ queue_work_ret = queue_work(as->poke_wq, &as->poke_work);
+ KBASE_DEBUG_ASSERT(queue_work_ret);
+ } else {
+ /* It isn't - so mark it as not in flight, and not killing */
+ as->poke_state = 0u;
+
+ /* The poke associated with the atom has now finished. If this is
+ * also the last atom on the context, then we can guarentee no more
+ * pokes (and thus no more poking register accesses) will occur on
+ * the context until new atoms are run */
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags);
+
+ katom->poking = 0;
+}
+
+void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (kctx == NULL) {
+ dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Suprious IRQ or SW Design Error?\n",
+ kbase_as_has_bus_fault(as) ? "Bus error" : "Page fault",
+ as->number, as->fault_addr);
+ }
+
+ if (kbase_as_has_bus_fault(as)) {
+ if (kctx) {
+ /*
+ * hw counters dumping in progress, signal the
+ * other thread that it failed
+ */
+ if ((kbdev->hwcnt.kctx == kctx) &&
+ (kbdev->hwcnt.backend.state ==
+ KBASE_INSTR_STATE_DUMPING))
+ kbdev->hwcnt.backend.state =
+ KBASE_INSTR_STATE_FAULT;
+
+ /*
+ * Stop the kctx from submitting more jobs and cause it
+ * to be scheduled out/rescheduled when all references
+ * to it are released
+ */
+ kbasep_js_clear_submit_allowed(js_devdata, kctx);
+
+ dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n",
+ as->number, as->fault_addr);
+
+ }
+
+ /*
+ * We need to switch to UNMAPPED mode - but we do this in a
+ * worker so that we can sleep
+ */
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault));
+ WARN_ON(work_pending(&as->work_busfault));
+ INIT_WORK(&as->work_busfault, bus_fault_worker);
+ queue_work(as->pf_wq, &as->work_busfault);
+ atomic_inc(&kbdev->faults_pending);
+ } else {
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault));
+ WARN_ON(work_pending(&as->work_pagefault));
+ INIT_WORK(&as->work_pagefault, page_fault_worker);
+ queue_work(as->pf_wq, &as->work_pagefault);
+ atomic_inc(&kbdev->faults_pending);
+ }
+}
+
+void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
+{
+ int i;
+
+ for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
+ struct kbase_as *as = &kbdev->as[i];
+
+ flush_workqueue(as->pf_wq);
+ }
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
new file mode 100644
index 00000000000..c6478b2e0a6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h
@@ -0,0 +1,121 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file
+ * Interface file for accessing MMU hardware functionality
+ */
+
+/**
+ * @page mali_kbase_mmu_hw_page MMU hardware interface
+ *
+ * @section mali_kbase_mmu_hw_intro_sec Introduction
+ * This module provides an abstraction for accessing the functionality provided
+ * by the midgard MMU and thus allows all MMU HW access to be contained within
+ * one common place and allows for different backends (implementations) to
+ * be provided.
+ */
+
+#ifndef _MALI_KBASE_MMU_HW_H_
+#define _MALI_KBASE_MMU_HW_H_
+
+/* Forward declarations */
+struct kbase_device;
+struct kbase_as;
+struct kbase_context;
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup mali_kbase_mmu_hw MMU access APIs
+ * @{
+ */
+
+/** @brief MMU fault type descriptor.
+ */
+enum kbase_mmu_fault_type {
+ KBASE_MMU_FAULT_TYPE_UNKNOWN = 0,
+ KBASE_MMU_FAULT_TYPE_PAGE,
+ KBASE_MMU_FAULT_TYPE_BUS
+};
+
+/** @brief Configure an address space for use.
+ *
+ * Configure the MMU using the address space details setup in the
+ * @ref kbase_context structure.
+ *
+ * @param[in] kbdev kbase device to configure.
+ * @param[in] as address space to configure.
+ * @param[in] kctx kbase context to configure.
+ */
+void kbase_mmu_hw_configure(struct kbase_device *kbdev,
+ struct kbase_as *as, struct kbase_context *kctx);
+
+/** @brief Issue an operation to the MMU.
+ *
+ * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that
+ * is associated with the provided @ref kbase_context over the specified range
+ *
+ * @param[in] kbdev kbase device to issue the MMU operation on.
+ * @param[in] as address space to issue the MMU operation on.
+ * @param[in] kctx kbase context to issue the MMU operation on.
+ * @param[in] vpfn MMU Virtual Page Frame Number to start the
+ * operation on.
+ * @param[in] nr Number of pages to work on.
+ * @param[in] type Operation type (written to ASn_COMMAND).
+ * @param[in] handling_irq Is this operation being called during the handling
+ * of an interrupt?
+ *
+ * @return Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type,
+ unsigned int handling_irq);
+
+/** @brief Clear a fault that has been previously reported by the MMU.
+ *
+ * Clear a bus error or page fault that has been reported by the MMU.
+ *
+ * @param[in] kbdev kbase device to clear the fault from.
+ * @param[in] as address space to clear the fault from.
+ * @param[in] kctx kbase context to clear the fault from or NULL.
+ * @param[in] type The type of fault that needs to be cleared.
+ */
+void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @brief Enable fault that has been previously reported by the MMU.
+ *
+ * After a page fault or bus error has been reported by the MMU these
+ * will be disabled. After these are handled this function needs to be
+ * called to enable the page fault or bus error fault again.
+ *
+ * @param[in] kbdev kbase device to again enable the fault from.
+ * @param[in] as address space to again enable the fault from.
+ * @param[in] kctx kbase context to again enable the fault from.
+ * @param[in] type The type of fault that needs to be enabled again.
+ */
+void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
+ struct kbase_context *kctx, enum kbase_mmu_fault_type type);
+
+/** @} *//* end group mali_kbase_mmu_hw */
+/** @} *//* end group base_kbase_api */
+
+#endif /* _MALI_KBASE_MMU_HW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
new file mode 100644
index 00000000000..26e683e2960
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h
@@ -0,0 +1,44 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MALI_KBASE_MMU_MODE_
+#define _MALI_KBASE_MMU_MODE_
+
+#include <linux/types.h>
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_device;
+struct kbase_as;
+
+struct kbase_mmu_mode {
+ void (*update)(struct kbase_context *kctx);
+ void (*disable_as)(struct kbase_device *kbdev, int as_nr);
+ phys_addr_t (*pte_to_phy_addr)(u64 entry);
+ int (*ate_is_valid)(u64 ate);
+ int (*pte_is_valid)(u64 pte);
+ void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags);
+ void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+ void (*entry_invalidate)(u64 *entry);
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void);
+struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
+
+#endif /* _MALI_KBASE_MMU_MODE_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
new file mode 100644
index 00000000000..2211a1dcf01
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c
@@ -0,0 +1,188 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include "mali_kbase_mmu_mode.h"
+
+#include "mali_kbase.h"
+#include "mali_midg_regmap.h"
+
+#define ENTRY_TYPE_MASK 3ULL
+#define ENTRY_IS_ATE 1ULL
+#define ENTRY_IS_INVAL 2ULL
+#define ENTRY_IS_PTE 3ULL
+
+#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */
+#define ENTRY_RD_BIT (1ULL << 6)
+#define ENTRY_WR_BIT (1ULL << 7)
+#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */
+#define ENTRY_ACCESS_BIT (1ULL << 10)
+#define ENTRY_NX_BIT (1ULL << 54)
+
+#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \
+ ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT)
+
+/* Helper Function to perform assignment of page table entries, to
+ * ensure the use of strd, which is required on LPAE systems.
+ */
+static inline void page_table_entry_set(u64 *pte, u64 phy)
+{
+#ifdef CONFIG_64BIT
+ *pte = phy;
+#elif defined(CONFIG_ARM)
+ /*
+ * In order to prevent the compiler keeping cached copies of
+ * memory, we have to explicitly say that we have updated
+ * memory.
+ *
+ * Note: We could manually move the data ourselves into R0 and
+ * R1 by specifying register variables that are explicitly
+ * given registers assignments, the down side of this is that
+ * we have to assume cpu endianness. To avoid this we can use
+ * the ldrd to read the data from memory into R0 and R1 which
+ * will respect the cpu endianness, we then use strd to make
+ * the 64 bit assignment to the page table entry.
+ */
+ asm volatile("ldrd r0, r1, [%[ptemp]]\n\t"
+ "strd r0, r1, [%[pte]]\n\t"
+ : "=m" (*pte)
+ : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy)
+ : "r0", "r1");
+#else
+#error "64-bit atomic write must be implemented for your architecture"
+#endif
+}
+
+static void mmu_update(struct kbase_context *kctx)
+{
+ struct kbase_device * const kbdev = kctx->kbdev;
+ struct kbase_as * const as = &kbdev->as[kctx->as_nr];
+ struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+ /* Set up the required caching policies at the correct indices
+ * in the memattr register. */
+ current_setup->memattr =
+ (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY <<
+ (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) |
+ (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL <<
+ (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) |
+ (AS_MEMATTR_LPAE_WRITE_ALLOC <<
+ (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) |
+ 0; /* The other indices are unused for now */
+
+ current_setup->transtab = (u64)kctx->pgd &
+ ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK);
+
+ current_setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE;
+ current_setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER;
+
+
+ /* Apply the address space setting */
+ kbase_mmu_hw_configure(kbdev, as, kctx);
+}
+
+static void mmu_disable_as(struct kbase_device *kbdev, int as_nr)
+{
+ struct kbase_as * const as = &kbdev->as[as_nr];
+ struct kbase_mmu_setup * const current_setup = &as->current_setup;
+
+ current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED;
+
+
+ /* Apply the address space setting */
+ kbase_mmu_hw_configure(kbdev, as, NULL);
+}
+
+static phys_addr_t pte_to_phy_addr(u64 entry)
+{
+ if (!(entry & 1))
+ return 0;
+
+ return entry & ~0xFFF;
+}
+
+static int ate_is_valid(u64 ate)
+{
+ return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE);
+}
+
+static int pte_is_valid(u64 pte)
+{
+ return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE);
+}
+
+/*
+ * Map KBASE_REG flags to MMU flags
+ */
+static u64 get_mmu_flags(unsigned long flags)
+{
+ u64 mmu_flags;
+
+ /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */
+ mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2;
+
+ /* write perm if requested */
+ mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0;
+ /* read perm if requested */
+ mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0;
+ /* nx if requested */
+ mmu_flags |= (flags & KBASE_REG_GPU_NX) ? ENTRY_NX_BIT : 0;
+
+ if (flags & KBASE_REG_SHARE_BOTH) {
+ /* inner and outer shareable */
+ mmu_flags |= SHARE_BOTH_BITS;
+ } else if (flags & KBASE_REG_SHARE_IN) {
+ /* inner shareable coherency */
+ mmu_flags |= SHARE_INNER_BITS;
+ }
+
+ return mmu_flags;
+}
+
+static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags)
+{
+ page_table_entry_set(entry, (phy & ~0xFFF) |
+ get_mmu_flags(flags) |
+ ENTRY_IS_ATE);
+}
+
+static void entry_set_pte(u64 *entry, phys_addr_t phy)
+{
+ page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE);
+}
+
+static void entry_invalidate(u64 *entry)
+{
+ page_table_entry_set(entry, ENTRY_IS_INVAL);
+}
+
+static struct kbase_mmu_mode const lpae_mode = {
+ .update = mmu_update,
+ .disable_as = mmu_disable_as,
+ .pte_to_phy_addr = pte_to_phy_addr,
+ .ate_is_valid = ate_is_valid,
+ .pte_is_valid = pte_is_valid,
+ .entry_set_ate = entry_set_ate,
+ .entry_set_pte = entry_set_pte,
+ .entry_invalidate = entry_invalidate
+};
+
+struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void)
+{
+ return &lpae_mode;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
new file mode 100644
index 00000000000..5bbd6d48563
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c
@@ -0,0 +1,126 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+
+#ifdef CONFIG_MACH_MANTA
+#include <plat/devs.h>
+#endif
+
+/*
+ * This file is included only for type definitions and functions belonging to
+ * specific platform folders. Do not add dependencies with symbols that are
+ * defined somewhere else.
+ */
+#include <mali_kbase_config.h>
+
+#define PLATFORM_CONFIG_RESOURCE_COUNT 4
+#define PLATFORM_CONFIG_IRQ_RES_COUNT 3
+
+static struct platform_device *mali_device;
+
+#ifndef CONFIG_OF
+/**
+ * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources
+ *
+ * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function
+ * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT.
+ * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ.
+ *
+ * @param[in] io_resource Input IO resource data
+ * @param[out] linux_resources Pointer to output array of Linux resource structures
+ */
+static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources)
+{
+ if (!io_resources || !linux_resources) {
+ pr_err("%s: couldn't find proper resources\n", __func__);
+ return;
+ }
+
+ memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource));
+
+ linux_resources[0].start = io_resources->io_memory_region.start;
+ linux_resources[0].end = io_resources->io_memory_region.end;
+ linux_resources[0].flags = IORESOURCE_MEM;
+ linux_resources[1].start = io_resources->job_irq_number;
+ linux_resources[1].end = io_resources->job_irq_number;
+ linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+ linux_resources[2].start = io_resources->mmu_irq_number;
+ linux_resources[2].end = io_resources->mmu_irq_number;
+ linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+
+ linux_resources[3].start = io_resources->gpu_irq_number;
+ linux_resources[3].end = io_resources->gpu_irq_number;
+ linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL;
+}
+#endif /* CONFIG_OF */
+
+int kbase_platform_fake_register(void)
+{
+ struct kbase_platform_config *config;
+#ifndef CONFIG_OF
+ struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT];
+#endif
+ int err;
+
+ config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */
+ if (config == NULL) {
+ pr_err("%s: couldn't get platform config\n", __func__);
+ return -ENODEV;
+ }
+
+ mali_device = platform_device_alloc("mali", 0);
+ if (mali_device == NULL)
+ return -ENOMEM;
+
+#ifndef CONFIG_OF
+ kbasep_config_parse_io_resources(config->io_resources, resources);
+ err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT);
+ if (err) {
+ platform_device_put(mali_device);
+ mali_device = NULL;
+ return err;
+ }
+#endif /* CONFIG_OF */
+
+ err = platform_device_add(mali_device);
+ if (err) {
+ platform_device_unregister(mali_device);
+ mali_device = NULL;
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(kbase_platform_fake_register);
+
+void kbase_platform_fake_unregister(void)
+{
+ if (mali_device)
+ platform_device_unregister(mali_device);
+}
+EXPORT_SYMBOL(kbase_platform_fake_unregister);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c
new file mode 100644
index 00000000000..261441fa145
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c
@@ -0,0 +1,204 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.c
+ * Base kernel power management APIs
+ */
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_config_defaults.h>
+#include <mali_kbase_instr.h>
+
+#include <mali_kbase_pm.h>
+
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags)
+{
+ return kbase_hwaccess_pm_powerup(kbdev, flags);
+}
+
+void kbase_pm_halt(struct kbase_device *kbdev)
+{
+ kbase_hwaccess_pm_halt(kbdev);
+}
+
+void kbase_pm_context_active(struct kbase_device *kbdev)
+{
+ (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE);
+}
+
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ int c;
+ int old_count;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ /* Trace timeline information about how long it took to handle the decision
+ * to powerup. Sometimes the event might be missed due to reading the count
+ * outside of mutex, but this is necessary to get the trace timing
+ * correct. */
+ old_count = kbdev->pm.active_count;
+ if (old_count == 0)
+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+ if (kbase_pm_is_suspending(kbdev)) {
+ switch (suspend_handler) {
+ case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE:
+ if (kbdev->pm.active_count != 0)
+ break;
+ /* FALLTHROUGH */
+ case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE:
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+ if (old_count == 0)
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+ return 1;
+
+ case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE:
+ /* FALLTHROUGH */
+ default:
+ KBASE_DEBUG_ASSERT_MSG(false, "unreachable");
+ break;
+ }
+ }
+ c = ++kbdev->pm.active_count;
+ KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c);
+
+ /* Trace the event being handled */
+ if (old_count == 0)
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE);
+
+ if (c == 1)
+ /* First context active: Power on the GPU and any cores requested by
+ * the policy */
+ kbase_hwaccess_pm_gpu_active(kbdev);
+
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_active);
+
+void kbase_pm_context_idle(struct kbase_device *kbdev)
+{
+ struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+ int c;
+ int old_count;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+ /* Trace timeline information about how long it took to handle the decision
+ * to powerdown. Sometimes the event might be missed due to reading the
+ * count outside of mutex, but this is necessary to get the trace timing
+ * correct. */
+ old_count = kbdev->pm.active_count;
+ if (old_count == 0)
+ kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+ mutex_lock(&js_devdata->runpool_mutex);
+ mutex_lock(&kbdev->pm.lock);
+
+ c = --kbdev->pm.active_count;
+ KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c);
+ KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c);
+
+ KBASE_DEBUG_ASSERT(c >= 0);
+
+ /* Trace the event being handled */
+ if (old_count == 0)
+ kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE);
+
+ if (c == 0) {
+ /* Last context has gone idle */
+ kbase_hwaccess_pm_gpu_idle(kbdev);
+
+ /* Wake up anyone waiting for this to become 0 (e.g. suspend). The
+ * waiters must synchronize with us by locking the pm.lock after
+ * waiting */
+ wake_up(&kbdev->pm.zero_active_count_wait);
+ }
+
+ mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&js_devdata->runpool_mutex);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_context_idle);
+
+void kbase_pm_suspend(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ mutex_lock(&kbdev->pm.lock);
+ KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+ kbdev->pm.suspending = true;
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* From now on, the active count will drop towards zero. Sometimes, it'll
+ * go up briefly before going down again. However, once it reaches zero it
+ * will stay there - guaranteeing that we've idled all pm references */
+
+ /* Suspend job scheduler and associated components, so that it releases all
+ * the PM active count references */
+ kbasep_js_suspend(kbdev);
+
+ /* Suspend any counter collection that might be happening */
+ kbase_instr_hwcnt_suspend(kbdev);
+
+ /* Wait for the active count to reach zero. This is not the same as
+ * waiting for a power down, since not all policies power down when this
+ * reaches zero. */
+ wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0);
+
+ /* NOTE: We synchronize with anything that was just finishing a
+ * kbase_pm_context_idle() call by locking the pm.lock below */
+
+ kbase_hwaccess_pm_suspend(kbdev);
+}
+
+void kbase_pm_resume(struct kbase_device *kbdev)
+{
+ /* MUST happen before any pm_context_active calls occur */
+ kbase_hwaccess_pm_resume(kbdev);
+
+ /* Initial active call, to power on the GPU/cores if needed */
+ kbase_pm_context_active(kbdev);
+
+ /* Re-enable instrumentation, if it was previously disabled */
+ kbase_instr_hwcnt_resume(kbdev);
+
+ /* Resume any blocked atoms (which may cause contexts to be scheduled in
+ * and dependent atoms to run) */
+ kbase_resume_suspended_soft_jobs(kbdev);
+
+ /* Resume the Job Scheduler and associated components, and start running
+ * atoms */
+ kbasep_js_resume(kbdev);
+
+ /* Matching idle call, to power off the GPU/cores if we didn't actually
+ * need it and the policy doesn't want it on */
+ kbase_pm_context_idle(kbdev);
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h
new file mode 100644
index 00000000000..37fa2479df7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS 0x01
+#define PM_HW_ISSUES_DETECT 0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * Should ensure that no new interrupts are generated,
+ * but allow any currently running interrupt handlers to complete successfully.
+ * The GPU is forced off by the time this function returns, regardless of
+ * whether or not the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It informs the active power policy that the
+ * GPU is going to be in use shortly and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occuring/has occurred whilst this is
+ * in use. Use kbase_pm_contect_active_unless_suspending() instead.
+ *
+ * @note a Suspend is only visible to Kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for doing kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+ /** A suspend is not expected/not possible - this is the same as
+ * kbase_pm_context_active() */
+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+ /** If we're suspending, fail and don't increase the active count */
+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+ /** If we're suspending, succeed and allow the active count to increase iff
+ * it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+ *
+ * This should only be used when there is a bounded time on the activation
+ * (e.g. guarantee it's going to be idled very soon after) */
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * We returns a status code indicating whether we're allowed to keep the GPU
+ * active during the suspend, depending on the handler code. If the status code
+ * indicates a failure, the caller must abort whatever operation it was
+ * attempting, and potentially queue it up for after the OS has resumed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero Indicates success
+ * @return non-zero Indicates failure due to the system being suspending/suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call the GPU may be turned off by the power
+ * policy so the calling code should ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from Kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note the mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU e.g. via atom submission.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif /* _KBASE_PM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100644
index 00000000000..7fb674eded3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_profiling_gator_api.h
+ * Model interface
+ */
+
+#ifndef _KBASE_PROFILING_GATOR_API_H_
+#define _KBASE_PROFILING_GATOR_API_H_
+
+/*
+ * List of possible actions to be controlled by Streamline.
+ * The following numbers are used by gator to control
+ * the frame buffer dumping and s/w counter reporting.
+ */
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define SW_COUNTER_ENABLE (3)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+#define FBDUMP_CONTROL_MAX (5)
+#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE
+
+void _mali_profiling_control(u32 action, u32 value);
+
+#endif /* _KBASE_PROFILING_GATOR_API */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c
new file mode 100644
index 00000000000..b5c7758e1bb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c
@@ -0,0 +1,1134 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_replay.c
+ * Replay soft job handlers
+ */
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase_config.h>
+#include <mali_kbase.h>
+#include <mali_kbase_mem.h>
+#include <mali_kbase_mem_linux.h>
+
+#define JOB_NOT_STARTED 0
+#define JOB_TYPE_MASK 0xfe
+#define JOB_TYPE_NULL (1 << 1)
+#define JOB_TYPE_VERTEX (5 << 1)
+#define JOB_TYPE_TILER (7 << 1)
+#define JOB_TYPE_FUSED (8 << 1)
+#define JOB_TYPE_FRAGMENT (9 << 1)
+
+#define JOB_FLAG_DESC_SIZE (1 << 0)
+#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8)
+
+#define JOB_HEADER_32_FBD_OFFSET (31*4)
+#define JOB_HEADER_64_FBD_OFFSET (44*4)
+
+#define FBD_POINTER_MASK (~0x3f)
+
+#define SFBD_TILER_OFFSET (48*4)
+
+#define MFBD_TILER_OFFSET (14*4)
+
+#define FBD_HIERARCHY_WEIGHTS 8
+#define FBD_HIERARCHY_MASK_MASK 0x1fff
+
+#define FBD_TYPE 1
+
+#define HIERARCHY_WEIGHTS 13
+
+#define JOB_HEADER_ID_MAX 0xffff
+
+#define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF)
+#define JOB_POLYGON_LIST (0x03)
+
+struct job_head {
+ u32 status;
+ u32 not_complete_index;
+ u64 fault_addr;
+ u16 flags;
+ u16 index;
+ u16 dependencies[2];
+ union {
+ u64 _64;
+ u32 _32;
+ } next;
+ u32 x[2];
+ union {
+ u64 _64;
+ u32 _32;
+ } fragment_fbd;
+};
+
+static void dump_job_head(struct kbase_context *kctx, char *head_str,
+ struct job_head *job)
+{
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev, "%s\n", head_str);
+ dev_dbg(kctx->kbdev->dev, "addr = %p\n"
+ "status = %x\n"
+ "not_complete_index = %x\n"
+ "fault_addr = %llx\n"
+ "flags = %x\n"
+ "index = %x\n"
+ "dependencies = %x,%x\n",
+ job, job->status, job->not_complete_index,
+ job->fault_addr, job->flags, job->index,
+ job->dependencies[0],
+ job->dependencies[1]);
+
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ dev_dbg(kctx->kbdev->dev, "next = %llx\n",
+ job->next._64);
+ else
+ dev_dbg(kctx->kbdev->dev, "next = %x\n",
+ job->next._32);
+#endif
+}
+
+static int kbasep_replay_reset_sfbd(struct kbase_context *kctx,
+ u64 fbd_address, u64 tiler_heap_free,
+ u16 hierarchy_mask, u32 default_weight)
+{
+ struct {
+ u32 padding_1[1];
+ u32 flags;
+ u64 padding_2[2];
+ u64 heap_free_address;
+ u32 padding[8];
+ u32 weights[FBD_HIERARCHY_WEIGHTS];
+ } *fbd_tiler;
+ struct kbase_vmap_struct map;
+
+ dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+ fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET,
+ sizeof(*fbd_tiler), &map);
+ if (!fbd_tiler) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n");
+ return -EINVAL;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev,
+ "FBD tiler:\n"
+ "flags = %x\n"
+ "heap_free_address = %llx\n",
+ fbd_tiler->flags, fbd_tiler->heap_free_address);
+#endif
+ if (hierarchy_mask) {
+ u32 weights[HIERARCHY_WEIGHTS];
+ u16 old_hierarchy_mask = fbd_tiler->flags &
+ FBD_HIERARCHY_MASK_MASK;
+ int i, j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (old_hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+ weights[i] = fbd_tiler->weights[j++];
+ } else {
+ weights[i] = default_weight;
+ }
+ }
+
+
+ dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n",
+ old_hierarchy_mask, hierarchy_mask);
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+ dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+ i, weights[i]);
+
+ j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+ dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n",
+ i, weights[i], j);
+
+ fbd_tiler->weights[j++] = weights[i];
+ }
+ }
+
+ for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+ fbd_tiler->weights[j] = 0;
+
+ fbd_tiler->flags = hierarchy_mask | (1 << 16);
+ }
+
+ fbd_tiler->heap_free_address = tiler_heap_free;
+
+ dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n",
+ fbd_tiler->heap_free_address, fbd_tiler->flags);
+
+ kbase_vunmap(kctx, &map);
+
+ return 0;
+}
+
+static int kbasep_replay_reset_mfbd(struct kbase_context *kctx,
+ u64 fbd_address, u64 tiler_heap_free,
+ u16 hierarchy_mask, u32 default_weight)
+{
+ struct kbase_vmap_struct map;
+ struct {
+ u32 padding_0;
+ u32 flags;
+ u64 padding_1[2];
+ u64 heap_free_address;
+ u64 padding_2;
+ u32 weights[FBD_HIERARCHY_WEIGHTS];
+ } *fbd_tiler;
+
+ dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address);
+
+ fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET,
+ sizeof(*fbd_tiler), &map);
+ if (!fbd_tiler) {
+ dev_err(kctx->kbdev->dev,
+ "kbasep_replay_reset_fbd: failed to map fbd\n");
+ return -EINVAL;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev, "FBD tiler:\n"
+ "flags = %x\n"
+ "heap_free_address = %llx\n",
+ fbd_tiler->flags,
+ fbd_tiler->heap_free_address);
+#endif
+ if (hierarchy_mask) {
+ u32 weights[HIERARCHY_WEIGHTS];
+ u16 old_hierarchy_mask = (fbd_tiler->flags) &
+ FBD_HIERARCHY_MASK_MASK;
+ int i, j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (old_hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+ weights[i] = fbd_tiler->weights[j++];
+ } else {
+ weights[i] = default_weight;
+ }
+ }
+
+
+ dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n",
+ old_hierarchy_mask, hierarchy_mask);
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++)
+ dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n",
+ i, weights[i]);
+
+ j = 0;
+
+ for (i = 0; i < HIERARCHY_WEIGHTS; i++) {
+ if (hierarchy_mask & (1 << i)) {
+ KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS);
+
+ dev_dbg(kctx->kbdev->dev,
+ " Writing hierarchy level %02d (%08x) to %d\n",
+ i, weights[i], j);
+
+ fbd_tiler->weights[j++] = weights[i];
+ }
+ }
+
+ for (; j < FBD_HIERARCHY_WEIGHTS; j++)
+ fbd_tiler->weights[j] = 0;
+
+ fbd_tiler->flags = hierarchy_mask | (1 << 16);
+ }
+
+ fbd_tiler->heap_free_address = tiler_heap_free;
+
+ kbase_vunmap(kctx, &map);
+
+ return 0;
+}
+
+/**
+ * @brief Reset the status of an FBD pointed to by a tiler job
+ *
+ * This performs two functions :
+ * - Set the hierarchy mask
+ * - Reset the tiler free heap address
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] job_header Address of job header to reset.
+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask The hierarchy mask to use
+ * @param[in] default_weight Default hierarchy weight to write when no other
+ * weight is given in the FBD
+ * @param[in] job_64 true if this job is using 64-bit
+ * descriptors
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx,
+ u64 job_header, u64 tiler_heap_free,
+ u16 hierarchy_mask, u32 default_weight, bool job_64)
+{
+ struct kbase_vmap_struct map;
+ u64 fbd_address;
+
+ if (job_64) {
+ u64 *job_ext;
+
+ job_ext = kbase_vmap(kctx,
+ job_header + JOB_HEADER_64_FBD_OFFSET,
+ sizeof(*job_ext), &map);
+
+ if (!job_ext) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+ return -EINVAL;
+ }
+
+ fbd_address = *job_ext;
+
+ kbase_vunmap(kctx, &map);
+ } else {
+ u32 *job_ext;
+
+ job_ext = kbase_vmap(kctx,
+ job_header + JOB_HEADER_32_FBD_OFFSET,
+ sizeof(*job_ext), &map);
+
+ if (!job_ext) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n");
+ return -EINVAL;
+ }
+
+ fbd_address = *job_ext;
+
+ kbase_vunmap(kctx, &map);
+ }
+
+ if (fbd_address & FBD_TYPE) {
+ return kbasep_replay_reset_mfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight);
+ } else {
+ return kbasep_replay_reset_sfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight);
+ }
+}
+
+/**
+ * @brief Reset the status of a job
+ *
+ * This performs the following functions :
+ *
+ * - Reset the Job Status field of each job to NOT_STARTED.
+ * - Set the Job Type field of any Vertex Jobs to Null Job.
+ * - For any jobs using an FBD, set the Tiler Heap Free field to the value of
+ * the tiler_heap_free parameter, and set the hierarchy level mask to the
+ * hier_mask parameter.
+ * - Offset HW dependencies by the hw_job_id_offset parameter
+ * - Set the Perform Job Barrier flag if this job is the first in the chain
+ * - Read the address of the next job header
+ *
+ * @param[in] kctx Context pointer
+ * @param[in,out] job_header Address of job header to reset. Set to address
+ * of next job header on exit.
+ * @param[in] prev_jc Previous job chain to link to, if this job is
+ * the last in the chain.
+ * @param[in] hw_job_id_offset Offset for HW job IDs
+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask The hierarchy mask to use
+ * @param[in] default_weight Default hierarchy weight to write when no other
+ * weight is given in the FBD
+ * @param[in] first_in_chain true if this job is the first in the chain
+ * @param[in] fragment_chain true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+ u64 *job_header, u64 prev_jc,
+ u64 tiler_heap_free, u16 hierarchy_mask,
+ u32 default_weight, u16 hw_job_id_offset,
+ bool first_in_chain, bool fragment_chain)
+{
+ struct job_head *job;
+ u64 new_job_header;
+ struct kbase_vmap_struct map;
+
+ job = kbase_vmap(kctx, *job_header, sizeof(*job), &map);
+ if (!job) {
+ dev_err(kctx->kbdev->dev,
+ "kbasep_replay_parse_jc: failed to map jc\n");
+ return -EINVAL;
+ }
+
+ dump_job_head(kctx, "Job header:", job);
+
+ if (job->status == JOB_NOT_STARTED && !fragment_chain) {
+ dev_err(kctx->kbdev->dev, "Job already not started\n");
+ goto out_unmap;
+ }
+ job->status = JOB_NOT_STARTED;
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX)
+ job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL;
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) {
+ dev_err(kctx->kbdev->dev, "Fused jobs can not be replayed\n");
+ goto out_unmap;
+ }
+
+ if (first_in_chain)
+ job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER;
+
+ if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
+ (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
+ (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+ dev_err(kctx->kbdev->dev,
+ "Job indicies/dependencies out of valid range\n");
+ goto out_unmap;
+ }
+
+ if (job->dependencies[0])
+ job->dependencies[0] += hw_job_id_offset;
+ if (job->dependencies[1])
+ job->dependencies[1] += hw_job_id_offset;
+
+ job->index += hw_job_id_offset;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE) {
+ new_job_header = job->next._64;
+ if (!job->next._64)
+ job->next._64 = prev_jc;
+ } else {
+ new_job_header = job->next._32;
+ if (!job->next._32)
+ job->next._32 = prev_jc;
+ }
+ dump_job_head(kctx, "Updated to:", job);
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) {
+ bool job_64 = (job->flags & JOB_FLAG_DESC_SIZE) != 0;
+
+ if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+ tiler_heap_free, hierarchy_mask,
+ default_weight, job_64) != 0)
+ goto out_unmap;
+
+ } else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) {
+ u64 fbd_address;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ fbd_address = job->fragment_fbd._64;
+ else
+ fbd_address = (u64)job->fragment_fbd._32;
+
+ if (fbd_address & FBD_TYPE) {
+ if (kbasep_replay_reset_mfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight) != 0)
+ goto out_unmap;
+ } else {
+ if (kbasep_replay_reset_sfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight) != 0)
+ goto out_unmap;
+ }
+ }
+
+ kbase_vunmap(kctx, &map);
+
+ *job_header = new_job_header;
+
+ return 0;
+
+out_unmap:
+ kbase_vunmap(kctx, &map);
+ return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] jc Job chain start address
+ * @param[out] hw_job_id Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context *kctx,
+ u64 jc, u16 *hw_job_id)
+{
+ while (jc) {
+ struct job_head *job;
+ struct kbase_vmap_struct map;
+
+ dev_dbg(kctx->kbdev->dev,
+ "kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc);
+
+ job = kbase_vmap(kctx, jc, sizeof(*job), &map);
+ if (!job) {
+ dev_err(kctx->kbdev->dev, "failed to map jc\n");
+
+ return -EINVAL;
+ }
+
+ if (job->index > *hw_job_id)
+ *hw_job_id = job->index;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ jc = job->next._64;
+ else
+ jc = job->next._32;
+
+ kbase_vunmap(kctx, &map);
+ }
+
+ return 0;
+}
+
+/**
+ * @brief Reset the status of a number of jobs
+ *
+ * This function walks the provided job chain, and calls
+ * kbasep_replay_reset_job for each job. It also links the job chain to the
+ * provided previous job chain.
+ *
+ * The function will fail if any of the jobs passed already have status of
+ * NOT_STARTED.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] jc Job chain to be processed
+ * @param[in] prev_jc Job chain to be added to. May be NULL
+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask The hierarchy mask to use
+ * @param[in] default_weight Default hierarchy weight to write when no other
+ * weight is given in the FBD
+ * @param[in] hw_job_id_offset Offset for HW job IDs
+ * @param[in] fragment_chain true if this chain is the fragment chain
+ *
+ * @return 0 on success, error code otherwise
+ */
+static int kbasep_replay_parse_jc(struct kbase_context *kctx,
+ u64 jc, u64 prev_jc,
+ u64 tiler_heap_free, u16 hierarchy_mask,
+ u32 default_weight, u16 hw_job_id_offset,
+ bool fragment_chain)
+{
+ bool first_in_chain = true;
+ int nr_jobs = 0;
+
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n",
+ jc, hw_job_id_offset);
+
+ while (jc) {
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc);
+
+ if (kbasep_replay_reset_job(kctx, &jc, prev_jc,
+ tiler_heap_free, hierarchy_mask,
+ default_weight, hw_job_id_offset,
+ first_in_chain, fragment_chain) != 0)
+ return -EINVAL;
+
+ first_in_chain = false;
+
+ nr_jobs++;
+ if (fragment_chain &&
+ nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) {
+ dev_err(kctx->kbdev->dev,
+ "Exceeded maximum number of jobs in fragment chain\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * @brief Reset the status of a replay job, and set up dependencies
+ *
+ * This performs the actions to allow the replay job to be re-run following
+ * completion of the passed dependency.
+ *
+ * @param[in] katom The atom to be reset
+ * @param[in] dep_atom The dependency to be attached to the atom
+ */
+static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom,
+ struct kbase_jd_atom *dep_atom)
+{
+ katom->status = KBASE_JD_ATOM_STATE_QUEUED;
+ kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA);
+ list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]);
+}
+
+/**
+ * @brief Allocate an unused katom
+ *
+ * This will search the provided context for an unused katom, and will mark it
+ * as KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * If no atoms are available then the function will fail.
+ *
+ * @param[in] kctx Context pointer
+ * @return An atom ID, or -1 on failure
+ */
+static int kbasep_allocate_katom(struct kbase_context *kctx)
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+ int i;
+
+ for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) {
+ if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) {
+ jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED;
+ dev_dbg(kctx->kbdev->dev,
+ "kbasep_allocate_katom: Allocated atom %d\n",
+ i);
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+/**
+ * @brief Release a katom
+ *
+ * This will mark the provided atom as available, and remove any dependencies.
+ *
+ * For use on error path.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] atom_id ID of atom to release
+ */
+static void kbasep_release_katom(struct kbase_context *kctx, int atom_id)
+{
+ struct kbase_jd_context *jctx = &kctx->jctx;
+
+ dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n",
+ atom_id);
+
+ while (!list_empty(&jctx->atoms[atom_id].dep_head[0]))
+ list_del(jctx->atoms[atom_id].dep_head[0].next);
+
+ while (!list_empty(&jctx->atoms[atom_id].dep_head[1]))
+ list_del(jctx->atoms[atom_id].dep_head[1].next);
+
+ jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED;
+}
+
+static void kbasep_replay_create_atom(struct kbase_context *kctx,
+ struct base_jd_atom_v2 *atom,
+ int atom_nr,
+ base_jd_prio prio)
+{
+ atom->nr_extres = 0;
+ atom->extres_list.value = NULL;
+ atom->device_nr = 0;
+ atom->prio = prio;
+ atom->atom_number = atom_nr;
+
+ base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID);
+ base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID);
+
+ atom->udata.blob[0] = 0;
+ atom->udata.blob[1] = 0;
+}
+
+/**
+ * @brief Create two atoms for the purpose of replaying jobs
+ *
+ * Two atoms are allocated and created. The jc pointer is not set at this
+ * stage. The second atom has a dependency on the first. The remaining fields
+ * are set up as follows :
+ *
+ * - No external resources. Any required external resources will be held by the
+ * replay atom.
+ * - device_nr is set to 0. This is not relevant as
+ * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ *
+ * @param[out] t_atom Atom to use for tiler jobs
+ * @param[out] f_atom Atom to use for fragment jobs
+ * @param[in] prio Priority of new atom (inherited from replay soft
+ * job)
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_create_atoms(struct kbase_context *kctx,
+ struct base_jd_atom_v2 *t_atom,
+ struct base_jd_atom_v2 *f_atom,
+ base_jd_prio prio)
+{
+ int t_atom_nr, f_atom_nr;
+
+ t_atom_nr = kbasep_allocate_katom(kctx);
+ if (t_atom_nr < 0) {
+ dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+ return -EINVAL;
+ }
+
+ f_atom_nr = kbasep_allocate_katom(kctx);
+ if (f_atom_nr < 0) {
+ dev_err(kctx->kbdev->dev, "Failed to allocate katom\n");
+ kbasep_release_katom(kctx, t_atom_nr);
+ return -EINVAL;
+ }
+
+ kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio);
+ kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio);
+
+ base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA);
+
+ return 0;
+}
+
+#ifdef CONFIG_MALI_DEBUG
+static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload)
+{
+ u64 next;
+
+ dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n");
+ next = payload->tiler_jc_list;
+
+ while (next) {
+ struct kbase_vmap_struct map;
+ base_jd_replay_jc *jc_struct;
+
+ jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map);
+
+ if (!jc_struct)
+ return;
+
+ dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n",
+ jc_struct, jc_struct->jc, jc_struct->next);
+
+ next = jc_struct->next;
+
+ kbase_vunmap(kctx, &map);
+ }
+}
+#endif
+
+/**
+ * @brief Parse a base_jd_replay_payload provided by userspace
+ *
+ * This will read the payload from userspace, and parse the job chains.
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] replay_atom Replay soft job atom
+ * @param[in] t_atom Atom to use for tiler jobs
+ * @param[in] f_atom Atom to use for fragment jobs
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_parse_payload(struct kbase_context *kctx,
+ struct kbase_jd_atom *replay_atom,
+ struct base_jd_atom_v2 *t_atom,
+ struct base_jd_atom_v2 *f_atom)
+{
+ base_jd_replay_payload *payload;
+ u64 next;
+ u64 prev_jc = 0;
+ u16 hw_job_id_offset = 0;
+ int ret = -EINVAL;
+ struct kbase_vmap_struct map;
+
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n",
+ replay_atom->jc, sizeof(payload));
+
+ payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map);
+
+ if (!payload) {
+ dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n");
+ return -EINVAL;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload);
+ dev_dbg(kctx->kbdev->dev, "Payload structure:\n"
+ "tiler_jc_list = %llx\n"
+ "fragment_jc = %llx\n"
+ "tiler_heap_free = %llx\n"
+ "fragment_hierarchy_mask = %x\n"
+ "tiler_hierarchy_mask = %x\n"
+ "hierarchy_default_weight = %x\n"
+ "tiler_core_req = %x\n"
+ "fragment_core_req = %x\n",
+ payload->tiler_jc_list,
+ payload->fragment_jc,
+ payload->tiler_heap_free,
+ payload->fragment_hierarchy_mask,
+ payload->tiler_hierarchy_mask,
+ payload->hierarchy_default_weight,
+ payload->tiler_core_req,
+ payload->fragment_core_req);
+ payload_dump(kctx, payload);
+#endif
+
+ t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER;
+ f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER;
+
+ /* Sanity check core requirements*/
+ if ((t_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
+ ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T ||
+ (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE &
+ ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_FS ||
+ t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES ||
+ f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
+ dev_err(kctx->kbdev->dev, "Invalid core requirements\n");
+ goto out;
+ }
+
+ /* Process tiler job chains */
+ next = payload->tiler_jc_list;
+ if (!next) {
+ dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n");
+ goto out;
+ }
+
+ while (next) {
+ base_jd_replay_jc *jc_struct;
+ struct kbase_vmap_struct jc_map;
+ u64 jc;
+
+ jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map);
+
+ if (!jc_struct) {
+ dev_err(kctx->kbdev->dev, "Failed to map jc struct\n");
+ goto out;
+ }
+
+ jc = jc_struct->jc;
+ next = jc_struct->next;
+ if (next)
+ jc_struct->jc = 0;
+
+ kbase_vunmap(kctx, &jc_map);
+
+ if (jc) {
+ u16 max_hw_job_id = 0;
+
+ if (kbasep_replay_find_hw_job_id(kctx, jc,
+ &max_hw_job_id) != 0)
+ goto out;
+
+ if (kbasep_replay_parse_jc(kctx, jc, prev_jc,
+ payload->tiler_heap_free,
+ payload->tiler_hierarchy_mask,
+ payload->hierarchy_default_weight,
+ hw_job_id_offset, false) != 0) {
+ goto out;
+ }
+
+ hw_job_id_offset += max_hw_job_id;
+
+ prev_jc = jc;
+ }
+ }
+ t_atom->jc = prev_jc;
+
+ /* Process fragment job chain */
+ f_atom->jc = payload->fragment_jc;
+ if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0,
+ payload->tiler_heap_free,
+ payload->fragment_hierarchy_mask,
+ payload->hierarchy_default_weight, 0,
+ true) != 0) {
+ goto out;
+ }
+
+ if (!t_atom->jc || !f_atom->jc) {
+ dev_err(kctx->kbdev->dev, "Invalid payload\n");
+ goto out;
+ }
+
+ dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n",
+ t_atom->jc, f_atom->jc);
+ ret = 0;
+
+out:
+ kbase_vunmap(kctx, &map);
+
+ return ret;
+}
+
+static void kbase_replay_process_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_context *kctx;
+ struct kbase_jd_context *jctx;
+ bool need_to_try_schedule_context = false;
+
+ struct base_jd_atom_v2 t_atom, f_atom;
+ struct kbase_jd_atom *t_katom, *f_katom;
+ base_jd_prio atom_prio;
+
+ katom = container_of(data, struct kbase_jd_atom, work);
+ kctx = katom->kctx;
+ jctx = &kctx->jctx;
+
+ mutex_lock(&jctx->lock);
+
+ atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority);
+
+ if (kbasep_replay_create_atoms(
+ kctx, &t_atom, &f_atom, atom_prio) != 0) {
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+
+ t_katom = &jctx->atoms[t_atom.atom_number];
+ f_katom = &jctx->atoms[f_atom.atom_number];
+
+ if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) {
+ kbasep_release_katom(kctx, t_atom.atom_number);
+ kbasep_release_katom(kctx, f_atom.atom_number);
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+
+ kbasep_replay_reset_softjob(katom, f_katom);
+
+ need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom);
+ if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+ dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+ kbasep_release_katom(kctx, f_atom.atom_number);
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+ need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom);
+ if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) {
+ dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n");
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ goto out;
+ }
+
+ katom->event_code = BASE_JD_EVENT_DONE;
+
+out:
+ if (katom->event_code != BASE_JD_EVENT_DONE) {
+ kbase_disjoint_state_down(kctx->kbdev);
+
+ need_to_try_schedule_context |= jd_done_nolock(katom);
+ }
+
+ if (need_to_try_schedule_context)
+ kbase_js_sched_all(kctx->kbdev);
+
+ mutex_unlock(&jctx->lock);
+}
+
+/**
+ * @brief Check job replay fault
+ *
+ * This will read the job payload, checks fault type and source, then decides
+ * whether replay is required.
+ *
+ * @param[in] katom The atom to be processed
+ * @return true (success) if replay required or false on failure.
+ */
+static bool kbase_replay_fault_check(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct device *dev = kctx->kbdev->dev;
+ base_jd_replay_payload *payload;
+ u64 job_header;
+ u64 job_loop_detect;
+ struct job_head *job;
+ struct kbase_vmap_struct job_map;
+ struct kbase_vmap_struct map;
+ bool err = false;
+
+ /* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or
+ * if force_replay is enabled.
+ */
+ if (BASE_JD_EVENT_TERMINATED == katom->event_code) {
+ return false;
+ } else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) {
+ return true;
+ } else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) {
+ katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT;
+ return true;
+ } else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) {
+ /* No replay for faults of type other than
+ * BASE_JD_EVENT_DATA_INVALID_FAULT.
+ */
+ return false;
+ }
+
+ /* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc
+ * to find out whether the source of exception is POLYGON_LIST. Replay
+ * is required if the source of fault is POLYGON_LIST.
+ */
+ payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map);
+ if (!payload) {
+ dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n");
+ return false;
+ }
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload);
+ dev_dbg(dev, "\nPayload structure:\n"
+ "fragment_jc = 0x%llx\n"
+ "fragment_hierarchy_mask = 0x%x\n"
+ "fragment_core_req = 0x%x\n",
+ payload->fragment_jc,
+ payload->fragment_hierarchy_mask,
+ payload->fragment_core_req);
+#endif
+ /* Process fragment job chain */
+ job_header = (u64) payload->fragment_jc;
+ job_loop_detect = job_header;
+ while (job_header) {
+ job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map);
+ if (!job) {
+ dev_err(dev, "failed to map jc\n");
+ /* unmap payload*/
+ kbase_vunmap(kctx, &map);
+ return false;
+ }
+
+
+#ifdef CONFIG_MALI_DEBUG
+ dev_dbg(dev, "\njob_head structure:\n"
+ "Source ID:0x%x Access:0x%x Exception:0x%x\n"
+ "at job addr = %p\n"
+ "not_complete_index = 0x%x\n"
+ "fault_addr = 0x%llx\n"
+ "flags = 0x%x\n"
+ "index = 0x%x\n"
+ "dependencies = 0x%x,0x%x\n",
+ JOB_SOURCE_ID(job->status),
+ ((job->status >> 8) & 0x3),
+ (job->status & 0xFF),
+ job,
+ job->not_complete_index,
+ job->fault_addr,
+ job->flags,
+ job->index,
+ job->dependencies[0],
+ job->dependencies[1]);
+#endif
+
+ /* Replay only when the polygon list reader caused the
+ * DATA_INVALID_FAULT */
+ if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) &&
+ (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->status))) {
+ err = true;
+ kbase_vunmap(kctx, &job_map);
+ break;
+ }
+
+ /* Move on to next fragment job in the list */
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ job_header = job->next._64;
+ else
+ job_header = job->next._32;
+
+ kbase_vunmap(kctx, &job_map);
+
+ /* Job chain loop detected */
+ if (job_header == job_loop_detect)
+ break;
+ }
+
+ /* unmap payload*/
+ kbase_vunmap(kctx, &map);
+
+ return err;
+}
+
+
+/**
+ * @brief Process a replay job
+ *
+ * Called from kbase_process_soft_job.
+ *
+ * On exit, if the job has completed, katom->event_code will have been updated.
+ * If the job has not completed, and is replaying jobs, then the atom status
+ * will have been reset to KBASE_JD_ATOM_STATE_QUEUED.
+ *
+ * @param[in] katom The atom to be processed
+ * @return false if the atom has completed
+ * true if the atom is replaying jobs
+ */
+bool kbase_replay_process(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_jd_context *jctx = &kctx->jctx;
+
+ if (katom->event_code == BASE_JD_EVENT_DONE) {
+ dev_dbg(kctx->kbdev->dev, "Previous job succeeded - not replaying\n");
+
+ if (katom->retry_count)
+ kbase_disjoint_state_down(kctx->kbdev);
+
+ return false;
+ }
+
+ if (jctx->sched_info.ctx.is_dying) {
+ dev_dbg(kctx->kbdev->dev, "Not replaying; context is dying\n");
+
+ if (katom->retry_count)
+ kbase_disjoint_state_down(kctx->kbdev);
+
+ return false;
+ }
+
+ /* Check job exception type and source before replaying. */
+ if (!kbase_replay_fault_check(katom)) {
+ dev_dbg(kctx->kbdev->dev,
+ "Replay cancelled on event %x\n", katom->event_code);
+ /* katom->event_code is already set to the failure code of the
+ * previous job.
+ */
+ return false;
+ }
+
+ dev_warn(kctx->kbdev->dev, "Replaying jobs retry=%d\n",
+ katom->retry_count);
+
+ katom->retry_count++;
+
+ if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) {
+ dev_err(kctx->kbdev->dev, "Replay exceeded limit - failing jobs\n");
+
+ kbase_disjoint_state_down(kctx->kbdev);
+
+ /* katom->event_code is already set to the failure code of the
+ previous job */
+ return false;
+ }
+
+ /* only enter the disjoint state once for the whole time while the replay is ongoing */
+ if (katom->retry_count == 1)
+ kbase_disjoint_state_up(kctx->kbdev);
+
+ INIT_WORK(&katom->work, kbase_replay_process_worker);
+ queue_work(kctx->event_workq, &katom->work);
+
+ return true;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.c b/drivers/gpu/arm/midgard/mali_kbase_security.c
new file mode 100644
index 00000000000..a0bb3529baf
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_security.c
@@ -0,0 +1,76 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_security.c
+ * Base kernel security capability API
+ */
+
+#include <mali_kbase.h>
+
+static inline bool kbasep_am_i_root(void)
+{
+#if KBASE_HWCNT_DUMP_BYPASS_ROOT
+ return true;
+#else
+ /* Check if root */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)
+ if (uid_eq(current_euid(), GLOBAL_ROOT_UID))
+ return true;
+#else
+ if (current_euid() == 0)
+ return true;
+#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)*/
+ return false;
+#endif /*KBASE_HWCNT_DUMP_BYPASS_ROOT*/
+}
+
+/**
+ * kbase_security_has_capability - see mali_kbase_caps.h for description.
+ */
+
+bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags)
+{
+ /* Assume failure */
+ bool access_allowed = false;
+ bool audit = KBASE_SEC_FLAG_AUDIT & flags;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+ CSTD_UNUSED(kctx);
+
+ /* Detect unsupported flags */
+ KBASE_DEBUG_ASSERT(((~KBASE_SEC_FLAG_MASK) & flags) == 0);
+
+ /* Determine if access is allowed for the given cap */
+ switch (cap) {
+ case KBASE_SEC_MODIFY_PRIORITY:
+ case KBASE_SEC_INSTR_HW_COUNTERS_COLLECT:
+ /* Access is granted only if the caller is privileged */
+ access_allowed = kbasep_am_i_root();
+ break;
+ }
+
+ /* Report problem if requested */
+ if (!access_allowed && audit)
+ dev_warn(kctx->kbdev->dev, "Security capability failure: %d, %p", cap, (void *)kctx);
+
+ return access_allowed;
+}
+
+KBASE_EXPORT_TEST_API(kbase_security_has_capability);
diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.h b/drivers/gpu/arm/midgard/mali_kbase_security.h
new file mode 100644
index 00000000000..024a7ee1aab
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_security.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_security.h
+ * Base kernel security capability APIs
+ */
+
+#ifndef _KBASE_SECURITY_H_
+#define _KBASE_SECURITY_H_
+
+/* Security flags */
+#define KBASE_SEC_FLAG_NOAUDIT (0u << 0) /* Silently handle privilege failure */
+#define KBASE_SEC_FLAG_AUDIT (1u << 0) /* Write audit message on privilege failure */
+#define KBASE_SEC_FLAG_MASK (KBASE_SEC_FLAG_AUDIT) /* Mask of all valid flag bits */
+
+/* List of unique capabilities that have security access privileges */
+enum kbase_security_capability {
+ /* Instrumentation Counters access privilege */
+ KBASE_SEC_INSTR_HW_COUNTERS_COLLECT = 1,
+ KBASE_SEC_MODIFY_PRIORITY
+ /* Add additional access privileges here */
+};
+
+/**
+ * kbase_security_has_capability - determine whether a task has a particular effective capability
+ * @param[in] kctx The task context.
+ * @param[in] cap The capability to check for.
+ * @param[in] flags Additional configuration information
+ * Such as whether to write an audit message or not.
+ * @return true if success (capability is allowed), false otherwise.
+ */
+
+bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags);
+
+#endif /* _KBASE_SECURITY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.c b/drivers/gpu/arm/midgard/mali_kbase_smc.c
new file mode 100644
index 00000000000..aa2da3ab7e9
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_smc.c
@@ -0,0 +1,77 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+#include <mali_kbase_smc.h>
+
+
+static noinline u32 invoke_smc_fid(u32 function_id, u64 arg0, u64 arg1,
+ u64 arg2, u64 *res0, u64 *res1, u64 *res2)
+{
+ /* 3 args and 3 returns are chosen arbitrarily,
+ see SMC calling convention for limits */
+ asm volatile(
+ "mov x0, %[fid]\n"
+ "mov x1, %[a0]\n"
+ "mov x2, %[a1]\n"
+ "mov x3, %[a2]\n"
+ "smc #0\n"
+ "str x0, [%[re0]]\n"
+ "str x1, [%[re1]]\n"
+ "str x2, [%[re2]]\n"
+ : [fid] "+r" (function_id), [a0] "+r" (arg0),
+ [a1] "+r" (arg1), [a2] "+r" (arg2)
+ : [re0] "r" (res0), [re1] "r" (res1), [re2] "r" (res2)
+ : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+ "x8", "x9", "x10", "x11", "x12", "x13",
+ "x14", "x15", "x16", "x17");
+ return function_id;
+}
+
+void kbase_invoke_smc_fid(u32 fid)
+{
+ u64 res0, res1, res2;
+
+ /* Is fast call (bit 31 set) */
+ KBASE_DEBUG_ASSERT(fid & ~SMC_FAST_CALL);
+ /* bits 16-23 must be zero for fast calls */
+ KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0);
+
+ invoke_smc_fid(fid, 0, 0, 0, &res0, &res1, &res2);
+}
+
+void kbase_invoke_smc(u32 oen, u16 function_number, u64 arg0, u64 arg1,
+ u64 arg2, u64 *res0, u64 *res1, u64 *res2)
+{
+ u32 fid = 0;
+
+ /* Only the six bits allowed should be used. */
+ KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0);
+
+ fid |= SMC_FAST_CALL; /* Bit 31: Fast call */
+ /* Bit 30: 1=SMC64, 0=SMC32 */
+ fid |= oen; /* Bit 29:24: OEN */
+ /* Bit 23:16: Must be zero for fast calls */
+ fid |= (function_number); /* Bit 15:0: function number */
+
+ invoke_smc_fid(fid, arg0, arg1, arg2, res0, res1, res2);
+}
+
+#endif /* CONFIG_ARM64 */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.h b/drivers/gpu/arm/midgard/mali_kbase_smc.h
new file mode 100644
index 00000000000..2d0f5086f75
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_smc.h
@@ -0,0 +1,58 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_SMC_H_
+#define _KBASE_SMC_H_
+
+#ifdef CONFIG_ARM64
+
+#include <mali_kbase.h>
+
+#define SMC_FAST_CALL (1 << 31)
+
+#define SMC_OEN_OFFSET 24
+#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */
+#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET)
+#define SMC_OEN_STD (4 << SMC_OEN_OFFSET)
+
+
+/**
+ * kbase_invoke_smc_fid - Does a secure monitor call with the given function_id
+ * @function_id: The SMC function to call, see SMC Calling convention.
+ */
+void kbase_invoke_smc_fid(u32 function_id);
+
+/**
+ * kbase_invoke_smc_fid - Does a secure monitor call with the given parameters.
+ * see SMC Calling Convention for details
+ * @oen: Owning Entity number (SIP, STD etc).
+ * @function_number: ID specifiy which function within the OEN.
+ * @arg0: argument 0 to pass in the SMC call.
+ * @arg1: argument 1 to pass in the SMC call.
+ * @arg2: argument 2 to pass in the SMC call.
+ * @res0: result 0 returned from the SMC call.
+ * @res1: result 1 returned from the SMC call.
+ * @res2: result 2 returned from the SMC call.
+ */
+void kbase_invoke_smc(u32 oen, u16 function_number, u64 arg0, u64 arg1,
+ u64 arg2, u64 *res0, u64 *res1, u64 *res2);
+
+#endif /* CONFIG_ARM64 */
+
+#endif /* _KBASE_SMC_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
new file mode 100644
index 00000000000..36a61308dc8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c
@@ -0,0 +1,442 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+#include <linux/dma-mapping.h>
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#include <linux/syscalls.h>
+#include "mali_kbase_sync.h"
+#endif
+#include <mali_kbase_hwaccess_time.h>
+#include <linux/version.h>
+
+/* Mask to check cache alignment of data structures */
+#define KBASE_CACHE_ALIGNMENT_MASK ((1<<L1_CACHE_SHIFT)-1)
+
+/**
+ * @file mali_kbase_softjobs.c
+ *
+ * This file implements the logic behind software only jobs that are
+ * executed within the driver rather than being handed over to the GPU.
+ */
+
+static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom)
+{
+ struct kbase_va_region *reg;
+ phys_addr_t addr = 0;
+ u64 pfn;
+ u32 offset;
+ char *page;
+ struct timespec ts;
+ struct base_dump_cpu_gpu_counters data;
+ u64 system_time;
+ u64 cycle_counter;
+ u64 jc = katom->jc;
+ struct kbase_context *kctx = katom->kctx;
+ int pm_active_err;
+
+ memset(&data, 0, sizeof(data));
+
+ /* Take the PM active reference as late as possible - otherwise, it could
+ * delay suspend until we process the atom (which may be at the end of a
+ * long chain of dependencies */
+ pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE);
+ if (pm_active_err) {
+ struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data;
+
+ /* We're suspended - queue this on the list of suspended jobs
+ * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */
+ mutex_lock(&js_devdata->runpool_mutex);
+ list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ return pm_active_err;
+ }
+
+ kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time,
+ &ts);
+
+ kbase_pm_context_idle(kctx->kbdev);
+
+ data.sec = ts.tv_sec;
+ data.usec = ts.tv_nsec / 1000;
+ data.system_time = system_time;
+ data.cycle_counter = cycle_counter;
+
+ pfn = jc >> PAGE_SHIFT;
+ offset = jc & ~PAGE_MASK;
+
+ /* Assume this atom will be cancelled until we know otherwise */
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ if (offset > 0x1000 - sizeof(data)) {
+ /* Wouldn't fit in the page */
+ return 0;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc);
+ if (reg &&
+ (reg->flags & KBASE_REG_GPU_WR) &&
+ reg->cpu_alloc && reg->cpu_alloc->pages)
+ addr = reg->cpu_alloc->pages[pfn - reg->start_pfn];
+
+ kbase_gpu_vm_unlock(kctx);
+ if (!addr)
+ return 0;
+
+ page = kmap(pfn_to_page(PFN_DOWN(addr)));
+ if (!page)
+ return 0;
+
+ kbase_sync_single_for_cpu(katom->kctx->kbdev,
+ kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
+ offset, sizeof(data),
+ DMA_BIDIRECTIONAL);
+
+ memcpy(page + offset, &data, sizeof(data));
+
+ kbase_sync_single_for_device(katom->kctx->kbdev,
+ kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) +
+ offset, sizeof(data),
+ DMA_BIDIRECTIONAL);
+
+ kunmap(pfn_to_page(PFN_DOWN(addr)));
+
+ /* Atom was fine - mark it as done */
+ katom->event_code = BASE_JD_EVENT_DONE;
+
+ return 0;
+}
+
+#ifdef CONFIG_SYNC
+
+/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited)
+ *
+ * @param katom The atom to complete
+ */
+static void complete_soft_job(struct kbase_jd_atom *katom)
+{
+ struct kbase_context *kctx = katom->kctx;
+
+ mutex_lock(&kctx->jctx.lock);
+ list_del(&katom->dep_item[0]);
+ kbase_finish_soft_job(katom);
+ if (jd_done_nolock(katom))
+ kbase_js_sched_all(kctx->kbdev);
+ mutex_unlock(&kctx->jctx.lock);
+}
+
+static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result)
+{
+ struct sync_pt *pt;
+ struct sync_timeline *timeline;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ if (!list_is_singular(&katom->fence->pt_list_head)) {
+#else
+ if (katom->fence->num_fences != 1) {
+#endif
+ /* Not exactly one item in the list - so it didn't (directly) come from us */
+ return BASE_JD_EVENT_JOB_CANCELLED;
+ }
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list);
+#else
+ pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base);
+#endif
+ timeline = sync_pt_parent(pt);
+
+ if (!kbase_sync_timeline_is_ours(timeline)) {
+ /* Fence has a sync_pt which isn't ours! */
+ return BASE_JD_EVENT_JOB_CANCELLED;
+ }
+
+ kbase_sync_signal_pt(pt, result);
+
+ sync_timeline_signal(timeline);
+
+ return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE;
+}
+
+static void kbase_fence_wait_worker(struct work_struct *data)
+{
+ struct kbase_jd_atom *katom;
+ struct kbase_context *kctx;
+
+ katom = container_of(data, struct kbase_jd_atom, work);
+ kctx = katom->kctx;
+
+ complete_soft_job(katom);
+}
+
+static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter)
+{
+ struct kbase_jd_atom *katom = container_of(waiter, struct kbase_jd_atom, sync_waiter);
+ struct kbase_context *kctx;
+
+ KBASE_DEBUG_ASSERT(NULL != katom);
+
+ kctx = katom->kctx;
+
+ KBASE_DEBUG_ASSERT(NULL != kctx);
+
+ /* Propagate the fence status to the atom.
+ * If negative then cancel this atom and its dependencies.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+ if (fence->status < 0)
+#else
+ if (atomic_read(&fence->status) < 0)
+#endif
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ /* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue
+ *
+ * The issue is that we may signal the timeline while holding kctx->jctx.lock and
+ * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work.
+ */
+
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, kbase_fence_wait_worker);
+ queue_work(kctx->jctx.job_done_wq, &katom->work);
+}
+
+static int kbase_fence_wait(struct kbase_jd_atom *katom)
+{
+ int ret;
+
+ KBASE_DEBUG_ASSERT(NULL != katom);
+ KBASE_DEBUG_ASSERT(NULL != katom->kctx);
+
+ sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback);
+
+ ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter);
+
+ if (ret == 1) {
+ /* Already signalled */
+ return 0;
+ } else if (ret < 0) {
+ goto cancel_atom;
+ }
+ return 1;
+
+ cancel_atom:
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+ /* We should cause the dependant jobs in the bag to be failed,
+ * to do this we schedule the work queue to complete this job */
+ KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work));
+ INIT_WORK(&katom->work, kbase_fence_wait_worker);
+ queue_work(katom->kctx->jctx.job_done_wq, &katom->work);
+ return 1;
+}
+
+static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom)
+{
+ if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) {
+ /* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */
+ return;
+ }
+
+ /* Wait was cancelled - zap the atoms */
+ katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
+
+ kbase_finish_soft_job(katom);
+
+ if (jd_done_nolock(katom))
+ kbase_js_sched_all(katom->kctx->kbdev);
+}
+#endif /* CONFIG_SYNC */
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+ return kbase_dump_cpu_gpu_time(katom);
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+ KBASE_DEBUG_ASSERT(katom->fence != NULL);
+ katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+ /* Release the reference as we don't need it any more */
+ sync_fence_put(katom->fence);
+ katom->fence = NULL;
+ break;
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ return kbase_fence_wait(katom);
+#endif /* CONFIG_SYNC */
+ case BASE_JD_REQ_SOFT_REPLAY:
+ return kbase_replay_process(katom);
+ }
+
+ /* Atom is complete */
+ return 0;
+}
+
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ kbase_fence_cancel_wait(katom);
+ break;
+#endif
+ default:
+ /* This soft-job doesn't support cancellation! */
+ KBASE_DEBUG_ASSERT(0);
+ }
+}
+
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+ {
+ if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK))
+ return -EINVAL;
+ }
+ break;
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+ {
+ struct base_fence fence;
+ int fd;
+
+ if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+ return -EINVAL;
+
+ fd = kbase_stream_create_fence(fence.basep.stream_fd);
+ if (fd < 0)
+ return -EINVAL;
+
+ katom->fence = sync_fence_fdget(fd);
+
+ if (katom->fence == NULL) {
+ /* The only way the fence can be NULL is if userspace closed it for us.
+ * So we don't need to clear it up */
+ return -EINVAL;
+ }
+ fence.basep.fd = fd;
+ if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) {
+ katom->fence = NULL;
+ sys_close(fd);
+ return -EINVAL;
+ }
+ }
+ break;
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ {
+ struct base_fence fence;
+
+ if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence)))
+ return -EINVAL;
+
+ /* Get a reference to the fence object */
+ katom->fence = sync_fence_fdget(fence.basep.fd);
+ if (katom->fence == NULL)
+ return -EINVAL;
+ }
+ break;
+#endif /* CONFIG_SYNC */
+ case BASE_JD_REQ_SOFT_REPLAY:
+ break;
+ default:
+ /* Unsupported soft-job */
+ return -EINVAL;
+ }
+ return 0;
+}
+
+void kbase_finish_soft_job(struct kbase_jd_atom *katom)
+{
+ switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) {
+ case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME:
+ /* Nothing to do */
+ break;
+#ifdef CONFIG_SYNC
+ case BASE_JD_REQ_SOFT_FENCE_TRIGGER:
+ /* If fence has not yet been signalled, do it now */
+ if (katom->fence) {
+ kbase_fence_trigger(katom, katom->event_code ==
+ BASE_JD_EVENT_DONE ? 0 : -EFAULT);
+ sync_fence_put(katom->fence);
+ katom->fence = NULL;
+ }
+ break;
+ case BASE_JD_REQ_SOFT_FENCE_WAIT:
+ /* Release the reference to the fence object */
+ sync_fence_put(katom->fence);
+ katom->fence = NULL;
+ break;
+#endif /* CONFIG_SYNC */
+ }
+}
+
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
+{
+ LIST_HEAD(local_suspended_soft_jobs);
+ struct kbase_jd_atom *tmp_iter;
+ struct kbase_jd_atom *katom_iter;
+ struct kbasep_js_device_data *js_devdata;
+ bool resched = false;
+
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ js_devdata = &kbdev->js_data;
+
+ /* Move out the entire list */
+ mutex_lock(&js_devdata->runpool_mutex);
+ list_splice_init(&js_devdata->suspended_soft_jobs_list,
+ &local_suspended_soft_jobs);
+ mutex_unlock(&js_devdata->runpool_mutex);
+
+ /*
+ * Each atom must be detached from the list and ran separately -
+ * it could be re-added to the old list, but this is unlikely
+ */
+ list_for_each_entry_safe(katom_iter, tmp_iter,
+ &local_suspended_soft_jobs, dep_item[1]) {
+ struct kbase_context *kctx = katom_iter->kctx;
+
+ mutex_lock(&kctx->jctx.lock);
+
+ /* Remove from the global list */
+ list_del(&katom_iter->dep_item[1]);
+ /* Remove from the context's list of waiting soft jobs */
+ list_del(&katom_iter->dep_item[0]);
+
+ if (kbase_process_soft_job(katom_iter) == 0) {
+ kbase_finish_soft_job(katom_iter);
+ resched |= jd_done_nolock(katom_iter);
+ } else {
+ /* The job has not completed */
+ KBASE_DEBUG_ASSERT((katom_iter->core_req &
+ BASEP_JD_REQ_ATOM_TYPE)
+ != BASE_JD_REQ_SOFT_REPLAY);
+ list_add_tail(&katom_iter->dep_item[0],
+ &kctx->waiting_soft_jobs);
+ }
+
+ mutex_unlock(&kctx->jctx.lock);
+ }
+
+ if (resched)
+ kbase_js_sched_all(kbdev);
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.c b/drivers/gpu/arm/midgard/mali_kbase_sync.c
new file mode 100644
index 00000000000..c5fb9815255
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.c
@@ -0,0 +1,182 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_SYNC
+
+#include <linux/seq_file.h>
+#include "sync.h"
+#include <mali_kbase.h>
+#include <mali_kbase_sync.h>
+
+struct mali_sync_timeline {
+ struct sync_timeline timeline;
+ atomic_t counter;
+ atomic_t signalled;
+};
+
+struct mali_sync_pt {
+ struct sync_pt pt;
+ int order;
+ int result;
+};
+
+static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline)
+{
+ return container_of(timeline, struct mali_sync_timeline, timeline);
+}
+
+static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt)
+{
+ return container_of(pt, struct mali_sync_pt, pt);
+}
+
+static struct sync_pt *timeline_dup(struct sync_pt *pt)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+ struct mali_sync_pt *new_mpt;
+ struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), sizeof(struct mali_sync_pt));
+
+ if (!new_pt)
+ return NULL;
+
+ new_mpt = to_mali_sync_pt(new_pt);
+ new_mpt->order = mpt->order;
+ new_mpt->result = mpt->result;
+
+ return new_pt;
+}
+
+static int timeline_has_signaled(struct sync_pt *pt)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+ int result = mpt->result;
+
+ int diff = atomic_read(&mtl->signalled) - mpt->order;
+
+ if (diff >= 0)
+ return (result < 0) ? result : 1;
+
+ return 0;
+}
+
+static int timeline_compare(struct sync_pt *a, struct sync_pt *b)
+{
+ struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt);
+ struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt);
+
+ int diff = ma->order - mb->order;
+
+ if (diff == 0)
+ return 0;
+
+ return (diff < 0) ? -1 : 1;
+}
+
+static void timeline_value_str(struct sync_timeline *timeline, char *str,
+ int size)
+{
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline);
+
+ snprintf(str, size, "%d", atomic_read(&mtl->signalled));
+}
+
+static void pt_value_str(struct sync_pt *pt, char *str, int size)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+
+ snprintf(str, size, "%d(%d)", mpt->order, mpt->result);
+}
+
+static struct sync_timeline_ops mali_timeline_ops = {
+ .driver_name = "Mali",
+ .dup = timeline_dup,
+ .has_signaled = timeline_has_signaled,
+ .compare = timeline_compare,
+ .timeline_value_str = timeline_value_str,
+ .pt_value_str = pt_value_str,
+};
+
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline)
+{
+ return timeline->ops == &mali_timeline_ops;
+}
+
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name)
+{
+ struct sync_timeline *tl;
+ struct mali_sync_timeline *mtl;
+
+ tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name);
+ if (!tl)
+ return NULL;
+
+ /* Set the counter in our private struct */
+ mtl = to_mali_sync_timeline(tl);
+ atomic_set(&mtl->counter, 0);
+ atomic_set(&mtl->signalled, 0);
+
+ return tl;
+}
+
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent)
+{
+ struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt));
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent);
+ struct mali_sync_pt *mpt;
+
+ if (!pt)
+ return NULL;
+
+ mpt = to_mali_sync_pt(pt);
+ mpt->order = atomic_inc_return(&mtl->counter);
+ mpt->result = 0;
+
+ return pt;
+}
+
+void kbase_sync_signal_pt(struct sync_pt *pt, int result)
+{
+ struct mali_sync_pt *mpt = to_mali_sync_pt(pt);
+ struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt));
+ int signalled;
+ int diff;
+
+ mpt->result = result;
+
+ do {
+ signalled = atomic_read(&mtl->signalled);
+
+ diff = signalled - mpt->order;
+
+ if (diff > 0) {
+ /* The timeline is already at or ahead of this point.
+ * This should not happen unless userspace has been
+ * signalling fences out of order, so warn but don't
+ * violate the sync_pt API.
+ * The warning is only in debug builds to prevent
+ * a malicious user being able to spam dmesg.
+ */
+#ifdef CONFIG_MALI_DEBUG
+ pr_err("Fences were triggered in a different order to allocation!");
+#endif /* CONFIG_MALI_DEBUG */
+ return;
+ }
+ } while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled);
+}
+
+#endif /* CONFIG_SYNC */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h
new file mode 100644
index 00000000000..6d8e34d3c3a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h
@@ -0,0 +1,91 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync.h
+ *
+ */
+
+#ifndef MALI_KBASE_SYNC_H
+#define MALI_KBASE_SYNC_H
+
+#include "sync.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0)
+/* For backwards compatiblility with kernels before 3.17. After 3.17
+ * sync_pt_parent is included in the kernel. */
+static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt)
+{
+ return pt->parent;
+}
+#endif
+
+/*
+ * Create a stream object.
+ * Built on top of timeline object.
+ * Exposed as a file descriptor.
+ * Life-time controlled via the file descriptor:
+ * - dup to add a ref
+ * - close to remove a ref
+ */
+int kbase_stream_create(const char *name, int *const out_fd);
+
+/*
+ * Create a fence in a stream object
+ */
+int kbase_stream_create_fence(int tl_fd);
+
+/*
+ * Validate a fd to be a valid fence
+ * No reference is taken.
+ *
+ * This function is only usable to catch unintentional user errors early,
+ * it does not stop malicious code changing the fd after this function returns.
+ */
+int kbase_fence_validate(int fd);
+
+/* Returns true if the specified timeline is allocated by Mali */
+int kbase_sync_timeline_is_ours(struct sync_timeline *timeline);
+
+/* Allocates a timeline for Mali
+ *
+ * One timeline should be allocated per API context.
+ */
+struct sync_timeline *kbase_sync_timeline_alloc(const char *name);
+
+/* Allocates a sync point within the timeline.
+ *
+ * The timeline must be the one allocated by kbase_sync_timeline_alloc
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ */
+struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent);
+
+/* Signals a particular sync point
+ *
+ * Sync points must be triggered in *exactly* the same order as they are allocated.
+ *
+ * If they are signalled in the wrong order then a message will be printed in debug
+ * builds and otherwise attempts to signal order sync_pts will be ignored.
+ *
+ * result can be negative to indicate error, any other value is interpreted as success.
+ */
+void kbase_sync_signal_pt(struct sync_pt *pt, int result);
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
new file mode 100644
index 00000000000..ddd0847a69c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_sync_user.c
+ *
+ */
+
+#ifdef CONFIG_SYNC
+
+#include <linux/sched.h>
+#include <linux/fdtable.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/anon_inodes.h>
+#include <linux/version.h>
+#include <linux/uaccess.h>
+#include <mali_kbase_sync.h>
+#include <mali_base_kernel_sync.h>
+
+static int kbase_stream_close(struct inode *inode, struct file *file)
+{
+ struct sync_timeline *tl;
+
+ tl = (struct sync_timeline *)file->private_data;
+ BUG_ON(!tl);
+ sync_timeline_destroy(tl);
+ return 0;
+}
+
+static const struct file_operations stream_fops = {
+ .owner = THIS_MODULE,
+ .release = kbase_stream_close,
+};
+
+int kbase_stream_create(const char *name, int *const out_fd)
+{
+ struct sync_timeline *tl;
+
+ BUG_ON(!out_fd);
+
+ tl = kbase_sync_timeline_alloc(name);
+ if (!tl)
+ return -EINVAL;
+
+ *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC);
+
+ if (*out_fd < 0) {
+ sync_timeline_destroy(tl);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int kbase_stream_create_fence(int tl_fd)
+{
+ struct sync_timeline *tl;
+ struct sync_pt *pt;
+ struct sync_fence *fence;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0)
+ struct files_struct *files;
+ struct fdtable *fdt;
+#endif
+ int fd;
+ struct file *tl_file;
+
+ tl_file = fget(tl_fd);
+ if (tl_file == NULL)
+ return -EBADF;
+
+ if (tl_file->f_op != &stream_fops) {
+ fd = -EBADF;
+ goto out;
+ }
+
+ tl = tl_file->private_data;
+
+ pt = kbase_sync_pt_alloc(tl);
+ if (!pt) {
+ fd = -EFAULT;
+ goto out;
+ }
+
+ fence = sync_fence_create("mali_fence", pt);
+ if (!fence) {
+ sync_pt_free(pt);
+ fd = -EFAULT;
+ goto out;
+ }
+
+ /* from here the fence owns the sync_pt */
+
+ /* create a fd representing the fence */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)
+ fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ sync_fence_put(fence);
+ goto out;
+ }
+#else
+ fd = get_unused_fd();
+ if (fd < 0) {
+ sync_fence_put(fence);
+ goto out;
+ }
+
+ files = current->files;
+ spin_lock(&files->file_lock);
+ fdt = files_fdtable(files);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0)
+ __set_close_on_exec(fd, fdt);
+#else
+ FD_SET(fd, fdt->close_on_exec);
+#endif
+ spin_unlock(&files->file_lock);
+#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */
+
+ /* bind fence to the new fd */
+ sync_fence_install(fence, fd);
+
+ out:
+ fput(tl_file);
+
+ return fd;
+}
+
+int kbase_fence_validate(int fd)
+{
+ struct sync_fence *fence;
+
+ fence = sync_fence_fdget(fd);
+ if (!fence)
+ return -EINVAL;
+
+ sync_fence_put(fence);
+ return 0;
+}
+
+#endif /* CONFIG_SYNC */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
new file mode 100644
index 00000000000..871dc316c99
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c
@@ -0,0 +1,1695 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/anon_inodes.h>
+#include <linux/atomic.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/stringify.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_tlstream.h>
+
+/*****************************************************************************/
+
+/* The version of timeline stream. */
+#define KBASEP_TLSTREAM_VERSION 1
+
+/* The maximum expected length of string in tracepoint descriptor. */
+#define STRLEN_MAX 64 /* bytes */
+
+/* The number of nanoseconds in a second. */
+#define NSECS_IN_SEC 1000000000ull /* ns */
+
+/* The number of nanoseconds to wait before autoflushing the stream. */
+#define AUTOFLUSH_TIMEOUT (2ull * NSECS_IN_SEC) /* ns */
+
+/* The period of autoflush checker execution in milliseconds. */
+#define AUTOFLUSH_INTERVAL 1000 /* ms */
+
+/* The maximum size of a single packet used by timeline. */
+#define PACKET_SIZE 2048 /* bytes */
+
+/* The number of packets used by one timeline stream. */
+#define PACKET_COUNT 16
+
+/* The number of bytes reserved for packet header.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_HEADER_SIZE 8 /* bytes */
+
+/* The number of bytes reserved for packet sequence number.
+ * These value must be defined according to MIPE documentation. */
+#define PACKET_NUMBER_SIZE 4 /* bytes */
+
+/* Packet header - first word.
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_STREAMID_POS 0
+#define PACKET_STREAMID_LEN 8
+#define PACKET_RSVD1_POS (PACKET_STREAMID_POS + PACKET_STREAMID_LEN)
+#define PACKET_RSVD1_LEN 8
+#define PACKET_TYPE_POS (PACKET_RSVD1_POS + PACKET_RSVD1_LEN)
+#define PACKET_TYPE_LEN 3
+#define PACKET_CLASS_POS (PACKET_TYPE_POS + PACKET_TYPE_LEN)
+#define PACKET_CLASS_LEN 7
+#define PACKET_FAMILY_POS (PACKET_CLASS_POS + PACKET_CLASS_LEN)
+#define PACKET_FAMILY_LEN 6
+
+/* Packet header - second word
+ * These values must be defined according to MIPE documentation. */
+#define PACKET_LENGTH_POS 0
+#define PACKET_LENGTH_LEN 24
+#define PACKET_SEQBIT_POS (PACKET_LENGTH_POS + PACKET_LENGTH_LEN)
+#define PACKET_SEQBIT_LEN 1
+#define PACKET_RSVD2_POS (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN)
+#define PACKET_RSVD2_LEN 7
+
+/* Types of streams generated by timeline.
+ * Order is significant! Header streams must precede respective body streams. */
+enum tl_stream_type {
+ TL_STREAM_TYPE_OBJ_HEADER,
+ TL_STREAM_TYPE_OBJ_SUMMARY,
+ TL_STREAM_TYPE_OBJ,
+ TL_STREAM_TYPE_AUX_HEADER,
+ TL_STREAM_TYPE_AUX,
+
+ TL_STREAM_TYPE_COUNT
+};
+
+/* Timeline packet family ids.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_family {
+ TL_PACKET_FAMILY_CTRL = 0, /* control packets */
+ TL_PACKET_FAMILY_TL = 1, /* timeline packets */
+
+ TL_PACKET_FAMILY_COUNT
+};
+
+/* Packet classes used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_class {
+ TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */
+ TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */
+};
+
+/* Packet types used in timeline streams.
+ * Values are significant! Check MIPE documentation. */
+enum tl_packet_type {
+ TL_PACKET_TYPE_HEADER = 0, /* stream's header/directory */
+ TL_PACKET_TYPE_BODY = 1, /* stream's body */
+ TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */
+};
+
+/* Message ids of trace events that are recorded in the timeline stream. */
+enum tl_msg_id {
+ /* Timeline object events. */
+ KBASE_TL_NEW_CTX,
+ KBASE_TL_NEW_GPU,
+ KBASE_TL_NEW_LPU,
+ KBASE_TL_NEW_ATOM,
+ KBASE_TL_DEL_CTX,
+ KBASE_TL_DEL_ATOM,
+ KBASE_TL_LIFELINK_LPU_GPU,
+ KBASE_TL_RET_GPU_CTX,
+ KBASE_TL_RET_ATOM_CTX,
+ KBASE_TL_RET_ATOM_LPU,
+ KBASE_TL_NRET_GPU_CTX,
+ KBASE_TL_NRET_ATOM_CTX,
+ KBASE_TL_NRET_ATOM_LPU,
+
+ /* Timeline non-object events. */
+ KBASE_AUX_PM_STATE,
+ KBASE_AUX_JOB_SOFTSTOP,
+ KBASE_AUX_PAGEFAULT,
+ KBASE_AUX_PAGESALLOC
+};
+
+/*****************************************************************************/
+
+/**
+ * struct tl_stream - timeline stream structure
+ * @lock: message order lock
+ * @buffer: array of buffers
+ * @wbi: write buffer index
+ * @rbi: read buffer index
+ * @numbered: if non-zero stream's packets are sequentially numbered
+ * @last_write_time: timestamp indicating last write
+ *
+ * This structure holds information needed to construct proper packets in the
+ * timeline stream. Each message in sequence must bear timestamp that is greater
+ * to one in previous message in the same stream. For this reason lock is held
+ * throughout the process of message creation. Each stream contains set of
+ * buffers. Each buffer will hold one MIPE packet. In case there is no free
+ * space required to store incoming message the oldest buffer is discarded.
+ * Each packet in timeline body stream has sequence number embedded (this value
+ * must increment monotonically and is used by packets receiver to discover
+ * buffer overflows.
+ */
+struct tl_stream {
+ spinlock_t lock;
+
+ struct {
+ atomic_t size; /* number of bytes in buffer */
+ char data[PACKET_SIZE]; /* buffer's data */
+ } buffer[PACKET_COUNT];
+
+ atomic_t wbi;
+ atomic_t rbi;
+
+ int numbered;
+ u64 last_write_time;
+};
+
+/**
+ * struct tp_desc - tracepoint message descriptor structure
+ * @id: tracepoint ID identifying message in stream
+ * @id_str: human readable version of tracepoint ID
+ * @name: tracepoint description
+ * @arg_types: tracepoint's arguments types declaration
+ * @arg_names: comma separated list of tracepoint's arguments names
+ */
+struct tp_desc {
+ u32 id;
+ const char *id_str;
+ const char *name;
+ const char *arg_types;
+ const char *arg_names;
+};
+
+/*****************************************************************************/
+
+/* Configuration of timeline streams generated by kernel.
+ * Kernel emit only streams containing either timeline object events or
+ * auxiliary events. All streams have stream id value of 1 (as opposed to user
+ * space streams that have value of 0). */
+static const struct {
+ enum tl_packet_family pkt_family;
+ enum tl_packet_class pkt_class;
+ enum tl_packet_type pkt_type;
+ unsigned int stream_id;
+} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = {
+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER, 1},
+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1},
+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY, 1},
+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER, 1},
+ {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY, 1}
+};
+
+/* The timeline streams generated by kernel. */
+static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT];
+
+/* Autoflush timer. */
+static struct timer_list autoflush_timer;
+
+/* If non-zero autoflush timer is active. */
+static atomic_t autoflush_timer_active;
+
+/* Reader lock. Only one reader is allowed to have access to the timeline
+ * streams at any given time. */
+static DEFINE_MUTEX(tl_reader_lock);
+
+/* Indicator of whether the timeline stream file descriptor is already used. */
+static atomic_t tlstream_busy = {0};
+
+/* Timeline stream event queue. */
+static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue);
+
+/* The timeline stream file operations functions. */
+static ssize_t kbasep_tlstream_read(
+ struct file *filp,
+ char __user *buffer,
+ size_t size,
+ loff_t *f_pos);
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait);
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp);
+
+/* The timeline stream file operations structure. */
+static const struct file_operations kbasep_tlstream_fops = {
+ .release = kbasep_tlstream_release,
+ .read = kbasep_tlstream_read,
+ .poll = kbasep_tlstream_poll,
+};
+
+/* Descriptors of timeline messages transmitted in object events stream. */
+static const struct tp_desc tp_desc_obj[] = {
+ {
+ KBASE_TL_NEW_CTX,
+ __stringify(KBASE_TL_NEW_CTX),
+ "object ctx is created",
+ "@pI",
+ "ctx,ctx_nr"
+ },
+ {
+ KBASE_TL_NEW_GPU,
+ __stringify(KBASE_TL_NEW_GPU),
+ "object gpu is created",
+ "@pII",
+ "gpu,gpu_id,core_count"
+ },
+ {
+ KBASE_TL_NEW_LPU,
+ __stringify(KBASE_TL_NEW_LPU),
+ "object lpu is created",
+ "@pII",
+ "lpu,lpu_nr,lpu_fn"
+ },
+ {
+ KBASE_TL_NEW_ATOM,
+ __stringify(KBASE_TL_NEW_ATOM),
+ "object atom is created",
+ "@pI",
+ "atom,atom_nr"
+ },
+ {
+ KBASE_TL_DEL_CTX,
+ __stringify(KBASE_TL_DEL_CTX),
+ "context is destroyed",
+ "@p",
+ "context"
+ },
+ {
+ KBASE_TL_DEL_ATOM,
+ __stringify(KBASE_TL_DEL_ATOM),
+ "atom is destroyed",
+ "@p",
+ "atom"
+ },
+ {
+ KBASE_TL_LIFELINK_LPU_GPU,
+ __stringify(KBASE_TL_LIFELINK_LPU_GPU),
+ "lpu is deleted with gpu",
+ "@pp",
+ "lpu,gpu"
+ },
+ {
+ KBASE_TL_RET_GPU_CTX,
+ __stringify(KBASE_TL_RET_GPU_CTX),
+ "gpu is retained by context",
+ "@pp",
+ "gpu,ctx"
+ },
+ {
+ KBASE_TL_RET_ATOM_CTX,
+ __stringify(KBASE_TL_RET_ATOM_CTX),
+ "atom is retained by context",
+ "@pp",
+ "atom,ctx"
+ },
+ {
+ KBASE_TL_RET_ATOM_LPU,
+ __stringify(KBASE_TL_RET_ATOM_LPU),
+ "atom is retained by lpu",
+ "@pp",
+ "atom,lpu"
+ },
+ {
+ KBASE_TL_NRET_GPU_CTX,
+ __stringify(KBASE_TL_NRET_GPU_CTX),
+ "gpu is released by context",
+ "@pp",
+ "gpu,ctx"
+ },
+ {
+ KBASE_TL_NRET_ATOM_CTX,
+ __stringify(KBASE_TL_NRET_ATOM_CTX),
+ "atom is released by context",
+ "@pp",
+ "atom,context"
+ },
+ {
+ KBASE_TL_NRET_ATOM_LPU,
+ __stringify(KBASE_TL_NRET_ATOM_LPU),
+ "atom is released by lpu",
+ "@pp",
+ "atom,lpu"
+ },
+};
+
+/* Descriptors of timeline messages transmitted in auxiliary events stream. */
+static const struct tp_desc tp_desc_aux[] = {
+ {
+ KBASE_AUX_PM_STATE,
+ __stringify(KBASE_AUX_PM_STATE),
+ "PM state",
+ "@IL",
+ "core_type,core_state_bitset"
+ },
+ {
+ KBASE_AUX_JOB_SOFTSTOP,
+ __stringify(KBASE_AUX_JOB_SOFTSTOP),
+ "Job soft stop",
+ "@I",
+ "tag_id"
+ },
+ {
+ KBASE_AUX_PAGEFAULT,
+ __stringify(KBASE_AUX_PAGEFAULT),
+ "Page fault",
+ "@II",
+ "as_id,page_cnt"
+ },
+ {
+ KBASE_AUX_PAGESALLOC,
+ __stringify(KBASE_AUX_PAGESALLOC),
+ "Total alloc pages change",
+ "@l",
+ "page_cnt_change"
+ }
+};
+
+#if MALI_UNIT_TEST
+/* Number of bytes read by user. */
+static atomic_t tlstream_bytes_collected = {0};
+
+/* Number of bytes generated by tracepoint messages. */
+static atomic_t tlstream_bytes_generated = {0};
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_get_timestamp - return timestamp
+ *
+ * Function returns timestamp value based on raw monotonic timer. Value will
+ * wrap around zero in case of overflow.
+ * Return: timestamp value
+ */
+static u64 kbasep_tlstream_get_timestamp(void)
+{
+ struct timespec ts;
+ u64 timestamp;
+
+ getrawmonotonic(&ts);
+ timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec;
+ return timestamp;
+}
+
+/**
+ * kbasep_tlstream_write_bytes - write data to message buffer
+ * @buffer: buffer where data will be written
+ * @pos: position in the buffer where to place data
+ * @bytes: pointer to buffer holding data
+ * @len: length of data to be written
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_bytes(
+ char *buffer,
+ size_t pos,
+ const void *bytes,
+ size_t len)
+{
+ KBASE_DEBUG_ASSERT(buffer);
+ KBASE_DEBUG_ASSERT(bytes);
+
+ memcpy(&buffer[pos], bytes, len);
+
+ return pos + len;
+}
+
+/**
+ * kbasep_tlstream_write_string - write string to message buffer
+ * @buffer: buffer where data will be written
+ * @pos: position in the buffer where to place data
+ * @string: pointer to buffer holding the source string
+ * @max_write_size: number of bytes that can be stored in buffer
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_string(
+ char *buffer,
+ size_t pos,
+ const char *string,
+ size_t max_write_size)
+{
+ u32 string_len;
+
+ KBASE_DEBUG_ASSERT(buffer);
+ KBASE_DEBUG_ASSERT(string);
+ /* Timeline string consists of at least string length and nul
+ * terminator. */
+ KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
+ max_write_size -= sizeof(string_len);
+
+ string_len = strlcpy(
+ &buffer[pos + sizeof(string_len)],
+ string,
+ max_write_size);
+ string_len += sizeof(char);
+
+ /* Make sure that the source string fit into the buffer. */
+ KBASE_DEBUG_ASSERT(string_len <= max_write_size);
+
+ /* Update string length. */
+ memcpy(&buffer[pos], &string_len, sizeof(string_len));
+
+ return pos + sizeof(string_len) + string_len;
+}
+
+/**
+ * kbasep_tlstream_write_timestamp - write timestamp to message buffer
+ * @buffer: buffer where data will be written
+ * @pos: position in the buffer where to place data
+ *
+ * Return: updated position in the buffer
+ */
+static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos)
+{
+ u64 timestamp = kbasep_tlstream_get_timestamp();
+
+ return kbasep_tlstream_write_bytes(
+ buffer, pos,
+ &timestamp, sizeof(timestamp));
+}
+
+/**
+ * kbasep_tlstream_put_bits - put bits in a word
+ * @word: pointer to the words being modified
+ * @value: value that shall be written to given position
+ * @bitpos: position where value shall be written (in bits)
+ * @bitlen: length of value (in bits)
+ */
+static void kbasep_tlstream_put_bits(
+ u32 *word,
+ u32 value,
+ unsigned int bitpos,
+ unsigned int bitlen)
+{
+ const u32 mask = ((1 << bitlen) - 1) << bitpos;
+
+ KBASE_DEBUG_ASSERT(word);
+ KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen));
+ KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32);
+
+ *word &= ~mask;
+ *word |= ((value << bitpos) & mask);
+}
+
+/**
+ * kbasep_tlstream_packet_header_setup - setup the packet header
+ * @buffer: pointer to the buffer
+ * @pkt_family: packet's family
+ * @pkt_type: packet's type
+ * @pkt_class: packet's class
+ * @stream_id: stream id
+ * @numbered: non-zero if this stream is numbered
+ *
+ * Function sets up immutable part of packet header in the given buffer.
+ */
+static void kbasep_tlstream_packet_header_setup(
+ char *buffer,
+ enum tl_packet_family pkt_family,
+ enum tl_packet_class pkt_class,
+ enum tl_packet_type pkt_type,
+ unsigned int stream_id,
+ int numbered)
+{
+ u32 word0 = 0;
+ u32 word1 = 0;
+
+ KBASE_DEBUG_ASSERT(buffer);
+ KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL);
+ KBASE_DEBUG_ASSERT(
+ (pkt_type == TL_PACKET_TYPE_HEADER) ||
+ (pkt_type == TL_PACKET_TYPE_SUMMARY) ||
+ (pkt_type == TL_PACKET_TYPE_BODY));
+ KBASE_DEBUG_ASSERT(
+ (pkt_class == TL_PACKET_CLASS_OBJ) ||
+ (pkt_class == TL_PACKET_CLASS_AUX));
+
+ kbasep_tlstream_put_bits(
+ &word0, pkt_family,
+ PACKET_FAMILY_POS, PACKET_FAMILY_LEN);
+ kbasep_tlstream_put_bits(
+ &word0, pkt_class,
+ PACKET_CLASS_POS, PACKET_CLASS_LEN);
+ kbasep_tlstream_put_bits(
+ &word0, pkt_type,
+ PACKET_TYPE_POS, PACKET_TYPE_LEN);
+ kbasep_tlstream_put_bits(
+ &word0, stream_id,
+ PACKET_STREAMID_POS, PACKET_STREAMID_LEN);
+
+ if (numbered)
+ kbasep_tlstream_put_bits(
+ &word1, 1,
+ PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN);
+
+ memcpy(&buffer[0], &word0, sizeof(word0));
+ memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_header_update - update the packet header
+ * @buffer: pointer to the buffer
+ * @data_size: amount of data carried in this packet
+ *
+ * Function updates mutable part of packet header in the given buffer.
+ * Note that value of data_size must not including size of the header.
+ */
+static void kbasep_tlstream_packet_header_update(
+ char *buffer,
+ size_t data_size)
+{
+ u32 word0;
+ u32 word1;
+
+ KBASE_DEBUG_ASSERT(buffer);
+ CSTD_UNUSED(word0);
+
+ memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1));
+
+ kbasep_tlstream_put_bits(
+ &word1, data_size,
+ PACKET_LENGTH_POS, PACKET_LENGTH_LEN);
+
+ memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1));
+}
+
+/**
+ * kbasep_tlstream_packet_number_update - update the packet number
+ * @buffer: pointer to the buffer
+ * @counter: value of packet counter for this packet's stream
+ *
+ * Function updates packet number embedded within the packet placed in the
+ * given buffer.
+ */
+static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter)
+{
+ KBASE_DEBUG_ASSERT(buffer);
+
+ memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter));
+}
+
+/**
+ * kbasep_timeline_stream_reset - reset stream
+ * @stream: pointer to the stream structure
+ *
+ * Function discards all pending messages and resets packet counters.
+ */
+static void kbasep_timeline_stream_reset(struct tl_stream *stream)
+{
+ unsigned int i;
+
+ for (i = 0; i < PACKET_COUNT; i++) {
+ if (stream->numbered)
+ atomic_set(
+ &stream->buffer[i].size,
+ PACKET_HEADER_SIZE +
+ PACKET_NUMBER_SIZE);
+ else
+ atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE);
+ }
+
+ atomic_set(&stream->wbi, 0);
+ atomic_set(&stream->rbi, 0);
+}
+
+/**
+ * kbasep_timeline_stream_init - initialize timeline stream
+ * @stream: pointer to the stream structure
+ * @stream_type: stream type
+ */
+static void kbasep_timeline_stream_init(
+ struct tl_stream *stream,
+ enum tl_stream_type stream_type)
+{
+ unsigned int i;
+
+ KBASE_DEBUG_ASSERT(stream);
+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+ spin_lock_init(&stream->lock);
+
+ /* All packets carrying tracepoints shall be numbered. */
+ if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type)
+ stream->numbered = 1;
+ else
+ stream->numbered = 0;
+
+ for (i = 0; i < PACKET_COUNT; i++)
+ kbasep_tlstream_packet_header_setup(
+ stream->buffer[i].data,
+ tl_stream_cfg[stream_type].pkt_family,
+ tl_stream_cfg[stream_type].pkt_class,
+ tl_stream_cfg[stream_type].pkt_type,
+ tl_stream_cfg[stream_type].stream_id,
+ stream->numbered);
+
+ kbasep_timeline_stream_reset(tl_stream[stream_type]);
+}
+
+/**
+ * kbasep_timeline_stream_term - terminate timeline stream
+ * @stream: pointer to the stream structure
+ */
+static void kbasep_timeline_stream_term(struct tl_stream *stream)
+{
+ KBASE_DEBUG_ASSERT(stream);
+}
+
+/**
+ * kbasep_tlstream_msgbuf_submit - submit packet to the user space
+ * @stream: pointer to the stream structure
+ * @wb_idx_raw: write buffer index
+ * @wb_size: length of data stored in current buffer
+ *
+ * Function updates currently written buffer with packet header. Then write
+ * index is incremented and buffer is handled to user space. Parameters
+ * of new buffer are returned using provided arguments.
+ *
+ * Return: length of data in new buffer
+ *
+ * Warning: User must update the stream structure with returned value.
+ */
+static size_t kbasep_tlstream_msgbuf_submit(
+ struct tl_stream *stream,
+ unsigned int wb_idx_raw,
+ unsigned int wb_size)
+{
+ unsigned int rb_idx_raw = atomic_read(&stream->rbi);
+ unsigned int wb_idx = wb_idx_raw % PACKET_COUNT;
+
+ kbasep_tlstream_packet_header_update(
+ stream->buffer[wb_idx].data,
+ wb_size - PACKET_HEADER_SIZE);
+
+ if (stream->numbered)
+ kbasep_tlstream_packet_number_update(
+ stream->buffer[wb_idx].data,
+ wb_idx_raw);
+
+ /* Increasing write buffer index will expose this packet to the reader.
+ * As stream->lock is not taken on reader side we must make sure memory
+ * is updated correctly before this will happen. */
+ smp_wmb();
+ wb_idx_raw++;
+ atomic_set(&stream->wbi, wb_idx_raw);
+
+ /* Inform user that packets are ready for reading. */
+ wake_up_interruptible(&tl_event_queue);
+
+ /* Detect and mark overflow in this stream. */
+ if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) {
+ /* Reader side depends on this increment to correctly handle
+ * overflows. The value shall be updated only if it was not
+ * modified by the reader. The data holding buffer will not be
+ * updated before stream->lock is released, however size of the
+ * buffer will. Make sure this increment is globally visible
+ * before information about selected write buffer size. */
+ atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1);
+ }
+
+ wb_size = PACKET_HEADER_SIZE;
+ if (stream->numbered)
+ wb_size += PACKET_NUMBER_SIZE;
+
+ return wb_size;
+}
+
+/**
+ * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserves buffer
+ * @stream_type: type of the stream that shall be locked
+ * @msg_size: message size
+ * @flags: pointer to store flags passed back on stream release
+ *
+ * Function will lock the stream and reserve the number of bytes requested
+ * in msg_size for the user.
+ *
+ * Return: pointer to the buffer where message can be stored
+ *
+ * Warning: Stream must be relased with kbasep_tlstream_msgbuf_release().
+ * Only atomic operations are allowed while stream is locked
+ * (i.e. do not use any operation that may sleep).
+ */
+static char *kbasep_tlstream_msgbuf_acquire(
+ enum tl_stream_type stream_type,
+ size_t msg_size,
+ unsigned long *flags)
+{
+ struct tl_stream *stream;
+ unsigned int wb_idx_raw;
+ unsigned int wb_idx;
+ size_t wb_size;
+
+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+ KBASE_DEBUG_ASSERT(
+ PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >=
+ msg_size);
+
+ stream = tl_stream[stream_type];
+
+ spin_lock_irqsave(&stream->lock, *flags);
+
+ wb_idx_raw = atomic_read(&stream->wbi);
+ wb_idx = wb_idx_raw % PACKET_COUNT;
+ wb_size = atomic_read(&stream->buffer[wb_idx].size);
+
+ /* Select next buffer if data will not fit into current one. */
+ if (PACKET_SIZE < wb_size + msg_size) {
+ wb_size = kbasep_tlstream_msgbuf_submit(
+ stream, wb_idx_raw, wb_size);
+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+ }
+
+ /* Reserve space in selected buffer. */
+ atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size);
+
+#if MALI_UNIT_TEST
+ atomic_add(msg_size, &tlstream_bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+ return &stream->buffer[wb_idx].data[wb_size];
+}
+
+/**
+ * kbasep_tlstream_msgbuf_release - unlock selected stream
+ * @stream_type: type of the stream that shall be locked
+ * @flags: value obtained during stream acquire
+ *
+ * Function releases stream that has been previously locked with a call to
+ * kbasep_tlstream_msgbuf_acquire().
+ */
+static void kbasep_tlstream_msgbuf_release(
+ enum tl_stream_type stream_type,
+ unsigned long flags)
+{
+ struct tl_stream *stream;
+
+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+
+ stream = tl_stream[stream_type];
+ stream->last_write_time = kbasep_tlstream_get_timestamp();
+
+ spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype: type of stream to be flushed
+ *
+ * Flush pending data in timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+ struct tl_stream *stream = tl_stream[stype];
+ unsigned long flags;
+ unsigned int wb_idx_raw;
+ unsigned int wb_idx;
+ size_t wb_size;
+ size_t min_size = PACKET_HEADER_SIZE;
+
+ if (stream->numbered)
+ min_size += PACKET_NUMBER_SIZE;
+
+ spin_lock_irqsave(&stream->lock, flags);
+
+ wb_idx_raw = atomic_read(&stream->wbi);
+ wb_idx = wb_idx_raw % PACKET_COUNT;
+ wb_size = atomic_read(&stream->buffer[wb_idx].size);
+
+ if (wb_size > min_size) {
+ wb_size = kbasep_tlstream_msgbuf_submit(
+ stream, wb_idx_raw, wb_size);
+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+ atomic_set(&stream->buffer[wb_idx].size, wb_size);
+ }
+ spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data: unused
+ *
+ * Timer is executed periodically to check if any of the stream contains
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+ u64 timestamp = kbasep_tlstream_get_timestamp();
+ enum tl_stream_type stype;
+ int rcode;
+
+ CSTD_UNUSED(data);
+
+ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+ struct tl_stream *stream = tl_stream[stype];
+ unsigned long flags;
+ unsigned int wb_idx_raw;
+ unsigned int wb_idx;
+ size_t wb_size;
+ size_t min_size = PACKET_HEADER_SIZE;
+
+ if (stream->numbered)
+ min_size += PACKET_NUMBER_SIZE;
+
+ spin_lock_irqsave(&stream->lock, flags);
+
+ wb_idx_raw = atomic_read(&stream->wbi);
+ wb_idx = wb_idx_raw % PACKET_COUNT;
+ wb_size = atomic_read(&stream->buffer[wb_idx].size);
+
+ if (
+ (wb_size > min_size) &&
+ (
+ timestamp - stream->last_write_time >
+ AUTOFLUSH_TIMEOUT)) {
+
+ wb_size = kbasep_tlstream_msgbuf_submit(
+ stream, wb_idx_raw, wb_size);
+ wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+ atomic_set(&stream->buffer[wb_idx].size, wb_size);
+ }
+ spin_unlock_irqrestore(&stream->lock, flags);
+ }
+
+ if (atomic_read(&autoflush_timer_active))
+ rcode = mod_timer(
+ &autoflush_timer,
+ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+ CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype: pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It will stop as soon as
+ * packet ready to be submitted to user space is detected. Variables under
+ * pointers, passed as the parameters to this function will be updated with
+ * values pointing to right stream and buffer.
+ *
+ * Return: non-zero if any of timeline streams has at last one packet ready
+ */
+static int kbasep_tlstream_packet_pending(
+ enum tl_stream_type *stype,
+ unsigned int *rb_idx_raw)
+{
+ int pending = 0;
+
+ KBASE_DEBUG_ASSERT(stype);
+ KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+ for (
+ *stype = 0;
+ (*stype < TL_STREAM_TYPE_COUNT) && !pending;
+ (*stype)++) {
+ if (NULL != tl_stream[*stype]) {
+ *rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+ /* Read buffer index may be updated by writer in case of
+ * overflow. Read and write buffer indexes must be
+ * loaded in correct order. */
+ smp_rmb();
+ if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+ pending = 1;
+ }
+ }
+ (*stype)--;
+
+ return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp: pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size: maximum amount of data that can be stored in the buffer
+ * @f_pos: pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+ struct file *filp,
+ char __user *buffer,
+ size_t size,
+ loff_t *f_pos)
+{
+ ssize_t copy_len = 0;
+
+ KBASE_DEBUG_ASSERT(filp);
+ KBASE_DEBUG_ASSERT(buffer);
+ KBASE_DEBUG_ASSERT(f_pos);
+ CSTD_UNUSED(filp);
+
+ if ((0 > *f_pos) || (PACKET_SIZE > size))
+ return -EINVAL;
+
+ mutex_lock(&tl_reader_lock);
+
+ while (copy_len < size) {
+ enum tl_stream_type stype;
+ unsigned int rb_idx_raw;
+ unsigned int rb_idx;
+ size_t rb_size;
+
+ /* If we don't have any data yet, wait for packet to be
+ * submitted. If we already read some packets and there is no
+ * packet pending return back to user. */
+ if (0 < copy_len) {
+ if (!kbasep_tlstream_packet_pending(
+ &stype,
+ &rb_idx_raw))
+ break;
+ } else {
+ if (wait_event_interruptible(
+ tl_event_queue,
+ kbasep_tlstream_packet_pending(
+ &stype,
+ &rb_idx_raw))) {
+ copy_len = -ERESTARTSYS;
+ break;
+ }
+ }
+
+ /* Check if this packet fits into the user buffer.
+ * If so copy its content. */
+ rb_idx = rb_idx_raw % PACKET_COUNT;
+ rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+ if (rb_size > size - copy_len)
+ break;
+ if (copy_to_user(
+ &buffer[copy_len],
+ tl_stream[stype]->buffer[rb_idx].data,
+ rb_size)) {
+ copy_len = -EFAULT;
+ break;
+ }
+
+ /* Verify if there was no overflow in selected stream. Make sure
+ * that if incorrect size was used we will know about it. */
+ smp_rmb();
+ if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+ copy_len += rb_size;
+ atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+ atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+ }
+ }
+
+ mutex_unlock(&tl_reader_lock);
+
+ return copy_len;
+}
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+ enum tl_stream_type stream_type;
+ unsigned int rb_idx;
+
+ KBASE_DEBUG_ASSERT(filp);
+ KBASE_DEBUG_ASSERT(wait);
+
+ poll_wait(filp, &tl_event_queue, wait);
+ if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+ return POLLIN;
+ return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp: pointer to file structure
+ *
+ * Return always return zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+ KBASE_DEBUG_ASSERT(inode);
+ KBASE_DEBUG_ASSERT(filp);
+ CSTD_UNUSED(inode);
+ CSTD_UNUSED(filp);
+ atomic_set(&tlstream_busy, 0);
+ return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc: pointer to array with tracepoint descriptors
+ * @tp_count: number of descriptors in the given array
+ *
+ * Functions fills in information about tracepoints stored in body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+ enum tl_stream_type stream_type,
+ const struct tp_desc *tp_desc,
+ u32 tp_count)
+{
+ const u8 tv = KBASEP_TLSTREAM_VERSION; /* tlstream version */
+ const u8 ps = sizeof(void *); /* pointer size */
+ size_t msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+ char *buffer;
+ size_t pos = 0;
+ unsigned long flags;
+ unsigned int i;
+
+ KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+ KBASE_DEBUG_ASSERT(tp_desc);
+
+ /* Calculate the size of the timeline message. */
+ for (i = 0; i < tp_count; i++) {
+ msg_size += sizeof(tp_desc[i].id);
+ msg_size +=
+ strnlen(tp_desc[i].id_str, STRLEN_MAX) +
+ sizeof(char) + sizeof(u32);
+ msg_size +=
+ strnlen(tp_desc[i].name, STRLEN_MAX) +
+ sizeof(char) + sizeof(u32);
+ msg_size +=
+ strnlen(tp_desc[i].arg_types, STRLEN_MAX) +
+ sizeof(char) + sizeof(u32);
+ msg_size +=
+ strnlen(tp_desc[i].arg_names, STRLEN_MAX) +
+ sizeof(char) + sizeof(u32);
+ }
+
+ KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size);
+
+ buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv));
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &tp_count, sizeof(tp_count));
+
+ for (i = 0; i < tp_count; i++) {
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos,
+ &tp_desc[i].id, sizeof(tp_desc[i].id));
+ pos = kbasep_tlstream_write_string(
+ buffer, pos,
+ tp_desc[i].id_str, msg_size - pos);
+ pos = kbasep_tlstream_write_string(
+ buffer, pos,
+ tp_desc[i].name, msg_size - pos);
+ pos = kbasep_tlstream_write_string(
+ buffer, pos,
+ tp_desc[i].arg_types, msg_size - pos);
+ pos = kbasep_tlstream_write_string(
+ buffer, pos,
+ tp_desc[i].arg_names, msg_size - pos);
+ }
+
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(stream_type, flags);
+
+ /* We don't expect any more data to be read in this stream.
+ * As header stream must be read before its associated body stream,
+ * make this packet visible to the user straightaway. */
+ kbasep_tlstream_flush_stream(stream_type);
+}
+
+/*****************************************************************************/
+
+int kbase_tlstream_init(void)
+{
+ enum tl_stream_type i;
+ int rcode;
+
+ /* Prepare stream structures. */
+ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+ tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL);
+ if (!tl_stream[i])
+ break;
+ kbasep_timeline_stream_init(tl_stream[i], i);
+ }
+ if (TL_STREAM_TYPE_COUNT > i) {
+ for (; i > 0; i--) {
+ kbasep_timeline_stream_term(tl_stream[i - 1]);
+ kfree(tl_stream[i - 1]);
+ }
+ return -ENOMEM;
+ }
+
+ /* Initialize autoflush timer. */
+ atomic_set(&autoflush_timer_active, 1);
+ setup_timer(&autoflush_timer,
+ kbasep_tlstream_autoflush_timer_callback,
+ 0);
+ rcode = mod_timer(
+ &autoflush_timer,
+ jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+ CSTD_UNUSED(rcode);
+
+ return 0;
+}
+
+void kbase_tlstream_term(void)
+{
+ enum tl_stream_type i;
+
+ atomic_set(&autoflush_timer_active, 0);
+ del_timer_sync(&autoflush_timer);
+
+ for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) {
+ kbasep_timeline_stream_term(tl_stream[i]);
+ kfree(tl_stream[i]);
+ }
+}
+
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd)
+{
+ if (0 == atomic_cmpxchg(&tlstream_busy, 0, 1)) {
+ *fd = anon_inode_getfd(
+ "[mali_tlstream]",
+ &kbasep_tlstream_fops,
+ kctx,
+ O_RDONLY | O_CLOEXEC);
+ if (0 > *fd) {
+ atomic_set(&tlstream_busy, 0);
+ return *fd;
+ }
+
+ /* Reset and initialize header streams. */
+ kbasep_timeline_stream_reset(
+ tl_stream[TL_STREAM_TYPE_OBJ_HEADER]);
+ kbasep_timeline_stream_reset(
+ tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]);
+ kbasep_timeline_stream_reset(
+ tl_stream[TL_STREAM_TYPE_AUX_HEADER]);
+ kbasep_tlstream_timeline_header(
+ TL_STREAM_TYPE_OBJ_HEADER,
+ tp_desc_obj,
+ ARRAY_SIZE(tp_desc_obj));
+ kbasep_tlstream_timeline_header(
+ TL_STREAM_TYPE_AUX_HEADER,
+ tp_desc_aux,
+ ARRAY_SIZE(tp_desc_aux));
+ } else {
+ *fd = -EBUSY;
+ }
+
+ return 0;
+}
+
+void kbase_tlstream_flush_streams(void)
+{
+ enum tl_stream_type stype;
+
+ for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++)
+ kbasep_tlstream_flush_stream(stype);
+}
+
+void kbase_tlstream_reset_body_streams(void)
+{
+ kbasep_timeline_stream_reset(
+ tl_stream[TL_STREAM_TYPE_OBJ]);
+ kbasep_timeline_stream_reset(
+ tl_stream[TL_STREAM_TYPE_AUX]);
+}
+
+#if MALI_UNIT_TEST
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated)
+{
+ KBASE_DEBUG_ASSERT(bytes_collected);
+ KBASE_DEBUG_ASSERT(bytes_generated);
+ *bytes_collected = atomic_read(&tlstream_bytes_collected);
+ *bytes_generated = atomic_read(&tlstream_bytes_generated);
+}
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr)
+{
+ const u32 msg_id = KBASE_TL_NEW_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ_SUMMARY,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &nr, sizeof(nr));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count)
+{
+ const u32 msg_id = KBASE_TL_NEW_GPU;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) +
+ sizeof(core_count);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ_SUMMARY,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &gpu, sizeof(gpu));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &id, sizeof(id));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &core_count, sizeof(core_count));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn)
+{
+ const u32 msg_id = KBASE_TL_NEW_LPU;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) +
+ sizeof(fn);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ_SUMMARY,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &lpu, sizeof(lpu));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &nr, sizeof(nr));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &fn, sizeof(fn));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu)
+{
+ const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ_SUMMARY,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &lpu, sizeof(lpu));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &gpu, sizeof(gpu));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags);
+}
+
+/*****************************************************************************/
+
+void kbase_tlstream_tl_new_ctx(void *context, u32 nr)
+{
+ const u32 msg_id = KBASE_TL_NEW_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &nr, sizeof(nr));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_new_atom(void *atom, u32 nr)
+{
+ const u32 msg_id = KBASE_TL_NEW_ATOM;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(nr);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &nr, sizeof(nr));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_del_ctx(void *context)
+{
+ const u32 msg_id = KBASE_TL_DEL_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(context);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_del_atom(void *atom)
+{
+ const u32 msg_id = KBASE_TL_DEL_ATOM;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context)
+{
+ const u32 msg_id = KBASE_TL_RET_GPU_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &gpu, sizeof(gpu));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context)
+{
+ const u32 msg_id = KBASE_TL_RET_ATOM_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu)
+{
+ const u32 msg_id = KBASE_TL_RET_ATOM_LPU;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &lpu, sizeof(lpu));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context)
+{
+ const u32 msg_id = KBASE_TL_NRET_GPU_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &gpu, sizeof(gpu));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context)
+{
+ const u32 msg_id = KBASE_TL_NRET_ATOM_CTX;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &context, sizeof(context));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu)
+{
+ const u32 msg_id = KBASE_TL_NRET_ATOM_LPU;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_OBJ,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &atom, sizeof(atom));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &lpu, sizeof(lpu));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags);
+}
+
+/*****************************************************************************/
+
+void kbase_tlstream_aux_pm_state(u32 core_type, u64 state)
+{
+ const u32 msg_id = KBASE_AUX_PM_STATE;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(core_type) +
+ sizeof(state);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_AUX,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &core_type, sizeof(core_type));
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void kbase_tlstream_aux_job_softstop(u32 js_id)
+{
+ const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(js_id);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_AUX,
+ msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &js_id, sizeof(js_id));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count)
+{
+ const u32 msg_id = KBASE_AUX_PAGEFAULT;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(mmu_as) +
+ sizeof(page_count);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_AUX, msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &mmu_as, sizeof(mmu_as));
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos, &page_count, sizeof(page_count));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void kbase_tlstream_aux_pagesalloc(s64 page_count_change)
+{
+ const u32 msg_id = KBASE_AUX_PAGESALLOC;
+ const size_t msg_size =
+ sizeof(msg_id) + sizeof(u64) + sizeof(page_count_change);
+ unsigned long flags;
+ char *buffer;
+ size_t pos = 0;
+
+ buffer = kbasep_tlstream_msgbuf_acquire(
+ TL_STREAM_TYPE_AUX, msg_size, &flags);
+ KBASE_DEBUG_ASSERT(buffer);
+
+ pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_tlstream_write_timestamp(buffer, pos);
+ pos = kbasep_tlstream_write_bytes(
+ buffer, pos,
+ &page_count_change, sizeof(page_count_change));
+ KBASE_DEBUG_ASSERT(msg_size == pos);
+
+ kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100644
index 00000000000..494f5c784f6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,284 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * Timeline need have to been previously enabled with kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx: kernel common context
+ * @fd: timeline stream file descriptor
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access to
+ * kernel timeline stream. This stream is later broadcasted by user space to the
+ * timeline client.
+ * Only one entity can own the descriptor at any given time. Descriptor shall be
+ * closed if unused. If descriptor cannot be obtained (i.e. when it is already
+ * being used) argument fd will contain negative value.
+ *
+ * Return: zero on success (this does not necessarily mean that stream
+ * descriptor could be returned), negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams.
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams.
+ *
+ * Function will discard pending data in all timeline body streams.
+ */
+void kbase_tlstream_reset_body_streams(void);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_tlstream_test - start timeline stream data generator
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay in milliseconds between trace points written by one
+ * writer
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg: if non-zero aux messages will be included
+ *
+ * This test starts a requested number of asynchronous writers in both IRQ and
+ * thread context. Each writer will generate required number of test
+ * tracepoints (tracepoints with embedded information about writer that
+ * should be verified by user space reader). Tracepoints will be emitted in
+ * all timeline body streams. If aux_msg is non-zero writer will also
+ * generate not testable tracepoints (tracepoints without information about
+ * writer). These tracepoints are used to check correctness of remaining
+ * timeline message generating functions. Writer will wait requested time
+ * between generating another set of messages. This call blocks until all
+ * writers finish.
+ */
+void kbase_tlstream_test(
+ unsigned int tpw_count,
+ unsigned int msg_delay,
+ unsigned int msg_count,
+ int aux_msg);
+
+/**
+ * kbase_tlstream_stats - read timeline stream statistics
+ * @bytes_collected: will hold number of bytes read by the user
+ * @bytes_generated: will hold number of bytes generated by trace points
+ */
+void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated);
+#endif /* MALI_UNIT_TEST */
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_tl_summary_new_ctx - create context object in timeline
+ * summary
+ * @context: name of the context object
+ * @nr: context number
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ * This message is directed to timeline summary stream.
+ */
+void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr);
+
+/**
+ * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary
+ * @gpu: name of the GPU object
+ * @id: id value of this GPU
+ * @core_count: number of cores this GPU hosts
+ *
+ * Function emits a timeline message informing about GPU creation. GPU is
+ * created with two attributes: id and core count.
+ * This message is directed to timeline summary stream.
+ */
+void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count);
+
+/**
+ * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary
+ * @lpu: name of the Logical Processing Unit object
+ * @nr: sequential number assigned to this LPU
+ * @fn: property describing this LPU's functional abilities
+ *
+ * Function emits a timeline message informing about LPU creation. LPU is
+ * created with two attributes: number linking this LPU with GPU's job slot
+ * and function bearing information about this LPU abilities.
+ * This message is directed to timeline summary stream.
+ */
+void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn);
+
+/**
+ * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU
+ * @lpu: name of the Logical Processing Unit object
+ * @gpu: name of the GPU object
+ *
+ * Function emits a timeline message informing that LPU object shall be deleted
+ * along with GPU object.
+ * This message is directed to timeline summary stream.
+ */
+void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu);
+
+/**
+ * kbase_tlstream_tl_new_ctx - create context object in timeline
+ * @context: name of the context object
+ * @nr: context number
+ *
+ * Function emits a timeline message informing about context creation. Context
+ * is created with context number (its attribute), that can be used to link
+ * kbase context with userspace context.
+ */
+void kbase_tlstream_tl_new_ctx(void *context, u32 nr);
+
+/**
+ * kbase_tlstream_tl_new_atom - create atom object in timeline
+ * @atom: name of the atom object
+ * @nr: sequential number assigned to this atom
+ *
+ * Function emits a timeline message informing about atom creation. Atom is
+ * created with atom number (its attribute) that links it with actual work
+ * bucket id understood by hardware.
+ */
+void kbase_tlstream_tl_new_atom(void *atom, u32 nr);
+
+/**
+ * kbase_tlstream_tl_del_ctx - destroy context object in timeline
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that context object ceased to
+ * exist.
+ */
+void kbase_tlstream_tl_del_ctx(void *context);
+
+/**
+ * kbase_tlstream_tl_del_atom - destroy atom object in timeline
+ * @atom: name of the atom object
+ *
+ * Function emits a timeline message informing that atom object ceased to
+ * exist.
+ */
+void kbase_tlstream_tl_del_atom(void *atom);
+
+/**
+ * kbase_tlstream_tl_ret_gpu_ctx - retain GPU by context
+ * @gpu: name of the GPU object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that GPU object is being held
+ * by context and must not be deleted unless it is released.
+ */
+void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context);
+
+/**
+ * kbase_tlstream_tl_ret_atom_ctx - retain atom by context
+ * @atom: name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by context and must not be deleted unless it is released.
+ */
+void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context);
+
+/**
+ * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU
+ * @atom: name of the atom object
+ * @lpu: name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being held
+ * by LPU and must not be deleted unless it is released.
+ */
+void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu);
+
+/**
+ * kbase_tlstream_tl_nret_gpu_ctx - release GPU by context
+ * @gpu: name of the GPU object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that GPU object is being released
+ * by context.
+ */
+void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context);
+
+/**
+ * kbase_tlstream_tl_nret_atom_ctx - release atom by context
+ * @atom: name of the atom object
+ * @context: name of the context object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by context.
+ */
+void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context);
+
+/**
+ * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU
+ * @atom: name of the atom object
+ * @lpu: name of the Logical Processing Unit object
+ *
+ * Function emits a timeline message informing that atom object is being
+ * released by LPU.
+ */
+void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+
+/**
+ * kbase_tlstream_aux_pm_state - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
+ */
+void kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+
+/**
+ * kbase_tlstream_aux_job_softstop - soft job stop occurred
+ * @js_id: job slot id
+ */
+void kbase_tlstream_aux_job_softstop(u32 js_id);
+
+/**
+ * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
+ * resulting in new pages being mapped
+ * @mmu_as: MMU address space number
+ * @page_count: number of currently used pages
+ */
+void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count);
+
+/**
+ * kbase_tlstream_aux_pagesalloc - timeline message: total number of allocated
+ * pages is changed
+ * @page_count_change: number of pages to be added or subtracted (according to
+ * the sign)
+ */
+void kbase_tlstream_aux_pagesalloc(s64 page_count_change);
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100644
index 00000000000..e2e0544208c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE *****
+ * ***** DO NOT INCLUDE DIRECTLY *****
+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code idenitifers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #under KBASE_TRACE_CODE_MAKE_CODE.
+ *
+ * e.g.:
+ * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X
+ * typedef enum
+ * {
+ * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X )
+ * #include "mali_kbase_trace_defs.h"
+ * #undef KBASE_TRACE_CODE_MAKE_CODE
+ * } kbase_trace_code;
+ *
+ * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE
+ *
+ *
+ * The use of the macro here is:
+ * - KBASE_TRACE_CODE_MAKE_CODE( X )
+ *
+ * Which produces:
+ * - For an enum, KBASE_TRACE_CODE_X
+ * - For a string, "X"
+ *
+ *
+ * For example:
+ * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to:
+ * - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum
+ * - "JM_JOB_COMPLETE" for the string
+ * - To use it to trace an event, do:
+ * - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val );
+ */
+
+#if 0 /* Dummy section to avoid breaking formatting */
+int dummy_array[] = {
+#endif
+
+/*
+ * Core events
+ */
+ /* no info_val, no gpu_addr, no atom */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY),
+ /* no info_val, no gpu_addr, no atom */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM),
+ /* info_val == GPU_IRQ_STATUS register */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ),
+ /* info_val == bits cleared */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR),
+ /* info_val == GPU_IRQ_STATUS register */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR),
+ /* GPU addr==dump address */
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE),
+ KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES),
+/*
+ * Job Slot management events
+ */
+ /* info_val==irq rawstat at start */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ),
+ /* info_val==jobs processed */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END),
+/* In the following:
+ *
+ * - ctx is set if a corresponding job found (NULL otherwise, e.g. some soft-stop cases)
+ * - uatom==kernel-side mapped uatom address (for correlation with user-side)
+ */
+ /* info_val==exit code; gpu_addr==chain gpuaddr */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE),
+ /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT),
+ /* gpu_addr is as follows:
+ * - If JS_STATUS active after soft-stop, val==gpu addr written to
+ * JS_HEAD on submit
+ * - otherwise gpu_addr==0 */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1),
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP),
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0),
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1),
+ /* gpu_addr==JS_TAIL read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD),
+/* gpu_addr is as follows:
+ * - If JS_STATUS active before soft-stop, val==JS_HEAD
+ * - otherwise gpu_addr==0
+ */
+ /* gpu_addr==JS_HEAD read */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE),
+ /* info_val == is_scheduled */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED),
+ /* info_val == is_scheduled */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE),
+ /* info_val == nr jobs submitted */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP),
+ /* gpu_addr==JS_HEAD_NEXT last written */
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER),
+ KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER),
+/*
+ * Job dispatch events
+ */
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB),
+ /* gpu_addr==0, info_val==0, uatom==0 */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER),
+/*
+ * Scheduler Core events
+ */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB),
+ /* gpu_addr==last value written/would be written to JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB),
+ /* gpu_addr==value to write into JS_HEAD */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED),
+ /* kctx is the one being evicted, info_val == kctx to put in */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED),
+ /* info_val == lower 32 bits of rechecked affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED),
+ /* info_val == lower 32 bits of rechecked affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED),
+ /* info_val == lower 32 bits of affinity */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE),
+ /* info_val == the ctx attribute now on ctx */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX),
+ /* info_val == the ctx attribute now on runpool */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL),
+ /* info_val == the ctx attribute now off ctx */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX),
+ /* info_val == the ctx attribute now off runpool */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL),
+/*
+ * Scheduler Policy events
+ */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX),
+ /* info_val == whether it was evicted */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ),
+ /* gpu_addr==JS_HEAD to write if the job were run */
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START),
+ KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END),
+/*
+ * Power Management Events
+ */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
+ /* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF),
+ /* info_val == policy number, or -1 for "Already changing" */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY),
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY),
+ /* info_val == policy number */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT),
+ /* info_val == policy number */
+ KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM),
+/* Unused code just to make it easier to not have a comma at the end.
+ * All other codes MUST come before this */
+ KBASE_TRACE_CODE_MAKE_CODE(DUMMY)
+
+#if 0 /* Dummy section to avoid breaking formatting */
+};
+#endif
+
+/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
new file mode 100644
index 00000000000..aac9858875a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c
@@ -0,0 +1,232 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_jm.h>
+#include <mali_kbase_hwaccess_jm.h>
+
+#define CREATE_TRACE_POINTS
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+#include "mali_timeline.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans);
+EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active);
+
+struct kbase_trace_timeline_desc {
+ char *enum_str;
+ char *desc;
+ char *format;
+ char *format_desc;
+};
+
+static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = {
+ #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc }
+ #include "mali_kbase_trace_timeline_defs.h"
+ #undef KBASE_TIMELINE_TRACE_CODE
+};
+
+#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table)
+
+static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos)
+{
+ if (*pos >= KBASE_NR_TRACE_CODES)
+ return NULL;
+
+ return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data)
+{
+}
+
+static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos)
+{
+ (*pos)++;
+
+ if (*pos == KBASE_NR_TRACE_CODES)
+ return NULL;
+
+ return &kbase_trace_timeline_desc_table[*pos];
+}
+
+static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data)
+{
+ struct kbase_trace_timeline_desc *trace_desc = data;
+
+ seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, trace_desc->format, trace_desc->format_desc);
+ return 0;
+}
+
+
+static const struct seq_operations kbasep_trace_timeline_seq_ops = {
+ .start = kbasep_trace_timeline_seq_start,
+ .next = kbasep_trace_timeline_seq_next,
+ .stop = kbasep_trace_timeline_seq_stop,
+ .show = kbasep_trace_timeline_seq_show,
+};
+
+static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &kbasep_trace_timeline_seq_ops);
+}
+
+static const struct file_operations kbasep_trace_timeline_debugfs_fops = {
+ .open = kbasep_trace_timeline_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+};
+
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev)
+{
+ debugfs_create_file("mali_timeline_defs",
+ S_IRUGO, kbdev->mali_debugfs_directory, NULL,
+ &kbasep_trace_timeline_debugfs_fops);
+}
+
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (kbdev->timeline.slot_atoms_submitted[js] > 0) {
+ KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1);
+ } else {
+ base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+ KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1);
+ KBASE_TIMELINE_JOB_START(kctx, js, atom_number);
+ }
+ ++kbdev->timeline.slot_atoms_submitted[js];
+
+ KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+
+ if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) {
+ KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0);
+ } else {
+ /* Job finished in JS_HEAD */
+ base_atom_id atom_number = kbase_jd_atom_id(kctx, katom);
+
+ KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0);
+ KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number);
+
+ /* see if we need to trace the job in JS_NEXT moving to JS_HEAD */
+ if (kbase_backend_nr_atoms_submitted(kbdev, js)) {
+ struct kbase_jd_atom *next_katom;
+ struct kbase_context *next_kctx;
+
+ /* Peek the next atom - note that the atom in JS_HEAD will already
+ * have been dequeued */
+ next_katom = kbase_backend_inspect_head(kbdev, js);
+ WARN_ON(!next_katom);
+ next_kctx = next_katom->kctx;
+ KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0);
+ KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1);
+ KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom));
+ }
+ }
+
+ --kbdev->timeline.slot_atoms_submitted[js];
+
+ KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]);
+}
+
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+ int uid = 0;
+ int old_uid;
+
+ /* If a producer already exists for the event, try to use their UID (multiple-producers) */
+ uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]);
+ old_uid = uid;
+
+ /* Get a new non-zero UID if we don't have one yet */
+ while (!uid)
+ uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter);
+
+ /* Try to use this UID */
+ if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid))
+ /* If it changed, raced with another producer: we've lost this UID */
+ uid = 0;
+
+ KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid);
+}
+
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+ int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+ if (uid != 0) {
+ if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+ /* If it changed, raced with another consumer: we've lost this UID */
+ uid = 0;
+
+ KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+ }
+}
+
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+ int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]);
+
+ if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0))
+ /* If it changed, raced with another consumer: we've lost this UID */
+ uid = 0;
+
+ KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid);
+}
+
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+ /* Simply log the start of the transition */
+ kbdev->timeline.l2_transitioning = true;
+ KBASE_TIMELINE_POWERING_L2(kbdev);
+}
+
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.power_change_lock);
+ /* Simply log the end of the transition */
+ if (kbdev->timeline.l2_transitioning) {
+ kbdev->timeline.l2_transitioning = false;
+ KBASE_TIMELINE_POWERED_L2(kbdev);
+ }
+}
+
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
new file mode 100644
index 00000000000..6a3cd407e4a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h
@@ -0,0 +1,356 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_KBASE_TRACE_TIMELINE_H)
+#define _KBASE_TRACE_TIMELINE_H
+
+#ifdef CONFIG_MALI_TRACE_TIMELINE
+
+enum kbase_trace_timeline_code {
+ #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val
+ #include "mali_kbase_trace_timeline_defs.h"
+ #undef KBASE_TIMELINE_TRACE_CODE
+};
+
+/** Initialize Timeline DebugFS entries */
+void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev);
+
+/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE
+ * functions.
+ * Output is timestamped by either sched_clock() (default), local_clock(), or
+ * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */
+#include "mali_timeline.h"
+
+/* Trace number of atoms in flight for kctx (atoms either not completed, or in
+ process of being returned to user */
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec, \
+ (int)kctx->timeline.owner_tgid, \
+ count); \
+ } while (0)
+
+/* Trace atom_id being Ready to Run */
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec, \
+ CTX_FLOW_ATOM_READY, \
+ (int)kctx->timeline.owner_tgid, \
+ atom_id); \
+ } while (0)
+
+/* Trace number of atoms submitted to job slot js
+ *
+ * NOTE: This uses a different tracepoint to the head/next/soft-stop actions,
+ * so that those actions can be filtered out separately from this
+ *
+ * This is because this is more useful, as we can use it to calculate general
+ * utilization easily and accurately */
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_ACTIVE, \
+ (int)kctx->timeline.owner_tgid, \
+ js, count); \
+ } while (0)
+
+
+/* Trace atoms present in JS_NEXT */
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_NEXT, \
+ (int)kctx->timeline.owner_tgid, \
+ js, count); \
+ } while (0)
+
+/* Trace atoms present in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_HEAD, \
+ (int)kctx->timeline.owner_tgid, \
+ js, count); \
+ } while (0)
+
+/* Trace that a soft stop/evict from next is being attempted on a slot */
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_SLOT_STOPPING, \
+ (kctx) ? (int)kctx->timeline.owner_tgid : 0, \
+ js, count); \
+ } while (0)
+
+
+
+/* Trace state of overall GPU power */
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_ACTIVE, active); \
+ } while (0)
+
+/* Trace state of tiler power */
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_TILER_ACTIVE, \
+ hweight64(bitmap)); \
+ } while (0)
+
+/* Trace number of shaders currently powered */
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_SHADER_ACTIVE, \
+ hweight64(bitmap)); \
+ } while (0)
+
+/* Trace state of L2 power */
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_SET_GPU_POWER_L2_ACTIVE, \
+ hweight64(bitmap)); \
+ } while (0)
+
+/* Trace state of L2 cache*/
+#define KBASE_TIMELINE_POWERING_L2(kbdev) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_GPU_POWER_L2_POWERING, \
+ 1); \
+ } while (0)
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_GPU_POWER_L2_ACTIVE, \
+ 1); \
+ } while (0)
+
+/* Trace kbase_pm_send_event message send */
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_PM_SEND_EVENT, \
+ event_type, pm_event_id); \
+ } while (0)
+
+/* Trace kbase_pm_worker message receive */
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \
+ SW_FLOW_PM_HANDLE_EVENT, \
+ event_type, pm_event_id); \
+ } while (0)
+
+
+/* Trace atom_id starting in JS_HEAD */
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \
+ HW_START_GPU_JOB_CHAIN_SW_APPROX, \
+ (int)kctx->timeline.owner_tgid, \
+ js, _consumerof_atom_number); \
+ } while (0)
+
+/* Trace atom_id stopping on JS_HEAD */
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \
+ HW_STOP_GPU_JOB_CHAIN_SW_APPROX, \
+ (int)kctx->timeline.owner_tgid, \
+ js, _producerof_atom_number_completed); \
+ } while (0)
+
+/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a
+ * certin caller */
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec, \
+ trace_code, 1); \
+ } while (0)
+
+/* Trace number of contexts active */
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) \
+ do { \
+ struct timespec ts; \
+ getrawmonotonic(&ts); \
+ trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec, \
+ count); \
+ } while (0)
+
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has done on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+ enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif /* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100644
index 00000000000..a7dc3faf4c7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,134 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE *****
+ * ***** DO NOT INCLUDE DIRECTLY *****
+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ * displayed, and is split up into various parts, separated by underscores:
+ * - 'SW' and 'HW' as the first part will be used to determine whether a
+ * timeline is to do with Software or Hardware - effectively, separate
+ * 'channels' for Software and Hardware
+ * - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ * signify related pairs of events - these are optional.
+ * - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ * - 'SW_ENTER_FOO'
+ * - 'SW_LEAVE_FOO'
+ * - 'SW_FLOW_BAR_1'
+ * - 'SW_FLOW_BAR_2'
+ * - 'HW_START_BAZ'
+ * - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ * - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ * on that instances.
+ * - underscored-prefixed parameters will by hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ * (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ * would be confusing - there's only really 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ * context, that would be confusing - all shader cores (whether it be 4, 8,
+ * etc) are shared in the system.
+ */
+
+ /*
+ * CTX events
+ */
+ /* Separate timelines for each context 'instance'*/
+ KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT, "CTX: Atoms in flight", "%d,%d", "_instance_tgid,_value_number_of_atoms"),
+ KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY, "CTX: Atoms Ready to Run", "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+ /*
+ * SW Events
+ */
+ /* Separate timelines for each slot 'instance' */
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE, "SW: GPU slot active", "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT, "SW: GPU atom in NEXT", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD, "SW: GPU atom in HEAD", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING, "SW: Try Soft-Stop on GPU slot", "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+ /* Shader and overall power is shared - can't have separate instances of
+ * it, just tagging with the context */
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE, "SW: GPU power active", "%d,%d", "_tgid,_value_is_power_active"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE, "SW: GPU tiler powered", "%d,%d", "_tgid,_value_number_of_tilers"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered", "%d,%d", "_tgid,_value_number_of_shaders"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powered", "%d,%d", "_tgid,_value_number_of_l2"),
+
+ /* SW Power event messaging. _event_type is one from the kbase_pm_event enum */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT, "SW: PM Send Event", "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT, "SW: PM Handle Event", "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+ /* SW L2 power events */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING, "SW: GPU L2 powering", "%d,%d", "_tgid,_writerof_l2_transitioning"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powering done", "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE, "SW: Context Active", "%d,%d", "_tgid,_value_active"),
+
+ /*
+ * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+ */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+ /*
+ * Significant Indirect callers of kbase_pm_check_transitions_nolock()
+ */
+ /* kbase_pm_request_cores */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"),
+ /* kbase_pm_release_cores */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"),
+ /*
+ * END: SW Functions that call kbase_pm_check_transitions_nolock()
+ */
+
+ /*
+ * HW Events
+ */
+ KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain start (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_consumerof_atom_number_ready"),
+ KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain stop (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_producerof_atom_number_completed")
diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h
new file mode 100644
index 00000000000..5139014ec2c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h
@@ -0,0 +1,561 @@
+/*
+ *
+ * (C) COPYRIGHT 2008-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UKU_H_
+#define _KBASE_UKU_H_
+
+#include "mali_uk.h"
+#include "mali_base_kernel.h"
+
+/* This file needs to support being included from kernel and userside (which use different defines) */
+#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON
+#define SUPPORT_MALI_ERROR_INJECT
+#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */
+#if defined(CONFIG_MALI_NO_MALI)
+#define SUPPORT_MALI_NO_MALI
+#elif defined(MALI_NO_MALI)
+#if MALI_NO_MALI
+#define SUPPORT_MALI_NO_MALI
+#endif
+#endif
+
+#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+
+#include "mali_kbase_gpuprops_types.h"
+
+#define BASE_UK_VERSION_MAJOR 9
+#define BASE_UK_VERSION_MINOR 0
+
+struct kbase_uk_mem_alloc {
+ union uk_header header;
+ /* IN */
+ u64 va_pages;
+ u64 commit_pages;
+ u64 extent;
+ /* IN/OUT */
+ u64 flags;
+ /* OUT */
+ u64 gpu_va;
+ u16 va_alignment;
+ u8 padding[6];
+};
+
+struct kbase_uk_mem_free {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+ /* OUT */
+};
+
+struct kbase_uk_mem_alias {
+ union uk_header header;
+ /* IN/OUT */
+ u64 flags;
+ /* IN */
+ u64 stride;
+ u64 nents;
+ union kbase_pointer ai;
+ /* OUT */
+ u64 gpu_va;
+ u64 va_pages;
+};
+
+struct kbase_uk_mem_import {
+ union uk_header header;
+ /* IN */
+ union kbase_pointer phandle;
+ u32 type;
+ u32 padding;
+ /* IN/OUT */
+ u64 flags;
+ /* OUT */
+ u64 gpu_va;
+ u64 va_pages;
+};
+
+struct kbase_uk_mem_flags_change {
+ union uk_header header;
+ /* IN */
+ u64 gpu_va;
+ u64 flags;
+ u64 mask;
+};
+
+struct kbase_uk_job_submit {
+ union uk_header header;
+ /* IN */
+ union kbase_pointer addr;
+ u32 nr_atoms;
+ u32 stride; /* bytes between atoms, i.e. sizeof(base_jd_atom_v2) */
+ /* OUT */
+};
+
+struct kbase_uk_post_term {
+ union uk_header header;
+};
+
+struct kbase_uk_dump_fault_term {
+ union uk_header header;
+};
+
+struct kbase_uk_sync_now {
+ union uk_header header;
+
+ /* IN */
+ struct base_syncset sset;
+
+ /* OUT */
+};
+
+struct kbase_uk_hwcnt_setup {
+ union uk_header header;
+
+ /* IN */
+ u64 dump_buffer;
+ u32 jm_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 unused_1; /* keep for backwards compatibility */
+ u32 mmu_l2_bm;
+ u32 padding;
+ /* OUT */
+};
+
+struct kbase_uk_hwcnt_dump {
+ union uk_header header;
+};
+
+struct kbase_uk_hwcnt_clear {
+ union uk_header header;
+};
+
+struct kbase_uk_fence_validate {
+ union uk_header header;
+ /* IN */
+ s32 fd;
+ u32 padding;
+ /* OUT */
+};
+
+struct kbase_uk_stream_create {
+ union uk_header header;
+ /* IN */
+ char name[32];
+ /* OUT */
+ s32 fd;
+ u32 padding;
+};
+
+#ifdef BASE_LEGACY_UK7_SUPPORT
+/**
+ * This structure is kept for the backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+#define BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN ((u32)0x00000001)
+struct base_cpu_id_props {
+ /**
+ * CPU ID
+ */
+ u32 id;
+
+ /**
+ * CPU Part number
+ */
+ u16 part;
+ /**
+ * ASCII code of implementer trademark
+ */
+ u8 implementer;
+
+ /**
+ * CPU Variant
+ */
+ u8 variant;
+ /**
+ * CPU Architecture
+ */
+ u8 arch;
+
+ /**
+ * CPU revision
+ */
+ u8 rev;
+
+ /**
+ * Validity of CPU id where 0-invalid and
+ * 1-valid only if ALL the cpu_id props are valid
+ */
+ u8 valid;
+
+ u8 padding[1];
+};
+
+/**
+ * This structure is kept for the backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+struct base_cpu_props {
+ u32 nr_cores; /**< Number of CPU cores */
+
+ /**
+ * CPU page size as a Logarithm to Base 2. The compile-time
+ * equivalent is @ref OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+ */
+ u32 cpu_page_size_log2;
+
+ /**
+ * CPU L1 Data cache line size as a Logarithm to Base 2. The compile-time
+ * equivalent is @ref OSU_CONFIG_CPU_L1_DCACHE_LINE_SIZE_LOG2.
+ */
+ u32 cpu_l1_dcache_line_size_log2;
+
+ /**
+ * CPU L1 Data cache size, in bytes. The compile-time equivalient is
+ * @ref OSU_CONFIG_CPU_L1_DCACHE_SIZE.
+ *
+ * This CPU Property is mainly provided to implement OpenCL's
+ * clGetDeviceInfo(), which allows the CL_DEVICE_GLOBAL_MEM_CACHE_SIZE
+ * hint to be queried.
+ */
+ u32 cpu_l1_dcache_size;
+
+ /**
+ * CPU Property Flags bitpattern.
+ *
+ * This is a combination of bits as specified by the macros prefixed with
+ * 'BASE_CPU_PROPERTY_FLAG_'.
+ */
+ u32 cpu_flags;
+
+ /**
+ * Maximum clock speed in MHz.
+ * @usecase 'Maximum' CPU Clock Speed information is required by OpenCL's
+ * clGetDeviceInfo() function for the CL_DEVICE_MAX_CLOCK_FREQUENCY hint.
+ */
+ u32 max_cpu_clock_speed_mhz;
+
+ /**
+ * @brief Total memory, in bytes.
+ *
+ * This is the theoretical maximum memory available to the CPU. It is
+ * unlikely that a client will be able to allocate all of this memory for
+ * their own purposes, but this at least provides an upper bound on the
+ * memory available to the CPU.
+ *
+ * This is required for OpenCL's clGetDeviceInfo() call when
+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL CPU devices.
+ */
+ u64 available_memory_size;
+
+ /**
+ * CPU ID detailed info
+ */
+ struct base_cpu_id_props cpu_id;
+
+ u32 padding;
+};
+
+/**
+ * This structure is kept for the backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+struct kbase_uk_cpuprops {
+ union uk_header header;
+
+ /* IN */
+ struct base_cpu_props props;
+ /* OUT */
+};
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+
+struct kbase_uk_gpuprops {
+ union uk_header header;
+
+ /* IN */
+ struct mali_base_gpu_props props;
+ /* OUT */
+};
+
+struct kbase_uk_mem_query {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+#define KBASE_MEM_QUERY_COMMIT_SIZE 1
+#define KBASE_MEM_QUERY_VA_SIZE 2
+#define KBASE_MEM_QUERY_FLAGS 3
+ u64 query;
+ /* OUT */
+ u64 value;
+};
+
+struct kbase_uk_mem_commit {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+ u64 pages;
+ /* OUT */
+ u32 result_subcode;
+ u32 padding;
+};
+
+struct kbase_uk_find_cpu_offset {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+ u64 cpu_addr;
+ u64 size;
+ /* OUT */
+ u64 offset;
+};
+
+#define KBASE_GET_VERSION_BUFFER_SIZE 64
+struct kbase_uk_get_ddk_version {
+ union uk_header header;
+ /* OUT */
+ char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE];
+ u32 version_string_size;
+ u32 padding;
+};
+
+struct kbase_uk_disjoint_query {
+ union uk_header header;
+ /* OUT */
+ u32 counter;
+ u32 padding;
+};
+
+struct kbase_uk_set_flags {
+ union uk_header header;
+ /* IN */
+ u32 create_flags;
+ u32 padding;
+};
+
+#if MALI_UNIT_TEST
+#define TEST_ADDR_COUNT 4
+#define KBASE_TEST_BUFFER_SIZE 128
+struct kbase_exported_test_data {
+ u64 test_addr[TEST_ADDR_COUNT]; /**< memory address */
+ u32 test_addr_pages[TEST_ADDR_COUNT]; /**< memory size in pages */
+ union kbase_pointer kctx; /**< base context created by process */
+ union kbase_pointer mm; /**< pointer to process address space */
+ u8 buffer1[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */
+ u8 buffer2[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */
+};
+
+struct kbase_uk_set_test_data {
+ union uk_header header;
+ /* IN */
+ struct kbase_exported_test_data test_data;
+};
+
+#endif /* MALI_UNIT_TEST */
+
+#ifdef SUPPORT_MALI_ERROR_INJECT
+struct kbase_uk_error_params {
+ union uk_header header;
+ /* IN */
+ struct kbase_error_params params;
+};
+#endif /* SUPPORT_MALI_ERROR_INJECT */
+
+#ifdef SUPPORT_MALI_NO_MALI
+struct kbase_uk_model_control_params {
+ union uk_header header;
+ /* IN */
+ struct kbase_model_control_params params;
+};
+#endif /* SUPPORT_MALI_NO_MALI */
+
+#define KBASE_MAXIMUM_EXT_RESOURCES 255
+
+struct kbase_uk_ext_buff_kds_data {
+ union uk_header header;
+ union kbase_pointer external_resource;
+ union kbase_pointer file_descriptor;
+ u32 num_res; /* limited to KBASE_MAXIMUM_EXT_RESOURCES */
+ u32 padding;
+};
+
+struct kbase_uk_profiling_controls {
+ union uk_header header;
+ u32 profiling_controls[FBDUMP_CONTROL_MAX];
+};
+
+struct kbase_uk_debugfs_mem_profile_add {
+ union uk_header header;
+ u32 len;
+ union kbase_pointer buf;
+};
+
+struct kbase_uk_context_id {
+ union uk_header header;
+ /* OUT */
+ int id;
+};
+
+#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \
+ defined(CONFIG_MALI_MIPE_ENABLED)
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @fd: timeline stream file descriptor
+ *
+ * This structure is used used when performing a call to acquire kernel side
+ * timeline stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire {
+ union uk_header header;
+ /* IN */
+ /* OUT */
+ s32 fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure
+ * @header: UK structure header
+ *
+ * This structure is used when performing a call to flush kernel side
+ * timeline streams.
+ */
+struct kbase_uk_tlstream_flush {
+ union uk_header header;
+ /* IN */
+ /* OUT */
+};
+
+#if MALI_UNIT_TEST
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg: if non-zero aux messages will be included
+ *
+ * This structure is used when performing a call to start timeline stream test
+ * embedded in kernel.
+ */
+struct kbase_uk_tlstream_test {
+ union uk_header header;
+ /* IN */
+ u32 tpw_count;
+ u32 msg_delay;
+ u32 msg_count;
+ u32 aux_msg;
+ /* OUT */
+};
+
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ *
+ * This structure is used when performing a call to obtain timeline stream
+ * statistics.
+ */
+struct kbase_uk_tlstream_stats {
+ union uk_header header; /**< UK structure header. */
+ /* IN */
+ /* OUT */
+ u32 bytes_collected;
+ u32 bytes_generated;
+};
+#endif /* MALI_UNIT_TEST */
+#endif /* MALI_KTLSTREAM_ENABLED */
+
+enum kbase_uk_function_id {
+ KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0),
+ KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1),
+ KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2),
+ KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3),
+ KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4),
+ KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5),
+ KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6),
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+ KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7),
+#endif /* BASE_LEGACY_UK6_SUPPORT */
+
+ KBASE_FUNC_SYNC = (UK_FUNC_ID + 8),
+
+ KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9),
+
+ KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10),
+ KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11),
+ KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12),
+
+#ifdef BASE_LEGACY_UK7_SUPPORT
+ KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE = (UK_FUNC_ID + 13),
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+ KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14),
+
+ KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15),
+
+ KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16),
+ KBASE_FUNC_EXT_BUFFER_LOCK = (UK_FUNC_ID + 17),
+ KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18),
+
+ KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19),
+ KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20),
+ KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21),
+
+ KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23),
+ KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24),
+ KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25),
+ KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26),
+ /* to be used only for testing
+ * purposes, otherwise these controls
+ * are set through gator API */
+
+ KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27),
+ KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28),
+ KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29),
+
+ KBASE_FUNC_DUMP_FAULT_TERM = (UK_FUNC_ID + 30),
+
+ KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31),
+
+#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \
+ defined(CONFIG_MALI_MIPE_ENABLED)
+ KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32),
+#if MALI_UNIT_TEST
+ KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33),
+ KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34),
+#endif /* MALI_UNIT_TEST */
+ KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35),
+#endif /* MALI_KTLSTREAM_ENABLED */
+
+ KBASE_FUNC_MAX
+};
+
+#endif /* _KBASE_UKU_H_ */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.c b/drivers/gpu/arm/midgard/mali_kbase_utility.c
new file mode 100644
index 00000000000..be474ff8740
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.c
@@ -0,0 +1,33 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <mali_kbase.h>
+
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry)
+{
+ struct list_head *pos = base->next;
+
+ while (pos != base) {
+ if (pos == entry)
+ return true;
+
+ pos = pos->next;
+ }
+ return false;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h
new file mode 100644
index 00000000000..fd7252dab0d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_UTILITY_H
+#define _KBASE_UTILITY_H
+
+#ifndef _KBASE_H_
+#error "Don't include this file directly, use mali_kbase.h instead"
+#endif
+
+/** Test whether the given list entry is a member of the given list.
+ *
+ * @param base The head of the list to be tested
+ * @param entry The list entry to be tested
+ *
+ * @return true if entry is a member of base
+ * false otherwise
+ */
+bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry);
+
+#endif /* _KBASE_UTILITY_H */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
new file mode 100644
index 00000000000..d1c68702e9c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c
@@ -0,0 +1,485 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK 64
+#define NR_BYTES_PER_CNT 4
+#define NR_BYTES_PER_HDR 16
+#define PRFCNT_EN_MASK_OFFSET 0x8
+
+struct kbase_vinstr_context {
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx;
+ struct kbase_vmap_struct vmap;
+ struct mutex lock;
+ u64 gpu_va;
+ void *cpu_va;
+ size_t dump_size;
+ u32 nclients;
+ struct list_head clients;
+};
+
+struct kbase_vinstr_client {
+ bool kernel;
+ void *dump_buffer;
+ u32 bitmap[4];
+ void *accum_buffer;
+ size_t dump_size;
+ struct list_head list;
+};
+
+static int map_kernel_dump_buffer(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_va_region *reg;
+ struct kbase_context *kctx = ctx->kctx;
+ u64 flags, nr_pages;
+ u16 va_align = 0;
+
+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+ ctx->dump_size = kbase_vinstr_dump_size(ctx);
+ nr_pages = PFN_UP(ctx->dump_size);
+
+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+ &ctx->gpu_va, &va_align);
+ if (!reg)
+ return -ENOMEM;
+
+ ctx->cpu_va = kbase_vmap(kctx, ctx->gpu_va, ctx->dump_size, &ctx->vmap);
+ if (!ctx->cpu_va) {
+ kbase_mem_free(kctx, ctx->gpu_va);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void unmap_kernel_dump_buffer(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_context *kctx = ctx->kctx;
+
+ kbase_vunmap(kctx, &ctx->vmap);
+ kbase_mem_free(kctx, ctx->gpu_va);
+}
+
+static int map_client_accum_buffer(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ cli->dump_size = kbase_vinstr_dump_size(ctx);
+ cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+ return !cli->accum_buffer ? -ENOMEM : 0;
+}
+
+static void unmap_client_accum_buffer(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ kfree(cli->accum_buffer);
+}
+
+static int create_vinstr_kctx(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_uk_hwcnt_setup setup;
+ int err;
+
+ ctx->kctx = kbase_create_context(ctx->kbdev, true);
+ if (!ctx->kctx)
+ return -ENOMEM;
+
+ /* Map the master kernel dump buffer. The HW dumps the counters
+ * into this memory region. */
+ err = map_kernel_dump_buffer(ctx);
+ if (err) {
+ kbase_destroy_context(ctx->kctx);
+ return err;
+ }
+
+ setup.dump_buffer = ctx->gpu_va;
+ /* The GPU requires us to disable the prfcnt collection block for
+ * reprogramming it. This introduces jitter and disrupts existing
+ * clients. Therefore we enable all of them. */
+ setup.jm_bm = 0xffffffff;
+ setup.tiler_bm = 0xffffffff;
+ setup.shader_bm = 0xffffffff;
+ setup.mmu_l2_bm = 0xffffffff;
+
+ err = kbase_instr_hwcnt_enable(ctx->kctx, &setup);
+ if (err) {
+ unmap_kernel_dump_buffer(ctx);
+ kbase_destroy_context(ctx->kctx);
+ return err;
+ }
+
+ return 0;
+}
+
+static void destroy_vinstr_kctx(struct kbase_vinstr_context *ctx)
+{
+ kbase_instr_hwcnt_disable(ctx->kctx);
+ unmap_kernel_dump_buffer(ctx);
+ kbase_destroy_context(ctx->kctx);
+ ctx->kctx = NULL;
+}
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+ struct kbase_vinstr_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return NULL;
+ INIT_LIST_HEAD(&ctx->clients);
+ mutex_init(&ctx->lock);
+ ctx->kbdev = kbdev;
+ return ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_vinstr_client *cli;
+
+ while (!list_empty(&ctx->clients)) {
+ cli = list_first_entry(&ctx->clients,
+ struct kbase_vinstr_client, list);
+ list_del(&cli->list);
+ unmap_client_accum_buffer(ctx, cli);
+ kfree(cli);
+ ctx->nclients--;
+ }
+ if (ctx->kctx)
+ destroy_vinstr_kctx(ctx);
+ kfree(ctx);
+}
+
+struct kbase_vinstr_client *kbase_vinstr_attach_client(struct kbase_vinstr_context *ctx,
+ bool kernel, u64 dump_buffer, u32 bitmap[4])
+{
+ struct kbase_vinstr_client *cli;
+
+ cli = kmalloc(sizeof(*cli), GFP_KERNEL);
+ if (!cli)
+ return NULL;
+
+ cli->kernel = kernel;
+ cli->dump_buffer = (void *)(uintptr_t)dump_buffer;
+ cli->bitmap[SHADER_HWCNT_BM] = bitmap[SHADER_HWCNT_BM];
+ cli->bitmap[TILER_HWCNT_BM] = bitmap[TILER_HWCNT_BM];
+ cli->bitmap[MMU_L2_HWCNT_BM] = bitmap[MMU_L2_HWCNT_BM];
+ cli->bitmap[JM_HWCNT_BM] = bitmap[JM_HWCNT_BM];
+
+ mutex_lock(&ctx->lock);
+ /* If this is the first client, create the vinstr kbase
+ * context. This context is permanently resident until the
+ * last client exits. */
+ if (!ctx->nclients) {
+ if (create_vinstr_kctx(ctx) < 0) {
+ kfree(cli);
+ mutex_unlock(&ctx->lock);
+ return NULL;
+ }
+ }
+
+ /* The GPU resets the counter block every time there is a request
+ * to dump it. We need a per client kernel buffer for accumulating
+ * the counters. */
+ if (map_client_accum_buffer(ctx, cli) < 0) {
+ kfree(cli);
+ if (!ctx->nclients)
+ destroy_vinstr_kctx(ctx);
+ mutex_unlock(&ctx->lock);
+ return NULL;
+ }
+
+ ctx->nclients++;
+ list_add(&cli->list, &ctx->clients);
+ mutex_unlock(&ctx->lock);
+
+ return cli;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ struct kbase_vinstr_client *iter, *tmp;
+
+ mutex_lock(&ctx->lock);
+ list_for_each_entry_safe(iter, tmp, &ctx->clients, list) {
+ if (iter == cli) {
+ list_del(&iter->list);
+ unmap_client_accum_buffer(ctx, cli);
+ kfree(iter);
+ ctx->nclients--;
+ if (!ctx->nclients)
+ destroy_vinstr_kctx(ctx);
+ break;
+ }
+ }
+ mutex_unlock(&ctx->lock);
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_device *kbdev = ctx->kctx->kbdev;
+ size_t dump_size;
+
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+ u32 nr_cg;
+
+ nr_cg = kbdev->gpu_props.num_core_groups;
+ dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+ NR_CNT_PER_BLOCK *
+ NR_BYTES_PER_CNT;
+ } else {
+ /* assume v5 for now */
+ u32 nr_l2, nr_sc;
+
+ nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+ nr_sc = kbdev->gpu_props.props.coherency_info.group[0].num_cores;
+ /* JM and tiler counter blocks are always present */
+ dump_size = (2 + nr_l2 + nr_sc) *
+ NR_CNT_PER_BLOCK *
+ NR_BYTES_PER_CNT;
+ }
+ return dump_size;
+}
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+ size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+ u32 *d = dst;
+ u32 *s = src;
+ size_t i, j;
+
+ for (i = 0; i < dump_size; i += block_size) {
+ /* skip over the header block */
+ d += NR_BYTES_PER_HDR / sizeof(u32);
+ s += NR_BYTES_PER_HDR / sizeof(u32);
+ for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+ /* saturate result if addition would result in wraparound */
+ if (U32_MAX - *d < *s)
+ *d = U32_MAX;
+ else
+ *d += *s;
+ d++;
+ s++;
+ }
+ }
+}
+
+/* This is the Midgard v4 patch function. It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v4(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ u32 *mask;
+ u8 *dst = cli->accum_buffer;
+ u8 *src = ctx->cpu_va;
+ u32 nr_cg = ctx->kctx->kbdev->gpu_props.num_core_groups;
+ size_t i, group_size, group;
+ enum {
+ SC0_BASE = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ SC1_BASE = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ SC2_BASE = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ SC3_BASE = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ TILER_BASE = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT,
+ JM_BASE = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT
+ };
+
+ group_size = NR_CNT_BLOCKS_PER_GROUP *
+ NR_CNT_PER_BLOCK *
+ NR_BYTES_PER_CNT;
+ for (i = 0; i < nr_cg; i++) {
+ group = i * group_size;
+ /* copy shader core headers */
+ memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE],
+ NR_BYTES_PER_HDR);
+ memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE],
+ NR_BYTES_PER_HDR);
+ memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE],
+ NR_BYTES_PER_HDR);
+ memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE],
+ NR_BYTES_PER_HDR);
+
+ /* copy tiler header */
+ memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE],
+ NR_BYTES_PER_HDR);
+
+ /* copy mmu header */
+ memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE],
+ NR_BYTES_PER_HDR);
+
+ /* copy job manager header */
+ memcpy(&dst[group + JM_BASE], &src[group + JM_BASE],
+ NR_BYTES_PER_HDR);
+
+ /* patch the shader core enable mask */
+ mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[SHADER_HWCNT_BM];
+ mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[SHADER_HWCNT_BM];
+ mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[SHADER_HWCNT_BM];
+ mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[SHADER_HWCNT_BM];
+
+ /* patch the tiler core enable mask */
+ mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[TILER_HWCNT_BM];
+
+ /* patch the mmu core enable mask */
+ mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+
+ /* patch the job manager enable mask */
+ mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[JM_HWCNT_BM];
+ }
+}
+
+/* This is the Midgard v5 patch function. It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. */
+static void patch_dump_buffer_hdr_v5(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ struct kbase_device *kbdev = ctx->kctx->kbdev;
+ u32 i, nr_l2, nr_sc;
+ u32 *mask;
+ u8 *dst = cli->accum_buffer;
+ u8 *src = ctx->cpu_va;
+ size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+
+ /* copy and patch job manager header */
+ memcpy(dst, src, NR_BYTES_PER_HDR);
+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[JM_HWCNT_BM];
+ dst += block_size;
+ src += block_size;
+
+ /* copy and patch tiler header */
+ memcpy(dst, src, NR_BYTES_PER_HDR);
+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[TILER_HWCNT_BM];
+ dst += block_size;
+ src += block_size;
+
+ /* copy and patch MMU/L2C headers */
+ nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+ for (i = 0; i < nr_l2; i++) {
+ memcpy(dst, src, NR_BYTES_PER_HDR);
+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[MMU_L2_HWCNT_BM];
+ dst += block_size;
+ src += block_size;
+ }
+
+ /* copy and patch shader core headers */
+ nr_sc = kbdev->gpu_props.props.coherency_info.group[0].num_cores;
+ for (i = 0; i < nr_sc; i++) {
+ memcpy(dst, src, NR_BYTES_PER_HDR);
+ mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET];
+ *mask &= cli->bitmap[SHADER_HWCNT_BM];
+ dst += block_size;
+ src += block_size;
+ }
+}
+
+static void accum_clients(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_vinstr_client *iter;
+ int v4;
+
+ v4 = kbase_hw_has_feature(ctx->kbdev, BASE_HW_FEATURE_V4);
+ list_for_each_entry(iter, &ctx->clients, list) {
+ if (v4)
+ patch_dump_buffer_hdr_v4(ctx, iter);
+ else
+ patch_dump_buffer_hdr_v5(ctx, iter);
+ accum_dump_buffer(iter->accum_buffer, ctx->cpu_va,
+ iter->dump_size);
+ }
+}
+
+int kbase_vinstr_dump(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ int err = 0;
+
+ if (!cli)
+ return -EINVAL;
+
+ mutex_lock(&ctx->lock);
+ err = kbase_instr_hwcnt_request_dump(ctx->kctx);
+ if (err)
+ goto out;
+
+ err = kbase_instr_hwcnt_wait_for_dump(ctx->kctx);
+ if (err)
+ goto out;
+
+ accum_clients(ctx);
+
+ if (!cli->kernel) {
+ if (copy_to_user((void __user *)cli->dump_buffer,
+ cli->accum_buffer, cli->dump_size)) {
+ err = -EFAULT;
+ goto out;
+ }
+ } else {
+ memcpy(cli->dump_buffer, cli->accum_buffer, cli->dump_size);
+ }
+
+ memset(cli->accum_buffer, 0, cli->dump_size);
+out:
+ mutex_unlock(&ctx->lock);
+ return err;
+}
+
+int kbase_vinstr_clear(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ int err = 0;
+
+ if (!cli)
+ return -EINVAL;
+
+ mutex_lock(&ctx->lock);
+ err = kbase_instr_hwcnt_request_dump(ctx->kctx);
+ if (err)
+ goto out;
+
+ err = kbase_instr_hwcnt_wait_for_dump(ctx->kctx);
+ if (err)
+ goto out;
+
+ err = kbase_instr_hwcnt_clear(ctx->kctx);
+ if (err)
+ goto out;
+
+ accum_clients(ctx);
+
+ memset(cli->accum_buffer, 0, cli->dump_size);
+out:
+ mutex_unlock(&ctx->lock);
+ return err;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
new file mode 100644
index 00000000000..2e846e81a80
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h
@@ -0,0 +1,102 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+enum {
+ SHADER_HWCNT_BM,
+ TILER_HWCNT_BM,
+ MMU_L2_HWCNT_BM,
+ JM_HWCNT_BM
+};
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/**
+ * kbase_vinstr_init() - Initialize the vinstr core
+ * @kbdev: Kbase device
+ *
+ * Return: A pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - Terminate the vinstr core
+ * @ctx: Vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *ctx);
+
+/**
+ * kbase_vinstr_attach_client - Attach a client to the vinstr core
+ * @ctx: Vinstr context
+ * @kernel: True if this client is a kernel-side client, false
+ * otherwise
+ * @dump_buffer: Client's dump buffer
+ * @bitmap: Bitmaps describing which counters should be enabled
+ *
+ * Return: A vinstr opaque client handle or NULL or failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_attach_client(struct kbase_vinstr_context *ctx,
+ bool kernel,
+ u64 dump_buffer,
+ u32 bitmap[4]);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @ctx: Vinstr context
+ * @cli: Pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_dump_size - Get the size of the dump buffer
+ * @ctx: Vinstr context
+ *
+ * This is only useful for kernel-side clients to know how much
+ * memory they need to allocate to receive the performance counter
+ * memory block.
+ *
+ * Return: Returns the size of the client side buffer
+ */
+size_t kbase_vinstr_dump_size(struct kbase_vinstr_context *ctx);
+
+/**
+ * kbase_vinstr_dump - Performs a synchronous hardware counter dump for a given
+ * kbase context
+ * @ctx: Vinstr context
+ * @cli: Pointer to vinstr client
+ *
+ * Return: 0 on success
+ */
+int kbase_vinstr_dump(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_clear - Performs a reset of the hardware counters for a given
+ * kbase context
+ * @ctx: Vinstr context
+ * @cli: Pointer to vinstr client
+ *
+ * Return: 0 on success
+ */
+int kbase_vinstr_clear(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli);
+
+#endif /* _KBASE_VINSTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100644
index 00000000000..5d6b4021d62
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_KBASE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mali_slot_template,
+ TP_PROTO(int jobslot, unsigned int info_val),
+ TP_ARGS(jobslot, info_val),
+ TP_STRUCT__entry(
+ __field(unsigned int, jobslot)
+ __field(unsigned int, info_val)
+ ),
+ TP_fast_assign(
+ __entry->jobslot = jobslot;
+ __entry->info_val = info_val;
+ ),
+ TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val)
+);
+
+#define DEFINE_MALI_SLOT_EVENT(name) \
+DEFINE_EVENT(mali_slot_template, mali_##name, \
+ TP_PROTO(int jobslot, unsigned int info_val), \
+ TP_ARGS(jobslot, info_val))
+DEFINE_MALI_SLOT_EVENT(JM_SUBMIT);
+DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE);
+DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0);
+DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP);
+DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT);
+DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED);
+DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT);
+DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED);
+DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB);
+DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ);
+#undef DEFINE_MALI_SLOT_EVENT
+
+DECLARE_EVENT_CLASS(mali_refcount_template,
+ TP_PROTO(int refcount, unsigned int info_val),
+ TP_ARGS(refcount, info_val),
+ TP_STRUCT__entry(
+ __field(unsigned int, refcount)
+ __field(unsigned int, info_val)
+ ),
+ TP_fast_assign(
+ __entry->refcount = refcount;
+ __entry->info_val = info_val;
+ ),
+ TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val)
+);
+
+#define DEFINE_MALI_REFCOUNT_EVENT(name) \
+DEFINE_EVENT(mali_refcount_template, mali_##name, \
+ TP_PROTO(int refcount, unsigned int info_val), \
+ TP_ARGS(refcount, info_val))
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK);
+DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX);
+DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE);
+DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE);
+#undef DEFINE_MALI_REFCOUNT_EVENT
+
+DECLARE_EVENT_CLASS(mali_add_template,
+ TP_PROTO(int gpu_addr, unsigned int info_val),
+ TP_ARGS(gpu_addr, info_val),
+ TP_STRUCT__entry(
+ __field(unsigned int, gpu_addr)
+ __field(unsigned int, info_val)
+ ),
+ TP_fast_assign(
+ __entry->gpu_addr = gpu_addr;
+ __entry->info_val = info_val;
+ ),
+ TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val)
+);
+
+#define DEFINE_MALI_ADD_EVENT(name) \
+DEFINE_EVENT(mali_add_template, mali_##name, \
+ TP_PROTO(int gpu_addr, unsigned int info_val), \
+ TP_ARGS(gpu_addr, info_val))
+DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY);
+DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR);
+DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER);
+DEFINE_MALI_ADD_EVENT(JD_DONE);
+DEFINE_MALI_ADD_EVENT(JD_CANCEL);
+DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT);
+DEFINE_MALI_ADD_EVENT(JM_IRQ);
+DEFINE_MALI_ADD_EVENT(JM_IRQ_END);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS);
+DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED);
+DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE);
+DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET);
+DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE);
+DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX);
+DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START);
+DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP);
+DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP);
+DEFINE_MALI_ADD_EVENT(PM_PWRON);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWRON_L2);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER);
+DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED);
+DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER);
+DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED);
+DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
+DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
+DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT);
+DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM);
+DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY);
+DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS);
+#undef DEFINE_MALI_ADD_EVENT
+
+#endif /* _TRACE_MALI_KBASE_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100644
index 00000000000..fc3cf32ba4d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,211 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+ TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+ unsigned char job_id),
+ TP_ARGS(event_id, tgid, pid, job_id),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned int, tgid)
+ __field(unsigned int, pid)
+ __field(unsigned char, job_id)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->tgid = tgid;
+ __entry->pid = pid;
+ __entry->job_id = job_id;
+ ),
+ TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+ __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting either power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+ TP_PROTO(unsigned int event_id, unsigned long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+ TP_PROTO(unsigned int event_id, unsigned long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64bits bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+ TP_PROTO(unsigned int event_id, unsigned long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker()
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+ TP_PROTO(int event_id, unsigned long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(int, event_id)
+ __field(unsigned long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space()
+ * it reports that a certain MMU address space is in use now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+ TP_PROTO(int event_id),
+ TP_ARGS(event_id),
+ TP_STRUCT__entry(
+ __field(int, event_id)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ ),
+ TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal()
+ * it reports that a certain MMU address space has been released now.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+ TP_PROTO(int event_id),
+ TP_ARGS(event_id),
+ TP_STRUCT__entry(
+ __field(int, event_id)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ ),
+ TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ * and by kbase_atomic_sub_pages()
+ * it reports that the total number of allocated pages is changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+ TP_PROTO(long long int event_id),
+ TP_ARGS(event_id),
+ TP_STRUCT__entry(
+ __field(long long int, event_id)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ ),
+ TP_printk("event=%lld", __entry->event_id)
+);
+
+/**
+ * mali_sw_counter - not currently used
+ * @event_id: counter id
+ */
+TRACE_EVENT(mali_sw_counter,
+ TP_PROTO(unsigned int event_id, signed long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(int, event_id)
+ __field(long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %d = %lld", __entry->event_id, __entry->value)
+);
+
+#endif /* _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/arm/midgard/mali_malisw.h b/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100644
index 00000000000..99452933eab
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX ((u8)~0U)
+#define S8_MAX ((s8)(U8_MAX>>1))
+#define S8_MIN ((s8)(-S8_MAX - 1))
+#define U16_MAX ((u16)~0U)
+#define S16_MAX ((s16)(U16_MAX>>1))
+#define S16_MIN ((s16)(-S16_MAX - 1))
+#define U32_MAX ((u32)~0U)
+#define S32_MAX ((s32)(U32_MAX>>1))
+#define S32_MIN ((s32)(-S32_MAX - 1))
+#define U64_MAX ((u64)~0ULL)
+#define S64_MAX ((s64)(U64_MAX>>1))
+#define S64_MIN ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to MAX macro for more details
+ */
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+/**
+ * MAX - Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable e.g. NaNs in floating-point types. But if you want
+ * to retrieve the min and max of two values, consider using a conditional swap
+ * instead.
+ */
+#define MAX(x, y) ((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * much support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x) ((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro in to a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...) ((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer in to a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x) ((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x) #x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be used
+ * if the macro is defined in a way which may have no value; use the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x) CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE [module]_compile_time_assertions( void )
+ * {
+ * COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+ do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100644
index 00000000000..180fea1dfd1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,542 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE 0x0000
+#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
+#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
+#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
+#define SUSPEND_SIZE 0x008 /* (RO) Fixed-function suspend buffer
+ size */
+#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
+#define MEM_FEATURES 0x010 /* (RO) Memory system features */
+#define MMU_FEATURES 0x014 /* (RO) MMU features */
+#define AS_PRESENT 0x018 /* (RO) Address space slots present */
+#define JS_PRESENT 0x01C /* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
+#define GPU_IRQ_CLEAR 0x024 /* (WO) */
+#define GPU_IRQ_MASK 0x028 /* (RW) */
+#define GPU_IRQ_STATUS 0x02C /* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. Intended to use with SOFT_RESET
+ commands which may take time. */
+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down
+ and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND 0x030 /* (WO) */
+#define GPU_STATUS 0x034 /* (RO) */
+
+
+#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */
+
+#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY 0x050 /* (WO) Power manager key register */
+#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */
+#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES 0x0AC /* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */
+#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */
+#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */
+#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */
+#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */
+#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */
+#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */
+#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */
+#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */
+#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */
+#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */
+#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */
+#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */
+#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */
+#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */
+#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
+
+
+#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
+
+
+#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
+
+#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */
+
+
+#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */
+#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */
+#define L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */
+
+#define JOB_CONTROL_BASE 0x1000
+
+#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
+#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
+#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
+#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
+#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
+
+#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
+#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
+#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
+#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
+#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */
+#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */
+#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */
+#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */
+#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */
+#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */
+#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */
+#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */
+#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */
+#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */
+#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
+#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
+
+#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
+#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
+
+#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
+
+
+#define MEMORY_MANAGEMENT_BASE 0x2000
+#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
+
+#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
+#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
+#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
+#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
+#define MMU_AS4 0x500 /* Configuration registers for address space 4 */
+#define MMU_AS5 0x540 /* Configuration registers for address space 5 */
+#define MMU_AS6 0x580 /* Configuration registers for address space 6 */
+#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */
+#define MMU_AS8 0x600 /* Configuration registers for address space 8 */
+#define MMU_AS9 0x640 /* Configuration registers for address space 9 */
+#define MMU_AS10 0x680 /* Configuration registers for address space 10 */
+#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */
+#define MMU_AS12 0x700 /* Configuration registers for address space 12 */
+#define MMU_AS13 0x740 /* Configuration registers for address space 13 */
+#define MMU_AS14 0x780 /* Configuration registers for address space 14 */
+#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
+#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
+
+
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+ MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS 16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n) (1UL << (n))
+#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003
+
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3)
+
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8)
+
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP 0x00 /* NOP Operation */
+#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs
+ (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1s cache then
+ flush all L2 caches then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12)
+#define JS_CONFIG_THREAD_PRI(n) ((n) << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep this values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
+ */
+
+/* Group of values representing the job status insead a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE 0x00
+#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE 0x40
+#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50
+#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE 0x60
+#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_AS_SHIFT 4 /* address space bitmap starts from bit 4 of the register */
+#define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */
+#define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */
+
+/* AS<n>_MEMATTR values: */
+
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull
+
+/* Symbol for default MEMATTR to use */
+#define AS_MEMATTR_INDEX_DEFAULT 0
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA 4
+
+/* GPU_ID register */
+#define GPU_ID_VERSION_STATUS_SHIFT 0
+#define GPU_ID_VERSION_MINOR_SHIFT 4
+#define GPU_ID_VERSION_MAJOR_SHIFT 12
+#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16
+#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT)
+#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT)
+#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT)
+#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT)
+
+/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */
+#define GPU_ID_PI_T60X 0x6956
+#define GPU_ID_PI_T62X 0x0620
+#define GPU_ID_PI_T76X 0x0750
+#define GPU_ID_PI_T72X 0x0720
+#define GPU_ID_PI_TFRX 0x0880
+#define GPU_ID_PI_T86X 0x0860
+#define GPU_ID_PI_T82X 0x0820
+#define GPU_ID_PI_T83X 0x0830
+
+/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */
+#define GPU_ID_S_15DEV0 0x1
+#define GPU_ID_S_EAC 0x2
+
+/* Helper macro to create a GPU_ID assuming valid values for id, major, minor, status */
+#define GPU_ID_MAKE(id, major, minor, status) \
+ (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \
+ ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \
+ ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \
+ ((status) << GPU_ID_VERSION_STATUS_SHIFT))
+
+/* End GPU_ID register */
+
+/* JS<n>_FEATURES register */
+
+#define JS_FEATURE_NULL_JOB (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB (1u << 4)
+#define JS_FEATURE_VERTEX_JOB (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB (1u << 6)
+#define JS_FEATURE_TILER_JOB (1u << 7)
+#define JS_FEATURE_FUSED_JOB (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB (1u << 9)
+
+/* End JS<n>_FEATURES register */
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED 0
+#define IMPLEMENTATION_SILICON 1
+#define IMPLEMENTATION_FPGA 2
+#define IMPLEMENTATION_MODEL 3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT 256
+#define THREAD_MWS_DEFAULT 256
+#define THREAD_MBS_DEFAULT 256
+#define THREAD_MR_DEFAULT 1024
+#define THREAD_MTQ_DEFAULT 4
+#define THREAD_MTGS_DEFAULT 10
+
+/* End THREAD_* registers */
+
+/* COHERENCY_* values*/
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE 1
+#define COHERENCY_NONE 0xFFFF
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+/* End COHERENCY_* values */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6)
+#define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16)
+#define SC_ENABLE_TEXGRD_FLAGS (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100644
index 00000000000..d8286a36009
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali_timeline
+
+#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MALI_TIMELINE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(mali_timeline_atoms_in_flight,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int tgid,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ tgid,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, tgid)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->tgid = tgid;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->count)
+);
+
+
+TRACE_EVENT(mali_timeline_atom,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int atom_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ atom_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, atom_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->atom_id = atom_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->atom_id,
+ __entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int js,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ js,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, js)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->js = js;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->js,
+ __entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_slot_action,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int js,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ js,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, js)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->js = js;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->js,
+ __entry->count)
+);
+
+TRACE_EVENT(mali_timeline_gpu_power_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int active),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ active),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, active)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->active = active;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->active)
+
+);
+
+TRACE_EVENT(mali_timeline_l2_power_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int state),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ state),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, state)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->state = state;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->state)
+
+);
+TRACE_EVENT(mali_timeline_pm_event,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int pm_event_type,
+ unsigned int pm_event_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ pm_event_type,
+ pm_event_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, pm_event_type)
+ __field(unsigned int, pm_event_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->pm_event_type = pm_event_type;
+ __entry->pm_event_id = pm_event_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->pm_event_type, __entry->pm_event_id)
+
+);
+
+TRACE_EVENT(mali_timeline_slot_atom,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int event_type,
+ int tgid,
+ int js,
+ int atom_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ event_type,
+ tgid,
+ js,
+ atom_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, event_type)
+ __field(int, tgid)
+ __field(int, js)
+ __field(int, atom_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->event_type = event_type;
+ __entry->tgid = tgid;
+ __entry->js = js;
+ __entry->atom_id = atom_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->tgid,
+ __entry->js,
+ __entry->atom_id)
+);
+
+TRACE_EVENT(mali_timeline_pm_checktrans,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int trans_code,
+ int trans_id),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ trans_code,
+ trans_id),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, trans_code)
+ __field(int, trans_id)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->trans_code = trans_code;
+ __entry->trans_id = trans_id;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->trans_id)
+
+);
+
+TRACE_EVENT(mali_timeline_context_active,
+
+ TP_PROTO(u64 ts_sec,
+ u32 ts_nsec,
+ int count),
+
+ TP_ARGS(ts_sec,
+ ts_nsec,
+ count),
+
+ TP_STRUCT__entry(
+ __field(u64, ts_sec)
+ __field(u32, ts_nsec)
+ __field(int, count)
+ ),
+
+ TP_fast_assign(
+ __entry->ts_sec = ts_sec;
+ __entry->ts_nsec = ts_nsec;
+ __entry->count = count;
+ ),
+
+ TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE,
+ (int)__entry->ts_sec,
+ (int)__entry->ts_nsec,
+ __entry->count)
+);
+
+#endif /* _MALI_TIMELINE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h
new file mode 100644
index 00000000000..841d03fb587
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_uk.h
@@ -0,0 +1,141 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes functions calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Specify this
+ * identifier to select a UKK client to the uku_open() function.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implemenation for the various OS ports need to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+ /**
+ * Value used to identify the Base driver UK client.
+ */
+ UK_CLIENT_MALI_T600_BASE,
+
+ /** The number of uk clients supported. This must be the last member of the enum */
+ UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+ UKP_FUNC_ID_CHECK_VERSION, /**< UKK Core internal function */
+ /**
+ * Each UK client numbers the functions they provide starting from
+ * number UK_FUNC_ID. This number is then eventually assigned to the
+ * id field of the union uk_header structure when preparing to make a
+ * UK call. See your UK client for a list of their function numbers.
+ */
+ UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of a
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
+ */
+union uk_header {
+ /**
+ * 32-bit number identifying the UK function to be called.
+ * Also see uk_func.
+ */
+ u32 id;
+ /**
+ * The int return code returned by the called UK function.
+ * See the specification of the particular UK function you are
+ * calling for the meaning of the error codes returned. All
+ * UK functions return 0 on success.
+ */
+ u32 ret;
+ /*
+ * Used to ensure 64-bit alignment of this union. Do not remove.
+ * This field is used for padding and does not need to be initialized.
+ */
+ u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+ union uk_header header;
+ /**< UK call header */
+ u16 major;
+ /**< This field carries the user-side major version on input and the kernel-side major version on output */
+ u16 minor;
+ /**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+ u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _UK_H_ */
diff --git a/drivers/gpu/arm/midgard/platform/Kbuild b/drivers/gpu/arm/midgard/platform/Kbuild
new file mode 100644
index 00000000000..558657bbced
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/Kbuild
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+ platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+ obj-y += $(platform_name)/
+endif
diff --git a/drivers/gpu/arm/midgard/platform/Kconfig b/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100644
index 00000000000..8fb4e917c4f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name is the name set in MALI_PLATFORM_THIRDPARTY_NAME
+#
+
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100644
index 00000000000..679945bec52
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
new file mode 100644
index 00000000000..046302e8e41
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
+
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
new file mode 100644
index 00000000000..f92961c53a0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h
@@ -0,0 +1,86 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (5000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (5000)
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define CPU_SPEED_FUNC (NULL)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#define POWER_MODEL_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
new file mode 100644
index 00000000000..6f8a72555e3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <linux/pm_runtime.h>
+#include <linux/suspend.h>
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ int ret;
+
+ dev_dbg(kbdev->dev, "pm_callback_power_on %p\n",
+ (void *)kbdev->dev->pm_domain);
+
+ ret = pm_runtime_get_sync(kbdev->dev);
+
+ dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret);
+
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "pm_callback_power_off\n");
+
+ pm_runtime_put_autosuspend(kbdev->dev);
+}
+
+int kbase_device_runtime_init(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+ pm_runtime_enable(kbdev->dev);
+#ifdef CONFIG_MALI_MIDGARD_DEBUG_SYS
+ {
+ int err = kbase_platform_create_sysfs_file(kbdev->dev);
+
+ if (err)
+ return err;
+ }
+#endif /* CONFIG_MALI_MIDGARD_DEBUG_SYS */
+ return 0;
+}
+
+void kbase_device_runtime_disable(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+ pm_runtime_disable(kbdev->dev);
+}
+
+static int pm_callback_runtime_on(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+
+ return 0;
+}
+
+static void pm_callback_runtime_off(struct kbase_device *kbdev)
+{
+ dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+}
+
+static void pm_callback_resume(struct kbase_device *kbdev)
+{
+ int ret = pm_callback_runtime_on(kbdev);
+
+ WARN_ON(ret);
+}
+
+static void pm_callback_suspend(struct kbase_device *kbdev)
+{
+ pm_callback_runtime_off(kbdev);
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = pm_callback_suspend,
+ .power_resume_callback = pm_callback_resume,
+#ifdef CONFIG_PM_RUNTIME
+ .power_runtime_init_callback = kbase_device_runtime_init,
+ .power_runtime_term_callback = kbase_device_runtime_disable,
+ .power_runtime_on_callback = pm_callback_runtime_on,
+ .power_runtime_off_callback = pm_callback_runtime_off,
+#else /* CONFIG_PM_RUNTIME */
+ .power_runtime_init_callback = NULL,
+ .power_runtime_term_callback = NULL,
+ .power_runtime_on_callback = NULL,
+ .power_runtime_off_callback = NULL,
+#endif /* CONFIG_PM_RUNTIME */
+};
+
+
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild b/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
new file mode 100644
index 00000000000..5b6ef37bb71
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild
@@ -0,0 +1,19 @@
+#
+# (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_juno_soc.o
+
+
+obj-m += juno_mali_opp.o
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c b/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
new file mode 100644
index 00000000000..ccfd8ccb012
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c
@@ -0,0 +1,84 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/scpi_protocol.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)
+#include <linux/pm_opp.h>
+#else /* Linux >= 3.13 */
+/* In 3.13 the OPP include header file, types, and functions were all
+ * renamed. Use the old filename for the include, and define the new names to
+ * the old, when an old kernel is detected.
+ */
+#include <linux/opp.h>
+#define dev_pm_opp_add opp_add
+#endif /* Linux >= 3.13 */
+
+
+static int init_juno_opps_from_scpi(struct device *dev)
+{
+ struct scpi_opp *sopp;
+ int i;
+
+ /* Hard coded for Juno. 2 is GPU domain */
+ sopp = scpi_dvfs_get_opps(2);
+ if (IS_ERR_OR_NULL(sopp))
+ return PTR_ERR(sopp);
+
+ for (i = 0; i < sopp->count; i++) {
+ struct scpi_opp_entry *e = &sopp->opp[i];
+
+ dev_info(dev, "Mali OPP from SCPI: %u Hz @ %u mV\n",
+ e->freq_hz, e->volt_mv);
+
+ dev_pm_opp_add(dev, e->freq_hz, e->volt_mv * 1000);
+ }
+
+ return 0;
+}
+
+static int juno_setup_opps(void)
+{
+ struct device_node *np;
+ struct platform_device *pdev;
+ int err;
+
+ np = of_find_node_by_name(NULL, "gpu");
+ if (!np) {
+ pr_err("Failed to find DT entry for Mali\n");
+ return -EFAULT;
+ }
+
+ pdev = of_find_device_by_node(np);
+ if (!pdev) {
+ pr_err("Failed to find device for Mali\n");
+ of_node_put(np);
+ return -EFAULT;
+ }
+
+ err = init_juno_opps_from_scpi(&pdev->dev);
+
+ of_node_put(np);
+
+ return err;
+}
+
+module_init(juno_setup_opps);
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
new file mode 100644
index 00000000000..86f2bb50caf
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c
@@ -0,0 +1,172 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#ifdef CONFIG_DEVFREQ_THERMAL
+#include <linux/devfreq_cooling.h>
+#endif
+#include <linux/thermal.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+/* Versatile Express (VE) Juno Development Platform */
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 65,
+ .mmu_irq_number = 66,
+ .gpu_irq_number = 64,
+ .io_memory_region = {
+ .start = 0x2D000000,
+ .end = 0x2D000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+ /* Cause a GPU hard reset to test whether we have actually idled the GPU
+ * and that we properly reconfigure the GPU on power up.
+ * Usually this would be dangerous, but if the GPU is working correctly it should
+ * be completely safe as the GPU should not be active at this point.
+ * However this is disabled normally because it will most likely interfere with
+ * bus logging etc.
+ */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+#ifdef CONFIG_DEVFREQ_THERMAL
+
+#define FALLBACK_STATIC_TEMPERATURE 55000
+
+static unsigned long juno_model_static_power(unsigned long voltage)
+{
+ struct thermal_zone_device *tz;
+ unsigned long temperature, temp;
+ unsigned long temp_squared, temp_cubed, temp_scaling_factor;
+ const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10);
+ const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10;
+
+ tz = thermal_zone_get_zone_by_name("gpu");
+ if (IS_ERR(tz)) {
+ pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n",
+ PTR_ERR(tz));
+ temperature = FALLBACK_STATIC_TEMPERATURE;
+ } else {
+ int ret;
+
+ ret = tz->ops->get_temp(tz, &temperature);
+ if (ret) {
+ pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n",
+ ret);
+ temperature = FALLBACK_STATIC_TEMPERATURE;
+ }
+ }
+
+ /* Calculate the temperature scaling factor. To be applied to the
+ * voltage scaled power.
+ */
+ temp = temperature / 1000;
+ temp_squared = temp * temp;
+ temp_cubed = temp_squared * temp;
+ temp_scaling_factor =
+ (2 * temp_cubed)
+ - (80 * temp_squared)
+ + (4700 * temp)
+ + 32000;
+
+ return (((coefficient * voltage_cubed) >> 20)
+ * temp_scaling_factor)
+ / 1000000;
+}
+
+static unsigned long juno_model_dynamic_power(unsigned long freq,
+ unsigned long voltage)
+{
+ /* The inputs: freq (f) is in Hz, and voltage (v) in mV.
+ * The coefficient (c) is in mW/(MHz mV mV).
+ *
+ * This function calculates the dynamic power after this formula:
+ * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz)
+ */
+ const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */
+ const unsigned long f_mhz = freq / 1000000; /* MHz */
+ const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */
+
+ return (coefficient * v2 * f_mhz) / 1000000; /* mW */
+}
+
+struct devfreq_cooling_ops juno_model_ops = {
+ .get_static_power = juno_model_static_power,
+ .get_dynamic_power = juno_model_dynamic_power,
+};
+
+#endif /* CONFIG_DEVFREQ_THERMAL */
+
+static int juno_secure_mode_enable(struct kbase_device *kbdev)
+{
+ /* TODO: enable secure mode */
+ /*dev_err(kbdev->dev, "SWITCHING TO SECURE\n");*/
+ return 0; /* all ok */
+}
+
+static int juno_secure_mode_disable(struct kbase_device *kbdev)
+{
+ /* TODO: Turn off secure mode and reset GPU */
+ /*dev_err(kbdev->dev, "SWITCHING TO NON-SECURE\n");*/
+ return 0; /* all ok */
+}
+
+struct kbase_secure_ops juno_secure_ops = {
+ .secure_mode_enable = juno_secure_mode_enable,
+ .secure_mode_disable = juno_secure_mode_disable,
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
new file mode 100644
index 00000000000..fa5e9e9a5b1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h
@@ -0,0 +1,94 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 600000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 600000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#ifdef CONFIG_DEVFREQ_THERMAL
+#define POWER_MODEL_CALLBACKS (&juno_model_ops)
+#else
+#define POWER_MODEL_CALLBACKS (NULL)
+#endif
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (&juno_secure_ops)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
+#ifdef CONFIG_DEVFREQ_THERMAL
+extern struct devfreq_cooling_ops juno_model_ops;
+#endif
+extern struct kbase_secure_ops juno_secure_ops;
diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
new file mode 100644
index 00000000000..7cb3be7f78c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h
@@ -0,0 +1,26 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @brief Entry point to transfer control to a platform for early initialization
+ *
+ * This function is called early on in the initialization during execution of
+ * @ref kbase_driver_init.
+ *
+ * @return Zero to indicate success non-zero for failure.
+ */
+int kbase_platform_early_init(void);
diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
new file mode 100644
index 00000000000..01f9dfce93c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h
@@ -0,0 +1,38 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+
+/**
+ * kbase_platform_fake_register - Entry point for fake platform registration
+ *
+ * This function is called early on in the initialization during execution of
+ * kbase_driver_init.
+ *
+ * Return: 0 to indicate success, non-zero for failure.
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Entry point for fake platform unregistration
+ *
+ * This function is called in the termination during execution of
+ * kbase_driver_exit.
+ */
+void kbase_platform_fake_unregister(void);
+
+#endif /* CONFIG_MALI_PLATFORM_FAKE */
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
new file mode 100644
index 00000000000..084a1561343
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
new file mode 100644
index 00000000000..ac5060af6a7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX (5000)
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN (5000)
+
+/**
+ * Values used for determining the GPU frequency based on the LogicTile type
+ * Used by the function kbase_get_platform_logic_tile_type
+ */
+#define VE_VIRTEX6_GPU_FREQ_MIN 5000
+#define VE_VIRTEX6_GPU_FREQ_MAX 5000
+#define VE_VIRTEX7_GPU_FREQ_MIN 40000
+#define VE_VIRTEX7_GPU_FREQ_MAX 40000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#define POWER_MODEL_CALLBACKS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
new file mode 100644
index 00000000000..687b1a8c043
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c
@@ -0,0 +1,85 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+#include "mali_kbase_config_platform.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 68,
+ .mmu_irq_number = 69,
+ .gpu_irq_number = 70,
+ .io_memory_region = {
+ .start = 0xFC010000,
+ .end = 0xFC010000 + (4096 * 4) - 1
+ }
+};
+#endif /* CONFIG_OF */
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+ /* Cause a GPU hard reset to test whether we have actually idled the GPU
+ * and that we properly reconfigure the GPU on power up.
+ * Usually this would be dangerous, but if the GPU is working correctly it should
+ * be completely safe as the GPU should not be active at this point.
+ * However this is disabled normally because it will most likely interfere with
+ * bus logging etc.
+ */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
new file mode 100644
index 00000000000..9bc51f1e2da
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,210 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START (0x10000000)
+#define SYS_CFGDATA_OFFSET (0x000000A0)
+#define SYS_CFGCTRL_OFFSET (0x000000A4)
+#define SYS_CFGSTAT_OFFSET (0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31)
+#define READ_REG_BIT_VALUE (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE (2 << 0)
+#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0)
+#define SYS_CFG_ERROR_BIT_VALUE (1 << 1)
+
+#define FEED_REG_BIT_MASK (0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13)
+
+/* the following three values used for reading
+ * HBI value of the LogicTile daughterboard */
+#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000)
+#define VE_SYS_PROC_ID1_OFFSET (0x00000088)
+#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED (0)
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed
+ * @cpu_clock - the value of CPU clock speed in MHz
+ *
+ * Returns 0 on success, error code otherwise.
+ *
+ * The implementation is platform specific.
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+ int err = 0;
+ u32 reg_val = 0;
+ u32 osc2_value = 0;
+ u32 pa_divide = 0;
+ u32 pb_divide = 0;
+ u32 pc_divide = 0;
+ void __iomem *syscfg_reg = NULL;
+ void __iomem *scc_reg = NULL;
+
+ if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) {
+ *cpu_clock = cpu_clock_speed;
+ return 0;
+ }
+
+ /* Init the value in case something goes wrong */
+ *cpu_clock = 0;
+
+ /* Map CPU register into virtual memory */
+ syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000);
+ if (syscfg_reg == NULL) {
+ err = -EIO;
+ goto syscfg_reg_map_failed;
+ }
+
+ scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000);
+ if (scc_reg == NULL) {
+ err = -EIO;
+ goto scc_reg_map_failed;
+ }
+
+ raw_spin_lock(&syscfg_lock);
+
+ /* Read SYS regs - OSC2 */
+ reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET);
+
+ /* Check if there is any other undergoing request */
+ if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) {
+ err = -EBUSY;
+ goto ongoing_request;
+ }
+ /* Reset the CGFGSTAT reg */
+ writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET));
+
+ writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE |
+ DCC_DEFAULT_BIT_VALUE |
+ SYS_CFG_OSC_FUNC_BIT_VALUE |
+ SITE_DEFAULT_BIT_VALUE |
+ BOARD_STACK_POS_DEFAULT_BIT_VALUE |
+ DEVICE_DEFAULT_BIT_VALUE,
+ (syscfg_reg + SYS_CFGCTRL_OFFSET));
+ /* Wait for the transaction to complete */
+ while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) &
+ SYS_CFG_COMPLETE_BIT_VALUE))
+ ;
+ /* Read SYS_CFGSTAT Register to get the status of submitted
+ * transaction */
+ reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET);
+
+ if (reg_val & SYS_CFG_ERROR_BIT_VALUE) {
+ /* Error while setting register */
+ err = -EIO;
+ goto set_reg_error;
+ }
+
+ osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET);
+ /* Read the SCC CFGRW0 register */
+ reg_val = readl(scc_reg);
+
+ /*
+ * Select the appropriate feed:
+ * CFGRW0[0] - CLKOB
+ * CFGRW0[1] - CLKOC
+ * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL)
+ */
+ /* Calculate the FCLK */
+ if (IS_SINGLE_BIT_SET(reg_val, 0)) {
+ /* CFGRW0[0] - CLKOB */
+ /* CFGRW0[6:3] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+ FCLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[10:7] */
+ pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ FCLK_PB_DIVIDE_BIT_SHIFT)) >>
+ FCLK_PB_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+ } else if (IS_SINGLE_BIT_SET(reg_val, 1)) {
+ /* CFGRW0[1] - CLKOC */
+ /* CFGRW0[6:3] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ FCLK_PA_DIVIDE_BIT_SHIFT)) >>
+ FCLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[14:11] */
+ pc_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ FCLK_PC_DIVIDE_BIT_SHIFT)) >>
+ FCLK_PC_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1);
+ } else if (IS_SINGLE_BIT_SET(reg_val, 2)) {
+ /* CFGRW0[2] - FACLK */
+ /* CFGRW0[18:15] */
+ pa_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ AXICLK_PA_DIVIDE_BIT_SHIFT)) >>
+ AXICLK_PA_DIVIDE_BIT_SHIFT);
+ /* CFGRW0[22:19] */
+ pb_divide = ((reg_val & (FEED_REG_BIT_MASK <<
+ AXICLK_PB_DIVIDE_BIT_SHIFT)) >>
+ AXICLK_PB_DIVIDE_BIT_SHIFT);
+ *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1);
+ } else {
+ err = -EIO;
+ }
+
+set_reg_error:
+ongoing_request:
+ raw_spin_unlock(&syscfg_lock);
+ *cpu_clock /= HZ_IN_MHZ;
+
+ if (!err)
+ cpu_clock_speed = *cpu_clock;
+
+ iounmap(scc_reg);
+
+scc_reg_map_failed:
+ iounmap(syscfg_reg);
+
+syscfg_reg_map_failed:
+
+ return err;
+}
+
+u32 kbase_get_platform_logic_tile_type(void)
+{
+ void __iomem *syscfg_reg = NULL;
+ u32 sys_procid1 = 0;
+
+ syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4);
+
+ sys_procid1 = (NULL != syscfg_reg) ? readl(syscfg_reg) : 0;
+
+ return sys_procid1 & VE_LOGIC_TILE_HBI_MASK;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
new file mode 100644
index 00000000000..ef9bfd72161
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,48 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+/**
+ * kbase_get_platform_logic_tile_type - determines which LogicTile type
+ * is used by Versatile Express
+ *
+ * When platform_config build parameter is specified as vexpress, i.e.,
+ * platform_config=vexpress, GPU frequency may vary dependent on the
+ * particular platform. The GPU frequency depends on the LogicTile type.
+ *
+ * This function is called by kbase_common_device_init to determine
+ * which LogicTile type is used by the platform by reading the HBI value
+ * of the daughterboard which holds the LogicTile:
+ *
+ * 0x192 HBI0192 Virtex-5
+ * 0x217 HBI0217 Virtex-6
+ * 0x247 HBI0247 Virtex-7
+ *
+ * Return: HBI value of the logic tile daughterboard, zero if not accessible
+ */
+u32 kbase_get_platform_logic_tile_type(void);
+
+#endif /* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
new file mode 100644
index 00000000000..d9bfabc28b6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+obj-y += mali_kbase_config_vexpress.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
new file mode 100644
index 00000000000..11c320ecc1f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -0,0 +1,86 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 5000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 5000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#define POWER_MODEL_CALLBACKS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
new file mode 100644
index 00000000000..3ff0930fb4a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c
@@ -0,0 +1,79 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 68,
+ .mmu_irq_number = 69,
+ .gpu_irq_number = 70,
+ .io_memory_region = {
+ .start = 0x2f010000,
+ .end = 0x2f010000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+ /* Cause a GPU hard reset to test whether we have actually idled the GPU
+ * and that we properly reconfigure the GPU on power up.
+ * Usually this would be dangerous, but if the GPU is working correctly it should
+ * be completely safe as the GPU should not be active at this point.
+ * However this is disabled normally because it will most likely interfere with
+ * bus logging etc.
+ */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
new file mode 100644
index 00000000000..0cb41ce8952
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild
@@ -0,0 +1,18 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+obj-y += mali_kbase_config_vexpress.o
+obj-y += mali_kbase_cpu_vexpress.o
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
new file mode 100644
index 00000000000..9bc2985fef0
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -0,0 +1,88 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase_cpu_vexpress.h"
+
+/**
+ * Maximum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MAX 10000
+/**
+ * Minimum frequency GPU will be clocked at. Given in kHz.
+ * This must be specified as there is no default value.
+ *
+ * Attached value: number in kHz
+ * Default value: NA
+ */
+#define GPU_FREQ_KHZ_MIN 10000
+
+/**
+ * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock
+ *
+ * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_cpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed)
+
+/**
+ * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock
+ *
+ * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func
+ * for the function prototype.
+ *
+ * Attached value: A kbase_gpu_clk_speed_func.
+ * Default Value: NA
+ */
+#define GPU_SPEED_FUNC (NULL)
+
+/**
+ * Power management configuration
+ *
+ * Attached value: pointer to @ref kbase_pm_callback_conf
+ * Default value: See @ref kbase_pm_callback_conf
+ */
+#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
+
+/**
+ * Platform specific configuration functions
+ *
+ * Attached value: pointer to @ref kbase_platform_funcs_conf
+ * Default value: See @ref kbase_platform_funcs_conf
+ */
+#define PLATFORM_FUNCS (NULL)
+
+/** Power model for IPA
+ *
+ * Attached value: pointer to @ref mali_pa_model_ops
+ */
+#define POWER_MODEL_CALLBACKS (NULL)
+
+/**
+ * Secure mode switch
+ *
+ * Attached value: pointer to @ref kbase_secure_ops
+ */
+#define SECURE_CALLBACKS (NULL)
+
+extern struct kbase_pm_callback_conf pm_callbacks;
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
new file mode 100644
index 00000000000..76ffe4a1e59
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c
@@ -0,0 +1,83 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/ioport.h>
+#include <mali_kbase.h>
+#include <mali_kbase_defs.h>
+#include <mali_kbase_config.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HARD_RESET_AT_POWER_OFF 0
+
+#ifndef CONFIG_OF
+static struct kbase_io_resources io_resources = {
+ .job_irq_number = 75,
+ .mmu_irq_number = 76,
+ .gpu_irq_number = 77,
+ .io_memory_region = {
+ .start = 0x2F000000,
+ .end = 0x2F000000 + (4096 * 4) - 1}
+};
+#endif
+
+static int pm_callback_power_on(struct kbase_device *kbdev)
+{
+ /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */
+ return 1;
+}
+
+static void pm_callback_power_off(struct kbase_device *kbdev)
+{
+#if HARD_RESET_AT_POWER_OFF
+ /* Cause a GPU hard reset to test whether we have actually idled the GPU
+ * and that we properly reconfigure the GPU on power up.
+ * Usually this would be dangerous, but if the GPU is working correctly it should
+ * be completely safe as the GPU should not be active at this point.
+ * However this is disabled normally because it will most likely interfere with
+ * bus logging etc.
+ */
+ KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+ kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET);
+#endif
+}
+
+struct kbase_pm_callback_conf pm_callbacks = {
+ .power_on_callback = pm_callback_power_on,
+ .power_off_callback = pm_callback_power_off,
+ .power_suspend_callback = NULL,
+ .power_resume_callback = NULL
+};
+
+static struct kbase_platform_config versatile_platform_config = {
+#ifndef CONFIG_OF
+ .io_resources = &io_resources
+#endif
+};
+
+struct kbase_platform_config *kbase_get_platform_config(void)
+{
+ return &versatile_platform_config;
+}
+
+int kbase_platform_early_init(void)
+{
+ /* Nothing needed at this stage */
+ return 0;
+}
+
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
new file mode 100644
index 00000000000..816dff49835
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c
@@ -0,0 +1,71 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START (0x10000000)
+#define SYS_CFGDATA_OFFSET (0x000000A0)
+#define SYS_CFGCTRL_OFFSET (0x000000A4)
+#define SYS_CFGSTAT_OFFSET (0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31)
+#define READ_REG_BIT_VALUE (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE (2 << 0)
+#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0)
+#define SYS_CFG_ERROR_BIT_VALUE (1 << 1)
+
+#define FEED_REG_BIT_MASK (0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vendor_specific_cpu_clock_speed
+ * @brief Retrieves the CPU clock speed.
+ * The implementation is platform specific.
+ * @param[out] cpu_clock - the value of CPU clock speed in MHz
+ * @return 0 on success, 1 otherwise
+*/
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+ /* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+ *cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+ return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100644
index 00000000000..23647ccb087
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_CPU_VEXPRESS_H_
+#define _KBASE_CPU_VEXPRESS_H_
+
+/**
+ * Versatile Express implementation of @ref kbase_cpu_clk_speed_func.
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock);
+
+#endif /* _KBASE_CPU_VEXPRESS_H_ */
diff --git a/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
new file mode 100644
index 00000000000..5fa9b39c4bc
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform_dummy/mali_ukk_os.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_ukk_os.h
+ * Types and definitions that are common for Linux OSs for the kernel side of the
+ * User-Kernel interface.
+ */
+
+#ifndef _UKK_OS_H_ /* Linux version */
+#define _UKK_OS_H_
+
+#include <linux/fs.h>
+
+/**
+ * @addtogroup uk_api User-Kernel Interface API
+ * @{
+ */
+
+/**
+ * @addtogroup uk_api_kernel UKK (Kernel side)
+ * @{
+ */
+
+/**
+ * Internal OS specific data structure associated with each UKK session. Part
+ * of a ukk_session object.
+ */
+typedef struct ukkp_session {
+ int dummy; /**< No internal OS specific data at this time */
+} ukkp_session;
+
+/** @} end group uk_api_kernel */
+
+/** @} end group uk_api */
+
+#endif /* _UKK_OS_H__ */
diff --git a/drivers/gpu/drm/pl111/Kbuild b/drivers/gpu/drm/pl111/Kbuild
new file mode 100644
index 00000000000..f10d58c70df
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Kbuild
@@ -0,0 +1,28 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+pl111_drm-y += pl111_drm_device.o \
+ pl111_drm_connector.o \
+ pl111_drm_crtc.o \
+ pl111_drm_cursor.o \
+ pl111_drm_dma_buf.o \
+ pl111_drm_encoder.o \
+ pl111_drm_fb.o \
+ pl111_drm_gem.o \
+ pl111_drm_pl111.o \
+ pl111_drm_platform.o \
+ pl111_drm_suspend.o \
+ pl111_drm_vma.o
+
+obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/drivers/gpu/drm/pl111/Kconfig b/drivers/gpu/drm/pl111/Kconfig
new file mode 100644
index 00000000000..60b465c56c5
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Kconfig
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+config DRM_PL111
+ tristate "DRM Support for PL111 CLCD Controller"
+ depends on DRM
+ select DRM_KMS_HELPER
+ select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+ help
+ Choose this option for DRM support for the PL111 CLCD controller.
+ If M is selected the module will be called pl111_drm.
+
diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile
new file mode 100644
index 00000000000..2869f587266
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Makefile
@@ -0,0 +1,32 @@
+#
+# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+# linux build system bootstrap for out-of-tree module
+
+# default to building for the host
+ARCH ?= $(shell uname -m)
+
+ifeq ($(KDIR),)
+$(error Must specify KDIR to point to the kernel to target))
+endif
+
+all: pl111_drm
+
+pl111_drm:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_USES_KDS=y CONFIG_DRM_PL111=m
+
+clean:
+ $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean
+
diff --git a/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/drivers/gpu/drm/pl111/pl111_clcd_ext.h
new file mode 100644
index 00000000000..d3e0086ff02
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_clcd_ext.h
@@ -0,0 +1,95 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+/**
+ * pl111_clcd_ext.h
+ * Extended CLCD register definitions
+ */
+
+#ifndef PL111_CLCD_EXT_H_
+#define PL111_CLCD_EXT_H_
+
+/*
+ * PL111 cursor register definitions not defined in the kernel's clcd header.
+ *
+ * TODO MIDEGL-1718: move to include/linux/amba/clcd.h
+ */
+
+#define CLCD_CRSR_IMAGE 0x00000800
+#define CLCD_CRSR_IMAGE_MAX_WORDS 256
+#define CLCD_CRSR_IMAGE_WORDS_PER_LINE 4
+#define CLCD_CRSR_IMAGE_PIXELS_PER_WORD 16
+
+#define CLCD_CRSR_LBBP_COLOR_MASK 0x00000003
+#define CLCD_CRSR_LBBP_BACKGROUND 0x0
+#define CLCD_CRSR_LBBP_FOREGROUND 0x1
+#define CLCD_CRSR_LBBP_TRANSPARENT 0x2
+#define CLCD_CRSR_LBBP_INVERSE 0x3
+
+
+#define CLCD_CRSR_CTRL 0x00000c00
+#define CLCD_CRSR_CONFIG 0x00000c04
+#define CLCD_CRSR_PALETTE_0 0x00000c08
+#define CLCD_CRSR_PALETTE_1 0x00000c0c
+#define CLCD_CRSR_XY 0x00000c10
+#define CLCD_CRSR_CLIP 0x00000c14
+#define CLCD_CRSR_IMSC 0x00000c20
+#define CLCD_CRSR_ICR 0x00000c24
+#define CLCD_CRSR_RIS 0x00000c28
+#define CLCD_MIS 0x00000c2c
+
+#define CRSR_CTRL_CRSR_ON (1 << 0)
+#define CRSR_CTRL_CRSR_MAX 3
+#define CRSR_CTRL_CRSR_NUM_SHIFT 4
+#define CRSR_CTRL_CRSR_NUM_MASK \
+ (CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
+#define CRSR_CTRL_CURSOR_0 0
+#define CRSR_CTRL_CURSOR_1 1
+#define CRSR_CTRL_CURSOR_2 2
+#define CRSR_CTRL_CURSOR_3 3
+
+#define CRSR_CONFIG_CRSR_SIZE (1 << 0)
+#define CRSR_CONFIG_CRSR_FRAME_SYNC (1 << 1)
+
+#define CRSR_PALETTE_RED_SHIFT 0
+#define CRSR_PALETTE_GREEN_SHIFT 8
+#define CRSR_PALETTE_BLUE_SHIFT 16
+
+#define CRSR_PALETTE_RED_MASK 0x000000ff
+#define CRSR_PALETTE_GREEN_MASK 0x0000ff00
+#define CRSR_PALETTE_BLUE_MASK 0x00ff0000
+#define CRSR_PALETTE_MASK (~0xff000000)
+
+#define CRSR_XY_MASK 0x000003ff
+#define CRSR_XY_X_SHIFT 0
+#define CRSR_XY_Y_SHIFT 16
+
+#define CRSR_XY_X_MASK CRSR_XY_MASK
+#define CRSR_XY_Y_MASK (CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
+
+#define CRSR_CLIP_MASK 0x3f
+#define CRSR_CLIP_X_SHIFT 0
+#define CRSR_CLIP_Y_SHIFT 8
+
+#define CRSR_CLIP_X_MASK CRSR_CLIP_MASK
+#define CRSR_CLIP_Y_MASK (CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
+
+#define CRSR_IMSC_CRSR_IM (1<<0)
+#define CRSR_ICR_CRSR_IC (1<<0)
+#define CRSR_RIS_CRSR_RIS (1<<0)
+#define CRSR_MIS_CRSR_MIS (1<<0)
+
+#endif /* PL111_CLCD_EXT_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
new file mode 100644
index 00000000000..64d87b60ff6
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -0,0 +1,270 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _PL111_DRM_H_
+#define _PL111_DRM_H_
+
+#define DRIVER_AUTHOR "ARM Ltd."
+#define DRIVER_NAME "pl111_drm"
+#define DRIVER_DESC "DRM module for PL111"
+#define DRIVER_LICENCE "GPL"
+#define DRIVER_ALIAS "platform:pl111_drm"
+#define DRIVER_DATE "20101111"
+#define DRIVER_VERSION "0.2"
+#define DRIVER_MAJOR 2
+#define DRIVER_MINOR 1
+#define DRIVER_PATCHLEVEL 1
+
+/*
+ * Number of flips allowed in flight at any one time. Any more flips requested
+ * beyond this value will cause the caller to block until earlier flips have
+ * completed.
+ *
+ * For performance reasons, this must be greater than the number of buffers
+ * used in the rendering pipeline. Note that the rendering pipeline can contain
+ * different types of buffer, e.g.:
+ * - 2 final framebuffers
+ * - >2 geometry buffers for GPU use-cases
+ * - >2 vertex buffers for GPU use-cases
+ *
+ * For example, a system using 5 geometry buffers could have 5 flips in flight,
+ * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
+ *
+ * Whilst there may be more intermediate buffers (such as vertex/geometry) than
+ * final framebuffers, KDS is used to ensure that GPU rendering waits for the
+ * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
+ * produce tearing.
+ */
+
+/*
+ * Here, we choose a conservative value. A lower value is most likely
+ * suitable for GPU use-cases.
+ */
+#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
+
+#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
+
+struct pl111_drm_flip_resource;
+
+struct pl111_gem_bo_dma {
+ dma_addr_t fb_dev_addr;
+ void *fb_cpu_addr;
+};
+
+struct pl111_gem_bo_shm {
+ struct page **pages;
+ dma_addr_t *dma_addrs;
+};
+
+struct pl111_gem_bo {
+ struct drm_gem_object gem_object;
+ u32 type;
+ union {
+ struct pl111_gem_bo_dma dma;
+ struct pl111_gem_bo_shm shm;
+ } backing_data;
+ struct sg_table *sgt;
+};
+
+extern struct pl111_drm_dev_private priv;
+
+struct pl111_drm_framebuffer {
+ struct drm_framebuffer fb;
+ struct pl111_gem_bo *bo;
+};
+
+struct pl111_drm_flip_resource {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /* This is the kds set associated to the dma_buf we want to flip */
+ struct kds_resource_set *kds_res_set;
+#endif
+ struct drm_framebuffer *fb;
+ struct drm_crtc *crtc;
+ struct list_head link;
+ bool page_flip;
+ struct drm_pending_vblank_event *event;
+};
+
+struct pl111_drm_crtc {
+ struct drm_crtc crtc;
+ int crtc_index;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /* This protects "old_kds_res_set" and "displaying_fb" */
+ spinlock_t current_displaying_lock;
+ /*
+ * When a buffer is displayed its associated kds resource
+ * will be obtained and stored here. Every time a buffer
+ * flip is completed this old kds set is released and assigned
+ * the kds set of the new buffer.
+ */
+ struct kds_resource_set *old_kds_res_set;
+ /*
+ * Stores which frame buffer is currently being displayed by
+ * this CRTC or NULL if nothing is being displayed. It is used
+ * to tell whether we need to obtain a set of kds resources for
+ * exported buffer objects.
+ */
+ struct drm_framebuffer *displaying_fb;
+#endif
+ struct drm_display_mode *new_mode;
+ struct drm_display_mode *current_mode;
+ int last_bpp;
+
+ /*
+ * This spinlock protects "update_queue", "current_update_res"
+ * and calls to do_flip_to_res() which updates the CLCD base
+ * registers.
+ */
+ spinlock_t base_update_lock;
+ /*
+ * The resource that caused a base address update. Only one can be
+ * pending, hence it's != NULL if there's a pending update
+ */
+ struct pl111_drm_flip_resource *current_update_res;
+ /* Queue of things waiting to update the base address */
+ struct list_head update_queue;
+
+ void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
+ struct drm_framebuffer *fb);
+};
+
+struct pl111_drm_connector {
+ struct drm_connector connector;
+};
+
+struct pl111_drm_encoder {
+ struct drm_encoder encoder;
+};
+
+struct pl111_drm_dev_private {
+ struct pl111_drm_crtc *pl111_crtc;
+
+ struct amba_device *amba_dev;
+ unsigned long mmio_start;
+ __u32 mmio_len;
+ void *regs;
+ struct clk *clk;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct kds_callback kds_cb;
+ struct kds_callback kds_obtain_current_cb;
+#endif
+ /*
+ * Number of flips that were started in show_framebuffer_on_crtc(),
+ * but haven't completed yet - because we do deferred flipping
+ */
+ atomic_t nr_flips_in_flight;
+ wait_queue_head_t wait_for_flips;
+
+ /*
+ * Used to prevent race between pl111_dma_buf_release and
+ * drm_gem_prime_handle_to_fd
+ */
+ struct mutex export_dma_buf_lock;
+
+ uint32_t number_crtcs;
+
+ /* Cache for flip resources used to avoid kmalloc on each page flip */
+ struct kmem_cache *page_flip_slab;
+};
+
+enum pl111_cursor_size {
+ CURSOR_32X32,
+ CURSOR_64X64
+};
+
+enum pl111_cursor_sync {
+ CURSOR_SYNC_NONE,
+ CURSOR_SYNC_VSYNC
+};
+
+
+/**
+ * Buffer allocation function which is more flexible than dumb_create(),
+ * it allows passing driver specific flags to control the kind of buffer
+ * to be allocated.
+ */
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+
+/****** TODO MIDEGL-1718: this should be moved to uapi/include/drm/pl111_drm.h ********/
+
+/*
+ * Parameters for different buffer objects:
+ * bit [0]: backing storage
+ * (0 -> SHM)
+ * (1 -> DMA)
+ * bit [2:1]: kind of mapping
+ * (0x0 -> uncached)
+ * (0x1 -> write combine)
+ * (0x2 -> cached)
+ */
+#define PL111_BOT_MASK (0x7)
+#define PL111_BOT_SHM (0x0 << 0)
+#define PL111_BOT_DMA (0x1 << 0)
+#define PL111_BOT_UNCACHED (0x0 << 1)
+#define PL111_BOT_WC (0x1 << 1)
+#define PL111_BOT_CACHED (0x2 << 1)
+
+/**
+ * User-desired buffer creation information structure.
+ *
+ * @size: user-desired memory allocation size.
+ * - this size value would be page-aligned internally.
+ * @flags: user request for setting memory type or cache attributes as a bit op
+ * - PL111_BOT_DMA / PL111_BOT_SHM
+ * - PL111_BOT_UNCACHED / PL111_BOT_WC / PL111_BOT_CACHED
+ * @handle: returned a handle to created gem object.
+ * - this handle will be set by gem module of kernel side.
+ */
+struct drm_pl111_gem_create {
+ uint32_t height;
+ uint32_t width;
+ uint32_t bpp;
+ uint32_t flags;
+ /* handle, pitch, size will be returned */
+ uint32_t handle;
+ uint32_t pitch;
+ uint64_t size;
+};
+
+#define DRM_PL111_GEM_CREATE 0x00
+
+#define DRM_IOCTL_PL111_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + \
+ DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create)
+/****************************************************************************/
+
+#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
+ (container_of(drm_fb, struct pl111_drm_framebuffer, fb))
+
+#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
+ (container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
+
+#define PL111_BO_FROM_GEM(gem_obj) \
+ container_of(gem_obj, struct pl111_gem_bo, gem_object)
+
+#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
+
+#define PL111_ENCODER_FROM_ENCODER(x) \
+ container_of(x, struct pl111_drm_encoder, encoder)
+
+#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
+ container_of(x, struct pl111_drm_connector, connector)
+
+#include "pl111_drm_funcs.h"
+
+#endif /* _PL111_DRM_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm_connector.c b/drivers/gpu/drm/pl111/pl111_drm_connector.c
new file mode 100644
index 00000000000..c7c3a226a86
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_connector.c
@@ -0,0 +1,170 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_connector.c
+ * Implementation of the connector functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+
+static struct {
+ int w, h, type;
+} pl111_drm_modes[] = {
+ { 640, 480, DRM_MODE_TYPE_PREFERRED},
+ { 800, 600, 0},
+ {1024, 768, 0},
+ { -1, -1, -1}
+};
+
+void pl111_connector_destroy(struct drm_connector *connector)
+{
+ struct pl111_drm_connector *pl111_connector =
+ PL111_CONNECTOR_FROM_CONNECTOR(connector);
+
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+ drm_sysfs_connector_remove(connector);
+ drm_connector_cleanup(connector);
+ kfree(pl111_connector);
+}
+
+enum drm_connector_status pl111_connector_detect(struct drm_connector
+ *connector, bool force)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+ return connector_status_connected;
+}
+
+void pl111_connector_dpms(struct drm_connector *connector, int mode)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+}
+
+struct drm_encoder *
+pl111_connector_helper_best_encoder(struct drm_connector *connector)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+ if (connector->encoder != NULL) {
+ return connector->encoder; /* Return attached encoder */
+ } else {
+ /*
+ * If there is no attached encoder we choose the best candidate
+ * from the list.
+ * For PL111 there is only one encoder so we return the first
+ * one we find.
+ * Other h/w would require a suitable criterion below.
+ */
+ struct drm_encoder *encoder = NULL;
+ struct drm_device *dev = connector->dev;
+
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list,
+ head) {
+ if (1) { /* criterion ? */
+ break;
+ }
+ }
+ return encoder; /* return best candidate encoder */
+ }
+}
+
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+ int i = 0;
+ int count = 0;
+
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+ while (pl111_drm_modes[i].w != -1) {
+ struct drm_display_mode *mode =
+ drm_mode_find_dmt(connector->dev,
+ pl111_drm_modes[i].w,
+ pl111_drm_modes[i].h,
+ 60
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ , false
+#endif
+ );
+
+ if (mode != NULL) {
+ mode->type |= pl111_drm_modes[i].type;
+ drm_mode_probed_add(connector, mode);
+ count++;
+ }
+
+ i++;
+ }
+
+ DRM_DEBUG_KMS("found %d modes\n", count);
+
+ return count;
+}
+
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+ return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = pl111_connector_destroy,
+ .detect = pl111_connector_detect,
+ .dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+ .get_modes = pl111_connector_helper_get_modes,
+ .mode_valid = pl111_connector_helper_mode_valid,
+ .best_encoder = pl111_connector_helper_best_encoder,
+};
+
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+ struct pl111_drm_connector *pl111_connector;
+
+ pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
+ GFP_KERNEL);
+
+ if (pl111_connector == NULL) {
+ pr_err("Failed to allocated pl111_drm_connector\n");
+ return NULL;
+ }
+
+ drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
+ DRM_MODE_CONNECTOR_DVII);
+
+ drm_connector_helper_add(&pl111_connector->connector,
+ &connector_helper_funcs);
+
+ drm_sysfs_connector_add(&pl111_connector->connector);
+
+ return pl111_connector;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100644
index 00000000000..ede07ff9fc0
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_crtc.c
+ * Implementation of the CRTC functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+static int pl111_crtc_num;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
+{
+ struct drm_device *dev = pl111_crtc->crtc.dev;
+ struct pl111_drm_flip_resource *old_flip_res;
+ struct pl111_gem_bo *bo;
+ unsigned long irq_flags;
+ int flips_in_flight;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ unsigned long flags;
+#endif
+
+ spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+
+ /*
+ * Cache the flip resource that caused the IRQ since it will be
+ * dispatched later. Early return if the IRQ isn't associated to
+ * a base register update.
+ *
+ * TODO MIDBASE-2790: disable IRQs when a flip is not pending.
+ */
+ old_flip_res = pl111_crtc->current_update_res;
+ if (!old_flip_res) {
+ spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+ return;
+ }
+ pl111_crtc->current_update_res = NULL;
+
+ /* Prepare the next flip (if any) of the queue as soon as possible. */
+ if (!list_empty(&pl111_crtc->update_queue)) {
+ struct pl111_drm_flip_resource *flip_res;
+ /* Remove the head of the list */
+ flip_res = list_first_entry(&pl111_crtc->update_queue,
+ struct pl111_drm_flip_resource, link);
+ list_del(&flip_res->link);
+ do_flip_to_res(flip_res);
+ /*
+ * current_update_res will be set, so guarentees that
+ * another flip_res coming in gets queued instead of
+ * handled immediately
+ */
+ }
+ spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+ /* Finalize properly the flip that caused the IRQ */
+ DRM_DEBUG_KMS("DRM Finalizing old_flip_res=%p\n", old_flip_res);
+
+ bo = PL111_BO_FROM_FRAMEBUFFER(old_flip_res->fb);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ release_kds_resource_and_display(old_flip_res);
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+ /* Release DMA buffer on this flip */
+
+ if (bo->gem_object.export_dma_buf != NULL)
+ dma_buf_put(bo->gem_object.export_dma_buf);
+
+ drm_handle_vblank(dev, pl111_crtc->crtc_index);
+
+ /* Wake up any processes waiting for page flip event */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+ if (old_flip_res->event) {
+ spin_lock_bh(&dev->event_lock);
+ drm_send_vblank_event(dev, pl111_crtc->crtc_index,
+ old_flip_res->event);
+ spin_unlock_bh(&dev->event_lock);
+ }
+#else
+ if (old_flip_res->event) {
+ struct drm_pending_vblank_event *e = old_flip_res->event;
+ struct timeval now;
+ unsigned int seq;
+
+ DRM_DEBUG_KMS("%s: wake up page flip event (%p)\n", __func__,
+ old_flip_res->event);
+
+ spin_lock_bh(&dev->event_lock);
+ seq = drm_vblank_count_and_time(dev, pl111_crtc->crtc_index,
+ &now);
+ e->pipe = pl111_crtc->crtc_index;
+ e->event.sequence = seq;
+ e->event.tv_sec = now.tv_sec;
+ e->event.tv_usec = now.tv_usec;
+
+ list_add_tail(&e->base.link,
+ &e->base.file_priv->event_list);
+
+ wake_up_interruptible(&e->base.file_priv->event_wait);
+ spin_unlock_bh(&dev->event_lock);
+ }
+#endif
+
+ drm_vblank_put(dev, pl111_crtc->crtc_index);
+
+ /*
+ * workqueue.c:process_one_work():
+ * "It is permissible to free the struct work_struct from
+ * inside the function that is called from it"
+ */
+ kmem_cache_free(priv.page_flip_slab, old_flip_res);
+
+ flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
+ if (flips_in_flight == 0 ||
+ flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
+ wake_up(&priv.wait_for_flips);
+
+ DRM_DEBUG_KMS("DRM release flip_res=%p\n", old_flip_res);
+}
+
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
+{
+ struct pl111_drm_flip_resource *flip_res = cb1;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+ pl111_crtc->show_framebuffer_cb(cb1, cb2);
+}
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb, bool page_flip,
+ struct drm_pending_vblank_event *event)
+{
+ struct pl111_gem_bo *bo;
+ struct pl111_drm_flip_resource *flip_res;
+ int flips_in_flight;
+ int old_flips_in_flight;
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+ crtc->fb = fb;
+#else
+ crtc->primary->fb = fb;
+#endif
+
+ bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+ if (bo == NULL) {
+ DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
+ return -EINVAL;
+ }
+
+ /* If this is a full modeset, wait for all outstanding flips to complete
+ * before continuing. This avoids unnecessary complication from being
+ * able to queue up multiple modesets and queues of mixed modesets and
+ * page flips.
+ *
+ * Modesets should be uncommon and will not be performant anyway, so
+ * making them synchronous should have negligible performance impact.
+ */
+ if (!page_flip) {
+ int ret = wait_event_killable(priv.wait_for_flips,
+ atomic_read(&priv.nr_flips_in_flight) == 0);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * There can be more 'early display' flips in flight than there are
+ * buffers, and there is (currently) no explicit bound on the number of
+ * flips. Hence, we need a new allocation for each one.
+ *
+ * Note: this could be optimized down if we knew a bound on the flips,
+ * since an application can only have so many buffers in flight to be
+ * useful/not hog all the memory
+ */
+ flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
+ if (flip_res == NULL) {
+ pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * increment flips in flight, whilst blocking when we reach
+ * NR_FLIPS_IN_FLIGHT_THRESHOLD
+ */
+ do {
+ /*
+ * Note: use of assign-and-then-compare in the condition to set
+ * flips_in_flight
+ */
+ int ret = wait_event_killable(priv.wait_for_flips,
+ (flips_in_flight =
+ atomic_read(&priv.nr_flips_in_flight))
+ < NR_FLIPS_IN_FLIGHT_THRESHOLD);
+ if (ret != 0) {
+ kmem_cache_free(priv.page_flip_slab, flip_res);
+ return ret;
+ }
+
+ old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
+ flips_in_flight, flips_in_flight + 1);
+ } while (old_flips_in_flight != flips_in_flight);
+
+ flip_res->fb = fb;
+ flip_res->crtc = crtc;
+ flip_res->page_flip = page_flip;
+ flip_res->event = event;
+ INIT_LIST_HEAD(&flip_res->link);
+ DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ if (bo->gem_object.export_dma_buf != NULL) {
+ struct dma_buf *buf = bo->gem_object.export_dma_buf;
+ unsigned long shared[1] = { 0 };
+ struct kds_resource *resource_list[1] = {
+ get_dma_buf_kds_resource(buf) };
+ int err;
+
+ get_dma_buf(buf);
+ DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+
+ /* Wait for the KDS resource associated with this buffer */
+ err = kds_async_waitall(&flip_res->kds_res_set,
+ &priv.kds_cb, flip_res, fb, 1, shared,
+ resource_list);
+ BUG_ON(err);
+ } else {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+ DRM_DEBUG_KMS("No dma_buf for this flip\n");
+
+ /* No dma-buf attached so just call the callback directly */
+ flip_res->kds_res_set = NULL;
+ pl111_crtc->show_framebuffer_cb(flip_res, fb);
+ }
+#else
+ if (bo->gem_object.export_dma_buf != NULL) {
+ struct dma_buf *buf = bo->gem_object.export_dma_buf;
+
+ get_dma_buf(buf);
+ DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+ } else {
+ DRM_DEBUG_KMS("No dma_buf for this flip\n");
+ }
+
+ /* No dma-buf attached to this so just call the callback directly */
+ {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ pl111_crtc->show_framebuffer_cb(flip_res, fb);
+ }
+#endif
+
+ /* For the same reasons as the wait at the start of this function,
+ * wait for the modeset to complete before continuing.
+ */
+ if (!page_flip) {
+ int ret = wait_event_killable(priv.wait_for_flips,
+ flips_in_flight == 0);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ struct drm_pending_vblank_event *event)
+#else
+ struct drm_pending_vblank_event *event,
+ uint32_t flags)
+#endif
+{
+ DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
+ __func__, crtc, fb, event);
+ return show_framebuffer_on_crtc(crtc, fb, true, event);
+}
+
+int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode,
+ int x, int y, struct drm_framebuffer *old_fb)
+{
+ int ret;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ struct drm_display_mode *duplicated_mode;
+
+ DRM_DEBUG_KMS("DRM crtc_helper_mode_set, x=%d y=%d bpp=%d\n",
+ adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+ crtc->fb->bits_per_pixel);
+#else
+ crtc->primary->fb->bits_per_pixel);
+#endif
+
+ duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode);
+ if (!duplicated_mode)
+ return -ENOMEM;
+
+ pl111_crtc->new_mode = duplicated_mode;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+ ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
+#else
+ ret = show_framebuffer_on_crtc(crtc, crtc->primary->fb, false, NULL);
+#endif
+ if (ret != 0) {
+ pl111_crtc->new_mode = pl111_crtc->current_mode;
+ drm_mode_destroy(crtc->dev, duplicated_mode);
+ }
+
+ return ret;
+}
+
+void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
+{
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+void pl111_crtc_helper_commit(struct drm_crtc *crtc)
+{
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0))
+ const struct drm_display_mode *mode,
+#else
+ struct drm_display_mode *mode,
+#endif
+ struct drm_display_mode *adjusted_mode)
+{
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+#ifdef CONFIG_ARCH_VEXPRESS
+ /*
+ * 1024x768 with more than 16 bits per pixel may not work
+ * correctly on Versatile Express due to bandwidth issues
+ */
+ if (mode->hdisplay == 1024 && mode->vdisplay == 768 &&
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0))
+ crtc->fb->bits_per_pixel > 16) {
+#else
+ crtc->primary->fb->bits_per_pixel > 16) {
+#endif
+ DRM_INFO("*WARNING* 1024x768 at > 16 bpp may suffer corruption\n");
+ }
+#endif
+
+ return true;
+}
+
+void pl111_crtc_helper_disable(struct drm_crtc *crtc)
+{
+ int ret;
+
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+ /* don't disable crtc until no flips in flight as irq will be disabled */
+ ret = wait_event_killable(priv.wait_for_flips, atomic_read(&priv.nr_flips_in_flight) == 0);
+ if(ret) {
+ pr_err("pl111_crtc_helper_disable failed\n");
+ return;
+ }
+ clcd_disable(crtc);
+}
+
+void pl111_crtc_destroy(struct drm_crtc *crtc)
+{
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+ DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+ drm_crtc_cleanup(crtc);
+ kfree(pl111_crtc);
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+ .cursor_set = pl111_crtc_cursor_set,
+ .cursor_move = pl111_crtc_cursor_move,
+ .set_config = drm_crtc_helper_set_config,
+ .page_flip = pl111_crtc_page_flip,
+ .destroy = pl111_crtc_destroy
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+ .mode_set = pl111_crtc_helper_mode_set,
+ .prepare = pl111_crtc_helper_prepare,
+ .commit = pl111_crtc_helper_commit,
+ .mode_fixup = pl111_crtc_helper_mode_fixup,
+ .disable = pl111_crtc_helper_disable,
+};
+
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+ struct pl111_drm_crtc *pl111_crtc;
+
+ pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
+ if (pl111_crtc == NULL) {
+ pr_err("Failed to allocated pl111_drm_crtc\n");
+ return NULL;
+ }
+
+ drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+ drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+ pl111_crtc->crtc_index = pl111_crtc_num;
+ pl111_crtc_num++;
+ pl111_crtc->crtc.enabled = 0;
+ pl111_crtc->last_bpp = 0;
+ pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ pl111_crtc->displaying_fb = NULL;
+ pl111_crtc->old_kds_res_set = NULL;
+ spin_lock_init(&pl111_crtc->current_displaying_lock);
+#endif
+ pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+ INIT_LIST_HEAD(&pl111_crtc->update_queue);
+ spin_lock_init(&pl111_crtc->base_update_lock);
+
+ return pl111_crtc;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100644
index 00000000000..4bf20fec246
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_cursor.c
+ * Implementation of cursor functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_clcd_ext.h"
+#include "pl111_drm.h"
+
+#define PL111_MAX_CURSOR_WIDTH (64)
+#define PL111_MAX_CURSOR_HEIGHT (64)
+
+#define ARGB_2_LBBP_BINARY_THRESHOLD (1 << 7)
+#define ARGB_ALPHA_SHIFT 24
+#define ARGB_ALPHA_MASK (0xff << ARGB_ALPHA_SHIFT)
+#define ARGB_RED_SHIFT 16
+#define ARGB_RED_MASK (0xff << ARGB_RED_SHIFT)
+#define ARGB_GREEN_SHIFT 8
+#define ARGB_GREEN_MASK (0xff << ARGB_GREEN_SHIFT)
+#define ARGB_BLUE_SHIFT 0
+#define ARGB_BLUE_MASK (0xff << ARGB_BLUE_SHIFT)
+
+
+void pl111_set_cursor_size(enum pl111_cursor_size size)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+ if (size == CURSOR_64X64)
+ reg_data |= CRSR_CONFIG_CRSR_SIZE;
+ else
+ reg_data &= ~CRSR_CONFIG_CRSR_SIZE;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG);
+
+ if (sync == CURSOR_SYNC_VSYNC)
+ reg_data |= CRSR_CONFIG_CRSR_FRAME_SYNC;
+ else
+ reg_data &= ~CRSR_CONFIG_CRSR_FRAME_SYNC;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+void pl111_set_cursor(u32 cursor)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+ reg_data &= ~(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT);
+ reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_enable(bool enable)
+{
+ u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+ if (enable)
+ reg_data |= CRSR_CTRL_CRSR_ON;
+ else
+ reg_data &= ~CRSR_CTRL_CRSR_ON;
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+void pl111_set_cursor_position(u32 x, u32 y)
+{
+ u32 reg_data = (x & CRSR_XY_MASK) |
+ ((y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT);
+
+ writel(reg_data, priv.regs + CLCD_CRSR_XY);
+}
+
+void pl111_set_cursor_clipping(u32 x, u32 y)
+{
+ u32 reg_data;
+
+ /*
+ * Do not allow setting clipping values larger than
+ * the cursor size since the cursor is already fully hidden
+ * when x,y = PL111_MAX_CURSOR_WIDTH.
+ */
+ if (x > PL111_MAX_CURSOR_WIDTH)
+ x = PL111_MAX_CURSOR_WIDTH;
+ if (y > PL111_MAX_CURSOR_WIDTH)
+ y = PL111_MAX_CURSOR_WIDTH;
+
+ reg_data = (x & CRSR_CLIP_MASK) |
+ ((y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT);
+
+ writel(reg_data, priv.regs + CLCD_CRSR_CLIP);
+}
+
+void pl111_set_cursor_palette(u32 color0, u32 color1)
+{
+ writel(color0 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_0);
+ writel(color1 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_1);
+}
+
+void pl111_cursor_enable(void)
+{
+ pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
+ pl111_set_cursor_size(CURSOR_64X64);
+ pl111_set_cursor_palette(0x0, 0x00ffffff);
+ pl111_set_cursor_enable(true);
+}
+
+void pl111_cursor_disable(void)
+{
+ pl111_set_cursor_enable(false);
+}
+
+/* shift required to locate pixel into the correct position in
+ * a cursor LBBP word, indexed by x mod 16.
+ */
+static const unsigned char
+x_mod_16_to_value_shift[CLCD_CRSR_IMAGE_PIXELS_PER_WORD] = {
+ 6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24
+};
+
+/* Pack the pixel value into its correct position in the buffer as specified
+ * for LBBP */
+static inline void
+set_lbbp_pixel(uint32_t *buffer, unsigned int x, unsigned int y,
+ uint32_t value)
+{
+ u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+ uint32_t shift;
+ uint32_t data;
+
+ shift = x_mod_16_to_value_shift[x % CLCD_CRSR_IMAGE_PIXELS_PER_WORD];
+
+ /* Get the word containing this pixel */
+ cursor_ram = cursor_ram + (x >> CLCD_CRSR_IMAGE_WORDS_PER_LINE) + (y << 2);
+
+ /* Update pixel in cursor RAM */
+ data = readl(cursor_ram);
+ data &= ~(CLCD_CRSR_LBBP_COLOR_MASK << shift);
+ data |= value << shift;
+ writel(data, cursor_ram);
+}
+
+static u32 pl111_argb_to_lbbp(u32 argb_pix)
+{
+ u32 lbbp_pix = CLCD_CRSR_LBBP_TRANSPARENT;
+ u32 alpha = (argb_pix & ARGB_ALPHA_MASK) >> ARGB_ALPHA_SHIFT;
+ u32 red = (argb_pix & ARGB_RED_MASK) >> ARGB_RED_SHIFT;
+ u32 green = (argb_pix & ARGB_GREEN_MASK) >> ARGB_GREEN_SHIFT;
+ u32 blue = (argb_pix & ARGB_BLUE_MASK) >> ARGB_BLUE_SHIFT;
+
+ /*
+ * Converting from 8 pixel transparency to binary transparency
+ * it's the best we can achieve.
+ */
+ if (alpha & ARGB_2_LBBP_BINARY_THRESHOLD) {
+ u32 gray, max, min;
+
+ /*
+ * Convert to gray using the lightness method:
+ * gray = [max(R,G,B) + min(R,G,B)]/2
+ */
+ min = min(red, green);
+ min = min(min, blue);
+ max = max(red, green);
+ max = max(max, blue);
+ gray = (min + max) >> 1; /* divide by 2 */
+ /* Apply binary threshold to the gray value calculated */
+ if (gray & ARGB_2_LBBP_BINARY_THRESHOLD)
+ lbbp_pix = CLCD_CRSR_LBBP_FOREGROUND;
+ else
+ lbbp_pix = CLCD_CRSR_LBBP_BACKGROUND;
+ }
+
+ return lbbp_pix;
+}
+
+/*
+ * The PL111 hardware cursor supports only LBBP which is a 2bpp format but
+ * the cursor format from userspace is ARGB8888 so we need to convert
+ * to LBBP here.
+ */
+static void pl111_set_cursor_image(u32 *data)
+{
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+ /* Add 1 on width to insert trailing NULL */
+ char string_cursor[PL111_MAX_CURSOR_WIDTH + 1];
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+ unsigned int x;
+ unsigned int y;
+
+ for (y = 0; y < PL111_MAX_CURSOR_HEIGHT; y++) {
+ for (x = 0; x < PL111_MAX_CURSOR_WIDTH; x++) {
+ u32 value = pl111_argb_to_lbbp(*data);
+
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+ if (value == CLCD_CRSR_LBBP_TRANSPARENT)
+ string_cursor[x] = 'T';
+ else if (value == CLCD_CRSR_LBBP_FOREGROUND)
+ string_cursor[x] = 'F';
+ else if (value == CLCD_CRSR_LBBP_INVERSE)
+ string_cursor[x] = 'I';
+ else
+ string_cursor[x] = 'B';
+
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+ set_lbbp_pixel(data, x, y, value);
+ ++data;
+ }
+#ifdef ARGB_LBBP_CONVERSION_DEBUG
+ string_cursor[PL111_MAX_CURSOR_WIDTH] = '\0';
+ DRM_INFO("%s\n", string_cursor);
+#endif /* ARGB_LBBP_CONVERSION_DEBUG */
+ }
+}
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height)
+{
+ struct drm_gem_object *obj;
+ struct pl111_gem_bo *bo;
+
+ DRM_DEBUG_KMS("handle = %u, width = %u, height = %u\n",
+ handle, width, height);
+
+ if (!handle) {
+ pl111_cursor_disable();
+ return 0;
+ }
+
+ if ((width != PL111_MAX_CURSOR_WIDTH) ||
+ (height != PL111_MAX_CURSOR_HEIGHT))
+ return -EINVAL;
+
+ obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+ if (!obj) {
+ DRM_ERROR("Cannot find cursor object for handle = %d\n",
+ handle);
+ return -ENOENT;
+ }
+
+ /*
+ * We expect a PL111_MAX_CURSOR_WIDTH x PL111_MAX_CURSOR_HEIGHT
+ * ARGB888 buffer object in the input.
+ *
+ */
+ if (obj->size < (PL111_MAX_CURSOR_WIDTH * PL111_MAX_CURSOR_HEIGHT * 4)) {
+ DRM_ERROR("Cannot set cursor with an obj size = %d\n",
+ obj->size);
+ drm_gem_object_unreference_unlocked(obj);
+ return -EINVAL;
+ }
+
+ bo = PL111_BO_FROM_GEM(obj);
+ if (!(bo->type & PL111_BOT_DMA)) {
+ DRM_ERROR("Tried to set cursor with non DMA backed obj = %p\n",
+ obj);
+ drm_gem_object_unreference_unlocked(obj);
+ return -EINVAL;
+ }
+
+ pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
+
+ /*
+ * Since we copy the contents of the buffer to the HW cursor internal
+ * memory this GEM object is not needed anymore.
+ */
+ drm_gem_object_unreference_unlocked(obj);
+
+ pl111_cursor_enable();
+
+ return 0;
+}
+
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y)
+{
+ int x_clip = 0;
+ int y_clip = 0;
+
+ DRM_DEBUG("x %d y %d\n", x, y);
+
+ /*
+ * The cursor image is clipped automatically at the screen limits when
+ * it extends beyond the screen image to the right or bottom but
+ * we must clip it using pl111 HW features for negative values.
+ */
+ if (x < 0) {
+ x_clip = -x;
+ x = 0;
+ }
+ if (y < 0) {
+ y_clip = -y;
+ y = 0;
+ }
+
+ pl111_set_cursor_clipping(x_clip, y_clip);
+ pl111_set_cursor_position(x, y);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_device.c b/drivers/gpu/drm/pl111/pl111_drm_device.c
new file mode 100644
index 00000000000..af92498784f
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_device.c
@@ -0,0 +1,336 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_device.c
+ * Implementation of the Linux device driver entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+struct pl111_drm_dev_private priv;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void initial_kds_obtained(void *cb1, void *cb2)
+{
+ wait_queue_head_t *wait = (wait_queue_head_t *) cb1;
+ bool *cb_has_called = (bool *) cb2;
+
+ *cb_has_called = true;
+ wake_up(wait);
+}
+
+/* Must be called from within current_displaying_lock spinlock */
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
+{
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+ pl111_crtc->displaying_fb = flip_res->fb;
+
+ /* Release the previous buffer */
+ if (pl111_crtc->old_kds_res_set != NULL) {
+ /*
+ * Can flip to the same buffer, but must not release the current
+ * resource set
+ */
+ BUG_ON(pl111_crtc->old_kds_res_set == flip_res->kds_res_set);
+ kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+ }
+ /* Record the current buffer, to release on the next buffer flip */
+ pl111_crtc->old_kds_res_set = flip_res->kds_res_set;
+}
+#endif
+
+void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+ DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+void pl111_drm_lastclose(struct drm_device *dev)
+{
+ DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/*
+ * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank
+ * and disable_vblank are just no op callbacks.
+ */
+static int pl111_enable_vblank(struct drm_device *dev, int crtc)
+{
+ DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+ return 0;
+}
+
+static void pl111_disable_vblank(struct drm_device *dev, int crtc)
+{
+ DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc);
+}
+
+struct drm_mode_config_funcs mode_config_funcs = {
+ .fb_create = pl111_fb_create,
+};
+
+static int pl111_modeset_init(struct drm_device *dev)
+{
+ struct drm_mode_config *mode_config;
+ struct pl111_drm_dev_private *priv = dev->dev_private;
+ struct pl111_drm_connector *pl111_connector;
+ struct pl111_drm_encoder *pl111_encoder;
+ int ret = 0;
+
+ if (priv == NULL)
+ return -EINVAL;
+
+ drm_mode_config_init(dev);
+ mode_config = &dev->mode_config;
+ mode_config->funcs = &mode_config_funcs;
+ mode_config->min_width = 1;
+ mode_config->max_width = 1024;
+ mode_config->min_height = 1;
+ mode_config->max_height = 768;
+
+ priv->pl111_crtc = pl111_crtc_create(dev);
+ if (priv->pl111_crtc == NULL) {
+ pr_err("Failed to create pl111_drm_crtc\n");
+ ret = -ENOMEM;
+ goto out_config;
+ }
+
+ priv->number_crtcs = 1;
+
+ pl111_connector = pl111_connector_create(dev);
+ if (pl111_connector == NULL) {
+ pr_err("Failed to create pl111_drm_connector\n");
+ ret = -ENOMEM;
+ goto out_config;
+ }
+
+ pl111_encoder = pl111_encoder_create(dev, 1);
+ if (pl111_encoder == NULL) {
+ pr_err("Failed to create pl111_drm_encoder\n");
+ ret = -ENOMEM;
+ goto out_config;
+ }
+
+ ret = drm_mode_connector_attach_encoder(&pl111_connector->connector,
+ &pl111_encoder->encoder);
+ if (ret != 0) {
+ DRM_ERROR("Failed to attach encoder\n");
+ goto out_config;
+ }
+
+ pl111_connector->connector.encoder = &pl111_encoder->encoder;
+
+ goto finish;
+
+out_config:
+ drm_mode_config_cleanup(dev);
+finish:
+ DRM_DEBUG("%s returned %d\n", __func__, ret);
+ return ret;
+}
+
+static void pl111_modeset_fini(struct drm_device *dev)
+{
+ drm_mode_config_cleanup(dev);
+}
+
+static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
+{
+ int ret = 0;
+
+ pr_info("DRM %s\n", __func__);
+
+ mutex_init(&priv.export_dma_buf_lock);
+ atomic_set(&priv.nr_flips_in_flight, 0);
+ init_waitqueue_head(&priv.wait_for_flips);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
+ if (ret != 0) {
+ pr_err("Failed to initialise KDS callback\n");
+ goto finish;
+ }
+
+ ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
+ initial_kds_obtained);
+ if (ret != 0) {
+ pr_err("Failed to init KDS obtain callback\n");
+ kds_callback_term(&priv.kds_cb);
+ goto finish;
+ }
+#endif
+
+ /* Create a cache for page flips */
+ priv.page_flip_slab = kmem_cache_create("page flip slab",
+ sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
+ if (priv.page_flip_slab == NULL) {
+ DRM_ERROR("Failed to create slab\n");
+ ret = -ENOMEM;
+ goto out_kds_callbacks;
+ }
+
+ dev->dev_private = &priv;
+
+ ret = pl111_modeset_init(dev);
+ if (ret != 0) {
+ pr_err("Failed to init modeset\n");
+ goto out_slab;
+ }
+
+ ret = pl111_device_init(dev);
+ if (ret != 0) {
+ DRM_ERROR("Failed to init MMIO and IRQ\n");
+ goto out_modeset;
+ }
+
+ ret = drm_vblank_init(dev, 1);
+ if (ret != 0) {
+ DRM_ERROR("Failed to init vblank\n");
+ goto out_vblank;
+ }
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 13, 0))
+ platform_set_drvdata(dev->platformdev, dev);
+#endif
+
+ goto finish;
+
+out_vblank:
+ pl111_device_fini(dev);
+out_modeset:
+ pl111_modeset_fini(dev);
+out_slab:
+ kmem_cache_destroy(priv.page_flip_slab);
+out_kds_callbacks:
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ kds_callback_term(&priv.kds_obtain_current_cb);
+ kds_callback_term(&priv.kds_cb);
+#endif
+finish:
+ DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
+ return ret;
+}
+
+static int pl111_drm_unload(struct drm_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+
+ kmem_cache_destroy(priv.page_flip_slab);
+
+ drm_vblank_cleanup(dev);
+ pl111_modeset_fini(dev);
+ pl111_device_fini(dev);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ kds_callback_term(&priv.kds_obtain_current_cb);
+ kds_callback_term(&priv.kds_cb);
+#endif
+ return 0;
+}
+
+static struct vm_operations_struct pl111_gem_vm_ops = {
+ .fault = pl111_gem_fault,
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+#else
+ .open = pl111_gem_vm_open,
+ .close = pl111_gem_vm_close,
+#endif
+};
+
+static const struct file_operations drm_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .mmap = pl111_gem_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+};
+
+static struct drm_ioctl_desc pl111_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(PL111_GEM_CREATE, pl111_drm_gem_create_ioctl,
+ DRM_CONTROL_ALLOW | DRM_UNLOCKED),
+};
+
+static struct drm_driver driver = {
+ .driver_features =
+ DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+ .load = pl111_drm_load,
+ .unload = pl111_drm_unload,
+ .context_dtor = NULL,
+ .preclose = pl111_drm_preclose,
+ .lastclose = pl111_drm_lastclose,
+ .suspend = pl111_drm_suspend,
+ .resume = pl111_drm_resume,
+ .get_vblank_counter = drm_vblank_count,
+ .enable_vblank = pl111_enable_vblank,
+ .disable_vblank = pl111_disable_vblank,
+ .ioctls = pl111_ioctls,
+ .fops = &drm_fops,
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+ .dumb_create = pl111_dumb_create,
+ .dumb_destroy = pl111_dumb_destroy,
+ .dumb_map_offset = pl111_dumb_map_offset,
+ .gem_free_object = pl111_gem_free_object,
+ .gem_vm_ops = &pl111_gem_vm_ops,
+ .prime_handle_to_fd = &pl111_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_export = &pl111_gem_prime_export,
+ .gem_prime_import = &pl111_gem_prime_import,
+};
+
+int pl111_drm_init(struct platform_device *dev)
+{
+ int ret;
+ pr_info("DRM %s\n", __func__);
+ pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n",
+ DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
+ driver.num_ioctls = ARRAY_SIZE(pl111_ioctls);
+ ret = 0;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 15, 0))
+ driver.kdriver.platform_device = dev;
+#endif
+ return drm_platform_init(&driver, dev);
+
+}
+
+void pl111_drm_exit(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+ drm_platform_exit(&driver, dev);
+#else
+ drm_put_dev(platform_get_drvdata(dev));
+#endif
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
new file mode 100644
index 00000000000..1131f46b27d
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
@@ -0,0 +1,625 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_dma_buf.c
+ * Implementation of the dma_buf functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+#define export_dma_buf export_dma_buf
+#else
+#define export_dma_buf dma_buf
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+static void obtain_kds_if_currently_displayed(struct drm_device *dev,
+ struct pl111_gem_bo *bo,
+ struct dma_buf *dma_buf)
+{
+ unsigned long shared[1] = { 0 };
+ struct kds_resource *resource_list[1];
+ struct kds_resource_set *kds_res_set;
+ struct drm_crtc *crtc;
+ bool cb_has_called = false;
+ unsigned long flags;
+ int err;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+ DRM_DEBUG_KMS("Obtaining initial KDS res for bo:%p dma_buf:%p\n",
+ bo, dma_buf);
+
+ resource_list[0] = get_dma_buf_kds_resource(dma_buf);
+ get_dma_buf(dma_buf);
+
+ /*
+ * Can't use kds_waitall(), because kbase will be let through due to
+ * locked ignore'
+ */
+ err = kds_async_waitall(&kds_res_set,
+ &priv.kds_obtain_current_cb, &wake,
+ &cb_has_called, 1, shared, resource_list);
+ BUG_ON(err);
+ wait_event(wake, cb_has_called == true);
+
+ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ if (pl111_crtc->displaying_fb) {
+ struct pl111_drm_framebuffer *pl111_fb;
+ struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+ pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+
+ if (pl111_fb->bo == bo) {
+ DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+ DRM_DEBUG_KMS(" is being displayed, keeping\n");
+ /* There shouldn't be a previous buffer to release */
+ BUG_ON(pl111_crtc->old_kds_res_set);
+
+ if (kds_res_set == NULL) {
+ err = kds_async_waitall(&kds_res_set,
+ &priv.kds_obtain_current_cb,
+ &wake, &cb_has_called,
+ 1, shared, resource_list);
+ BUG_ON(err);
+ wait_event(wake, cb_has_called == true);
+ }
+
+ /* Current buffer will need releasing on next flip */
+ pl111_crtc->old_kds_res_set = kds_res_set;
+
+ /*
+ * Clear kds_res_set, so a new kds_res_set is allocated
+ * for additional CRTCs
+ */
+ kds_res_set = NULL;
+ }
+ }
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+ }
+
+ /* kds_res_set will be NULL here if any CRTCs are displaying fb */
+ if (kds_res_set != NULL) {
+ DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo);
+ DRM_DEBUG_KMS(" not being displayed, discarding\n");
+ /* They're not being displayed, release them */
+ kds_resource_set_release(&kds_res_set);
+ }
+
+ dma_buf_put(dma_buf);
+}
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0))
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+ struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = buffer->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+ int ret;
+
+ DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+ mutex_lock(&dev->struct_mutex);
+ ret = drm_gem_mmap_obj(obj, obj->size, vma);
+ mutex_unlock(&dev->struct_mutex);
+ if (ret)
+ return ret;
+
+ return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#else
+
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+ struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = buffer->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+
+ DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+ mutex_lock(&dev->struct_mutex);
+
+ /* Check for valid size. */
+ if (obj->size < vma->vm_end - vma->vm_start)
+ return -EINVAL;
+
+ BUG_ON(!dev->driver->gem_vm_ops);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
+#else
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+#endif
+
+ vma->vm_ops = dev->driver->gem_vm_ops;
+ vma->vm_private_data = obj;
+
+ /* Take a ref for this mapping of the object, so that the fault
+ * handler can dereference the mmap offset's pointer to the object.
+ * This reference is cleaned up by the corresponding vm_close
+ * (which should happen whether the vma was created by this call, or
+ * by a vm_open due to mremap or partial unmap or whatever).
+ */
+ drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+ pl111_drm_vm_open_locked(dev, vma);
+#else
+ drm_vm_open_locked(dev, vma);
+#endif
+
+ mutex_unlock(&dev->struct_mutex);
+
+ return pl111_bo_mmap(obj, bo, vma, buffer->size);
+}
+
+#endif /* KERNEL_VERSION */
+
+static void pl111_dma_buf_release(struct dma_buf *buf)
+{
+ /*
+ * Need to release the dma_buf's reference on the gem object it was
+ * exported from, and also clear the gem object's export_dma_buf
+ * pointer to this dma_buf as it no longer exists
+ */
+ struct drm_gem_object *obj = (struct drm_gem_object *)buf->priv;
+ struct pl111_gem_bo *bo;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct drm_crtc *crtc;
+ unsigned long flags;
+#endif
+ bo = PL111_BO_FROM_GEM(obj);
+
+ DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
+ head) {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ if (pl111_crtc->displaying_fb) {
+ struct pl111_drm_framebuffer *pl111_fb;
+ struct drm_framebuffer *fb = pl111_crtc->displaying_fb;
+
+ pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb);
+ if (pl111_fb->bo == bo) {
+ kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+ pl111_crtc->old_kds_res_set = NULL;
+ }
+ }
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+ }
+#endif
+ mutex_lock(&priv.export_dma_buf_lock);
+
+ obj->export_dma_buf = NULL;
+ drm_gem_object_unreference_unlocked(obj);
+
+ mutex_unlock(&priv.export_dma_buf_lock);
+}
+
+static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
+ struct dma_buf_attachment *attach)
+{
+ DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
+ dev, attach);
+
+ attach->priv = dev;
+
+ return 0;
+}
+
+static void pl111_dma_buf_detach(struct dma_buf *buf,
+ struct dma_buf_attachment *attach)
+{
+ DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
+ attach);
+}
+
+/* Heavily from exynos_drm_dmabuf.c */
+static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
+ *attach,
+ enum dma_data_direction
+ direction)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+ int size, n_pages, nents;
+ struct scatterlist *s, *sg;
+ struct sg_table *sgt;
+ int ret, i;
+
+ DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+ attach, bo);
+
+ /*
+ * Nothing to do, if we are trying to map a dmabuf that has been imported.
+ * Just return the existing sgt.
+ */
+ if (obj->import_attach) {
+ BUG_ON(!bo->sgt);
+ return bo->sgt;
+ }
+
+ size = obj->size;
+ n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (bo->type & PL111_BOT_DMA) {
+ sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
+ DRM_ERROR("Failed to allocate sg_table\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (ret < 0) {
+ DRM_ERROR("Failed to allocate page table\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ sg_dma_len(sgt->sgl) = size;
+ /* We use DMA coherent mappings for PL111_BOT_DMA so we must
+ * use the virtual address returned at buffer allocation */
+ sg_set_buf(sgt->sgl, bo->backing_data.dma.fb_cpu_addr, size);
+ sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
+ } else { /* PL111_BOT_SHM */
+ struct page **pages;
+ int pg = 0;
+
+ mutex_lock(&dev->struct_mutex);
+ pages = get_pages(obj);
+ if (IS_ERR(pages)) {
+ dev_err(obj->dev->dev, "could not get pages: %ld\n",
+ PTR_ERR(pages));
+ return ERR_CAST(pages);
+ }
+ sgt = drm_prime_pages_to_sg(pages, n_pages);
+ if (sgt == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ pl111_gem_sync_to_dma(bo);
+
+ /*
+ * At this point the pages have been dma-mapped by either
+ * get_pages() for non cached maps or pl111_gem_sync_to_dma()
+ * for cached. So the physical addresses can be assigned
+ * to the sg entries.
+ * drm_prime_pages_to_sg() may have combined contiguous pages
+ * into chunks so we assign the physical address of the first
+ * page of a chunk to the chunk and check that the physical
+ * addresses of the rest of the pages in that chunk are also
+ * contiguous.
+ */
+ sg = sgt->sgl;
+ nents = sgt->nents;
+
+ for_each_sg(sg, s, nents, i) {
+ int j, n_pages_in_chunk = sg_dma_len(s) >> PAGE_SHIFT;
+
+ sg_dma_address(s) = bo->backing_data.shm.dma_addrs[pg];
+
+ for (j = pg+1; j < pg+n_pages_in_chunk; j++) {
+ BUG_ON(bo->backing_data.shm.dma_addrs[j] !=
+ bo->backing_data.shm.dma_addrs[j-1]+PAGE_SIZE);
+ }
+
+ pg += n_pages_in_chunk;
+ }
+
+ mutex_unlock(&dev->struct_mutex);
+ }
+ bo->sgt = sgt;
+ return sgt;
+}
+
+static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+ struct sg_table *sgt,
+ enum dma_data_direction direction)
+{
+ struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+
+ DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf,
+ attach, bo);
+
+ sg_free_table(sgt);
+ kfree(sgt);
+ bo->sgt = NULL;
+}
+
+/*
+ * There isn't any operation here that can sleep or fail so this callback can
+ * be used for both kmap and kmap_atomic implementations.
+ */
+static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long pageno)
+{
+ struct pl111_gem_bo *bo = dma_buf->priv;
+ void *vaddr = NULL;
+
+ /* Make sure we cannot access outside the memory range */
+ if (((pageno + 1) << PAGE_SHIFT) > bo->gem_object.size)
+ return NULL;
+
+ if (bo->type & PL111_BOT_DMA) {
+ vaddr = (bo->backing_data.dma.fb_cpu_addr +
+ (pageno << PAGE_SHIFT));
+ } else {
+ vaddr = page_address(bo->backing_data.shm.pages[pageno]);
+ }
+
+ return vaddr;
+}
+
+/*
+ * Find a scatterlist that starts in "start" and has "len"
+ * or return a NULL dma_handle.
+ */
+static dma_addr_t pl111_find_matching_sg(struct sg_table *sgt, size_t start,
+ size_t len)
+{
+ struct scatterlist *sg;
+ unsigned int count;
+ size_t size = 0;
+ dma_addr_t dma_handle = 0;
+
+ /* Find a scatterlist that starts in "start" and has "len"
+ * or return error */
+ for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+ if ((size == start) && (len == sg_dma_len(sg))) {
+ dma_handle = sg_dma_address(sg);
+ break;
+ }
+ size += sg_dma_len(sg);
+ }
+ return dma_handle;
+}
+
+static int pl111_dma_buf_begin_cpu(struct dma_buf *dma_buf,
+ size_t start, size_t len,
+ enum dma_data_direction dir)
+{
+ struct pl111_gem_bo *bo = dma_buf->priv;
+ struct sg_table *sgt = bo->sgt;
+ dma_addr_t dma_handle;
+
+ if ((start + len) > bo->gem_object.size)
+ return -EINVAL;
+
+ if (!(bo->type & PL111_BOT_SHM)) {
+ struct device *dev = bo->gem_object.dev->dev;
+
+ dma_handle = pl111_find_matching_sg(sgt, start, len);
+ if (!dma_handle)
+ return -EINVAL;
+
+ dma_sync_single_range_for_cpu(dev, dma_handle, 0, len, dir);
+ }
+ /* PL111_BOT_DMA uses coherents mappings, no need to sync */
+ return 0;
+}
+
+static void pl111_dma_buf_end_cpu(struct dma_buf *dma_buf,
+ size_t start, size_t len,
+ enum dma_data_direction dir)
+{
+ struct pl111_gem_bo *bo = dma_buf->priv;
+ struct sg_table *sgt = bo->sgt;
+ dma_addr_t dma_handle;
+
+ if ((start + len) > bo->gem_object.size)
+ return;
+
+ if (!(bo->type & PL111_BOT_DMA)) {
+ struct device *dev = bo->gem_object.dev->dev;
+
+ dma_handle = pl111_find_matching_sg(sgt, start, len);
+ if (!dma_handle)
+ return;
+
+ dma_sync_single_range_for_device(dev, dma_handle, 0, len, dir);
+ }
+ /* PL111_BOT_DMA uses coherents mappings, no need to sync */
+}
+
+static struct dma_buf_ops pl111_dma_buf_ops = {
+ .release = &pl111_dma_buf_release,
+ .attach = &pl111_dma_buf_attach,
+ .detach = &pl111_dma_buf_detach,
+ .map_dma_buf = &pl111_dma_buf_map_dma_buf,
+ .unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+ .kmap_atomic = &pl111_dma_buf_kmap,
+ .kmap = &pl111_dma_buf_kmap,
+ .begin_cpu_access = &pl111_dma_buf_begin_cpu,
+ .end_cpu_access = &pl111_dma_buf_end_cpu,
+ .mmap = &pl111_dma_buf_mmap,
+};
+
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf)
+{
+ struct dma_buf_attachment *attachment;
+ struct drm_gem_object *obj;
+ struct pl111_gem_bo *bo;
+ struct scatterlist *sgl;
+ struct sg_table *sgt;
+ dma_addr_t cont_phys;
+ int ret = 0;
+ int i;
+
+ DRM_DEBUG_KMS("DRM %s on dev=%p dma_buf=%p\n", __func__, dev, dma_buf);
+
+ /* is this one of own objects? */
+ if (dma_buf->ops == &pl111_dma_buf_ops) {
+ obj = dma_buf->priv;
+ /* is it from our device? */
+ if (obj->dev == dev) {
+ /*
+ * Importing dmabuf exported from our own gem increases
+ * refcount on gem itself instead of f_count of dmabuf.
+ */
+ drm_gem_object_reference(obj);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+ /* before v3.10.0 we assume the caller has taken a ref on the dma_buf
+ * we don't want it for self-imported buffers so drop it here */
+ dma_buf_put(dma_buf);
+#endif
+
+ return obj;
+ }
+ }
+
+ attachment = dma_buf_attach(dma_buf, dev->dev);
+ if (IS_ERR(attachment))
+ return ERR_CAST(attachment);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+ /* from 3.10.0 we assume the caller has not taken a ref so we take one here */
+ get_dma_buf(dma_buf);
+#endif
+
+ sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);
+ if (IS_ERR_OR_NULL(sgt)) {
+ ret = PTR_ERR(sgt);
+ goto err_buf_detach;
+ }
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (!bo) {
+ DRM_ERROR("%s: failed to allocate buffer object.\n", __func__);
+ ret = -ENOMEM;
+ goto err_unmap_attach;
+ }
+
+ /* Find out whether the buffer is contiguous or not */
+ sgl = sgt->sgl;
+ cont_phys = sg_phys(sgl);
+ bo->type |= PL111_BOT_DMA;
+ for_each_sg(sgt->sgl, sgl, sgt->nents, i) {
+ dma_addr_t real_phys = sg_phys(sgl);
+ if (real_phys != cont_phys) {
+ bo->type &= ~PL111_BOT_DMA;
+ break;
+ }
+ cont_phys += (PAGE_SIZE - sgl->offset);
+ }
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ ret = drm_gem_private_object_init(dev, &bo->gem_object,
+ dma_buf->size);
+ if (ret != 0) {
+ DRM_ERROR("DRM could not import DMA GEM obj\n");
+ goto err_free_buffer;
+ }
+#else
+ drm_gem_private_object_init(dev, &bo->gem_object, dma_buf->size);
+#endif
+
+ if (bo->type & PL111_BOT_DMA) {
+ bo->backing_data.dma.fb_cpu_addr = sg_virt(sgt->sgl);
+ bo->backing_data.dma.fb_dev_addr = sg_phys(sgt->sgl);
+ DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, contiguous import\n", __func__, bo);
+ } else { /* PL111_BOT_SHM */
+ DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, non contiguous import\n", __func__, bo);
+ }
+
+ bo->gem_object.import_attach = attachment;
+ bo->sgt = sgt;
+
+ return &bo->gem_object;
+
+err_free_buffer:
+ kfree(bo);
+ bo = NULL;
+err_unmap_attach:
+ dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
+err_buf_detach:
+ dma_buf_detach(dma_buf, attachment);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0))
+ /* from 3.10.0 we will have taken a ref so drop it here */
+ dma_buf_put(dma_buf);
+#endif
+ return ERR_PTR(ret);
+}
+
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+ struct drm_gem_object *obj, int flags)
+{
+ struct dma_buf *new_buf;
+ struct pl111_gem_bo *bo;
+ size_t size;
+
+ DRM_DEBUG("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
+ size = obj->size;
+
+ new_buf = dma_buf_export(obj /*priv */ , &pl111_dma_buf_ops, size,
+ flags | O_RDWR);
+ bo = PL111_BO_FROM_GEM(new_buf->priv);
+
+ /*
+ * bo->gem_object.export_dma_buf not setup until after gem_prime_export
+ * finishes
+ */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /*
+ * Ensure that we hold the kds resource if it's the currently
+ * displayed buffer.
+ */
+ obtain_kds_if_currently_displayed(dev, bo, new_buf);
+#endif
+
+ DRM_DEBUG("Created dma_buf %p\n", new_buf);
+
+ return new_buf;
+}
+
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+ uint32_t handle, uint32_t flags, int *prime_fd)
+{
+ int result;
+ /*
+ * This will re-use any existing exports, and calls
+ * driver->gem_prime_export to do the first export when needed
+ */
+ DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
+ file_priv, handle);
+
+ mutex_lock(&priv.export_dma_buf_lock);
+ result = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
+ prime_fd);
+ mutex_unlock(&priv.export_dma_buf_lock);
+
+ return result;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/drivers/gpu/drm/pl111/pl111_drm_encoder.c
new file mode 100644
index 00000000000..78c91c08106
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_encoder.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_encoder.c
+ * Implementation of the encoder functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+ return true;
+}
+
+void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_commit(struct drm_encoder *encoder)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_helper_disable(struct drm_encoder *encoder)
+{
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+void pl111_encoder_destroy(struct drm_encoder *encoder)
+{
+ struct pl111_drm_encoder *pl111_encoder =
+ PL111_ENCODER_FROM_ENCODER(encoder);
+
+ DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+
+ drm_encoder_cleanup(encoder);
+ kfree(pl111_encoder);
+}
+
+const struct drm_encoder_funcs encoder_funcs = {
+ .destroy = pl111_encoder_destroy,
+};
+
+const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+ .mode_fixup = pl111_encoder_helper_mode_fixup,
+ .prepare = pl111_encoder_helper_prepare,
+ .commit = pl111_encoder_helper_commit,
+ .mode_set = pl111_encoder_helper_mode_set,
+ .disable = pl111_encoder_helper_disable,
+};
+
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+ int possible_crtcs)
+{
+ struct pl111_drm_encoder *pl111_encoder;
+
+ pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL);
+ if (pl111_encoder == NULL) {
+ pr_err("Failed to allocated pl111_drm_encoder\n");
+ return NULL;
+ }
+
+ drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+ DRM_MODE_ENCODER_DAC);
+
+ drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+ pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+ return pl111_encoder;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_fb.c b/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100644
index 00000000000..f575c9e5f7c
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,202 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
+#include "pl111_drm.h"
+
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+ struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ struct drm_crtc *crtc;
+ unsigned long flags;
+#endif
+ DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+ pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+ /*
+ * Because flips are deferred, wait for all previous flips to complete
+ */
+ wait_event(priv.wait_for_flips,
+ atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ /*
+ * Release KDS resources if it's currently being displayed. Only occurs
+ * when the last framebuffer is destroyed.
+ */
+ list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+ head) {
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ if (pl111_crtc->displaying_fb == framebuffer) {
+ /* Release the current buffers */
+ if (pl111_crtc->old_kds_res_set != NULL) {
+ DRM_DEBUG_KMS("Releasing KDS resources for ");
+ DRM_DEBUG_KMS("displayed 0x%p\n", framebuffer);
+ kds_resource_set_release(
+ &pl111_crtc->old_kds_res_set);
+ }
+ pl111_crtc->old_kds_res_set = NULL;
+ }
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock,
+ flags);
+ }
+#endif
+ drm_framebuffer_cleanup(framebuffer);
+
+ if ((pl111_fb->bo != NULL) && (&pl111_fb->bo->gem_object != NULL))
+ drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+ kfree(pl111_fb);
+
+ DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+}
+
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+ struct drm_file *file_priv,
+ unsigned int *handle)
+{
+ struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+ DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+ if (bo == NULL)
+ return -EINVAL;
+
+ return drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+}
+
+const struct drm_framebuffer_funcs fb_funcs = {
+ .destroy = pl111_fb_destroy,
+ .create_handle = pl111_fb_create_handle,
+};
+
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_mode_fb_cmd2 *mode_cmd)
+{
+ struct pl111_drm_framebuffer *pl111_fb = NULL;
+ struct drm_framebuffer *fb = NULL;
+ struct drm_gem_object *gem_obj;
+ struct pl111_gem_bo *bo;
+ int err = 0;
+ size_t min_size;
+ int bpp;
+ int depth;
+
+ pr_info("DRM %s\n", __func__);
+ gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+ if (gem_obj == NULL) {
+ DRM_ERROR("Could not get gem obj from handle to create fb\n");
+ err = -ENOENT;
+ goto error;
+ }
+
+ bo = PL111_BO_FROM_GEM(gem_obj);
+ drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+
+ if (mode_cmd->pitches[0] < mode_cmd->width * (bpp >> 3)) {
+ DRM_ERROR("bad pitch %u for plane 0\n", mode_cmd->pitches[0]);
+ err = -EINVAL;
+ goto error;
+ }
+
+ min_size = (mode_cmd->height - 1) * mode_cmd->pitches[0]
+ + mode_cmd->width * (bpp >> 3);
+
+ if (bo->gem_object.size < min_size) {
+ DRM_ERROR("gem obj size < min size\n");
+ err = -EINVAL;
+ goto error;
+ }
+
+ /* We can't scan out SHM so we can't create an fb for it */
+ if (!(bo->type & PL111_BOT_DMA)) {
+ DRM_ERROR("Can't create FB for non-scanout buffer\n");
+ err = -EINVAL;
+ goto error;
+ }
+
+ switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+ case 'Y':
+ case 'U':
+ case 'V':
+ case 'N':
+ case 'T':
+ DRM_ERROR("YUV formats not supported\n");
+ err = -EINVAL;
+ goto error;
+ }
+
+ pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
+ if (pl111_fb == NULL) {
+ DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+ err = -ENOMEM;
+ goto error;
+ }
+ fb = &pl111_fb->fb;
+
+ err = drm_framebuffer_init(dev, fb, &fb_funcs);
+ if (err) {
+ DRM_ERROR("drm_framebuffer_init failed\n");
+ kfree(fb);
+ fb = NULL;
+ goto error;
+ }
+
+ drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+ /* The only framebuffer formats supported by pl111
+ * are 16 bpp or 32 bpp with 24 bit depth.
+ * See clcd_enable()
+ */
+ if (!((fb->bits_per_pixel == 16) ||
+ (fb->bits_per_pixel == 32 && fb->depth == 24))) {
+ DRM_DEBUG_KMS("unsupported pixel format bpp=%d, depth=%d\n", fb->bits_per_pixel, fb->depth);
+ drm_framebuffer_cleanup(fb);
+ kfree(fb);
+ fb = NULL;
+ err = -EINVAL;
+ goto error;
+ }
+
+ pl111_fb->bo = bo;
+
+ DRM_DEBUG_KMS("Created fb 0x%p with gem_obj 0x%p physaddr=0x%.8x\n",
+ fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
+
+ return fb;
+
+error:
+ if (gem_obj != NULL)
+ drm_gem_object_unreference_unlocked(gem_obj);
+
+ return ERR_PTR(err);
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/drivers/gpu/drm/pl111/pl111_drm_funcs.h
new file mode 100644
index 00000000000..494baa0d057
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_funcs.h
@@ -0,0 +1,130 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_funcs.h
+ * Function prototypes for PL111 DRM
+ */
+
+#ifndef PL111_DRM_FUNCS_H_
+#define PL111_DRM_FUNCS_H_
+
+/* Platform Initialisation */
+int pl111_drm_init(struct platform_device *dev);
+void pl111_drm_exit(struct platform_device *dev);
+
+/* KDS Callbacks */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
+
+/* CRTC Functions */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
+struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
+void pl111_crtc_destroy(struct drm_crtc *crtc);
+
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+ struct drm_framebuffer *fb);
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb, bool page_flip,
+ struct drm_pending_vblank_event *event);
+
+/* Common IRQ handler */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
+
+int pl111_crtc_cursor_set(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle,
+ uint32_t width,
+ uint32_t height);
+int pl111_crtc_cursor_move(struct drm_crtc *crtc,
+ int x, int y);
+
+/* Connector Functions */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
+void pl111_connector_destroy(struct drm_connector *connector);
+struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
+ *dev);
+
+/* Encoder Functions */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+ int possible_crtcs);
+struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
+ int possible_crtcs);
+void pl111_encoder_destroy(struct drm_encoder *encoder);
+
+/* Frame Buffer Functions */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_mode_fb_cmd2 *mode_cmd);
+
+/* VMA Functions */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
+struct page **get_pages(struct drm_gem_object *obj);
+void put_pages(struct drm_gem_object *obj, struct page **pages);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+ struct vm_area_struct *vma);
+void pl111_gem_vm_open(struct vm_area_struct *vma);
+void pl111_gem_vm_close(struct vm_area_struct *vma);
+#endif
+
+/* Suspend Functions */
+int pl111_drm_resume(struct drm_device *dev);
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
+
+/* GEM Functions */
+int pl111_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+int pl111_dumb_destroy(struct drm_file *file_priv,
+ struct drm_device *dev, uint32_t handle);
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+ struct drm_device *dev, uint32_t handle,
+ uint64_t *offset);
+void pl111_gem_free_object(struct drm_gem_object *obj);
+
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+ struct vm_area_struct *vma, size_t size);
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff);
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo);
+
+/* DMA BUF Functions */
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+ uint32_t handle, uint32_t flags, int *prime_fd);
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+ struct drm_gem_object *obj, int flags);
+
+/* Pl111 Functions */
+void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
+ *flip_res, struct drm_framebuffer *fb);
+int clcd_disable(struct drm_crtc *crtc);
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
+int pl111_amba_remove(struct amba_device *dev);
+
+int pl111_device_init(struct drm_device *dev);
+void pl111_device_fini(struct drm_device *dev);
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+ struct clcd_regs *timing);
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+ struct drm_display_mode *mode);
+#endif /* PL111_DRM_FUNCS_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm_gem.c b/drivers/gpu/drm/pl111/pl111_drm_gem.c
new file mode 100644
index 00000000000..13fb2560569
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_gem.c
@@ -0,0 +1,476 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_gem.c
+ * Implementation of the GEM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include "pl111_drm.h"
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+#include <linux/dma-attrs.h>
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0))
+#include <drm/drm_vma_manager.h>
+#endif
+
+void pl111_gem_free_object(struct drm_gem_object *obj)
+{
+ struct pl111_gem_bo *bo;
+ struct drm_device *dev = obj->dev;
+ DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
+
+ bo = PL111_BO_FROM_GEM(obj);
+
+ if (obj->import_attach)
+ drm_prime_gem_destroy(obj, bo->sgt);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ if (obj->map_list.map != NULL)
+ drm_gem_free_mmap_offset(obj);
+#else
+ drm_gem_free_mmap_offset(obj);
+#endif
+ /*
+ * Only free the backing memory if the object has not been imported.
+ * If it has been imported, the exporter is in charge to free that
+ * once dmabuf's refcount becomes 0.
+ */
+ if (obj->import_attach)
+ goto imported_out;
+
+ if (bo->type & PL111_BOT_DMA) {
+ dma_free_writecombine(dev->dev, obj->size,
+ bo->backing_data.dma.fb_cpu_addr,
+ bo->backing_data.dma.fb_dev_addr);
+ } else if (bo->backing_data.shm.pages != NULL) {
+ put_pages(obj, bo->backing_data.shm.pages);
+ }
+
+imported_out:
+ drm_gem_object_release(obj);
+
+ kfree(bo);
+
+ DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
+}
+
+static int pl111_gem_object_create(struct drm_device *dev, u64 size,
+ u32 flags, struct drm_file *file_priv,
+ u32 *handle)
+{
+ int ret = 0;
+ struct pl111_gem_bo *bo = NULL;
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (bo == NULL) {
+ ret = -ENOMEM;
+ goto finish;
+ }
+
+ bo->type = flags;
+
+#ifndef ARCH_HAS_SG_CHAIN
+ /*
+ * If the ARCH can't chain we can't have non-contiguous allocs larger
+ * than a single sg can hold.
+ * In this case we fall back to using contiguous memory
+ */
+ if (!(bo->type & PL111_BOT_DMA)) {
+ long unsigned int n_pages =
+ PAGE_ALIGN(size) >> PAGE_SHIFT;
+ if (n_pages > SG_MAX_SINGLE_ALLOC) {
+ bo->type |= PL111_BOT_DMA;
+ /*
+ * Non-contiguous allocation request changed to
+ * contigous
+ */
+ DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+ n_pages, SG_MAX_SINGLE_ALLOC);
+ }
+ }
+#endif
+ if (bo->type & PL111_BOT_DMA) {
+ /* scanout compatible - use physically contiguous buffer */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ bo->backing_data.dma.fb_cpu_addr =
+ dma_alloc_attrs(dev->dev, size,
+ &bo->backing_data.dma.fb_dev_addr,
+ GFP_KERNEL,
+ &attrs);
+ if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+ DRM_ERROR("dma_alloc_attrs failed\n");
+ ret = -ENOMEM;
+ goto free_bo;
+ }
+#else
+ bo->backing_data.dma.fb_cpu_addr =
+ dma_alloc_writecombine(dev->dev, size,
+ &bo->backing_data.dma.fb_dev_addr,
+ GFP_KERNEL);
+ if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+ DRM_ERROR("dma_alloc_writecombine failed\n");
+ ret = -ENOMEM;
+ goto free_bo;
+ }
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ ret = drm_gem_private_object_init(dev, &bo->gem_object,
+ size);
+ if (ret != 0) {
+ DRM_ERROR("DRM could not initialise GEM object\n");
+ goto free_dma;
+ }
+#else
+ drm_gem_private_object_init(dev, &bo->gem_object, size);
+#endif
+
+ } else { /* PL111_BOT_SHM */
+ /* not scanout compatible - use SHM backed object */
+ ret = drm_gem_object_init(dev, &bo->gem_object, size);
+ if (ret != 0) {
+ DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+ ret = -ENOMEM;
+ goto free_bo;
+ }
+ DRM_DEBUG_KMS("Num bytes: %d\n", bo->gem_object.size);
+ }
+
+ DRM_DEBUG("s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+ size, flags,
+ (bo->type & PL111_BOT_DMA) ? "physaddr" : "shared page array",
+ (bo->type & PL111_BOT_DMA) ?
+ (unsigned long)bo->backing_data.dma.fb_dev_addr:
+ (unsigned long)bo->backing_data.shm.pages,
+ bo->type);
+
+ ret = drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+ if (ret != 0) {
+ DRM_ERROR("DRM failed to create GEM handle\n");
+ goto obj_release;
+ }
+
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_unreference_unlocked(&bo->gem_object);
+
+ return 0;
+
+obj_release:
+ drm_gem_object_release(&bo->gem_object);
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+free_dma:
+#endif
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+ if (bo->type & PL111_BOT_DMA) {
+ DEFINE_DMA_ATTRS(attrs);
+
+ dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+ dma_free_attrs(dev->dev, size,
+ bo->backing_data.dma.fb_cpu_addr,
+ bo->backing_data.dma.fb_dev_addr,
+ &attrs);
+ }
+#else
+ if (bo->type & PL111_BOT_DMA)
+ dma_free_writecombine(dev->dev, size,
+ bo->backing_data.dma.fb_cpu_addr,
+ bo->backing_data.dma.fb_dev_addr);
+#endif
+free_bo:
+ kfree(bo);
+finish:
+ return ret;
+}
+
+int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_pl111_gem_create *args = data;
+ uint32_t bytes_pp;
+
+ /* Round bpp up, to allow for case where bpp<8 */
+ bytes_pp = args->bpp >> 3;
+ if (args->bpp & ((1 << 3) - 1))
+ bytes_pp++;
+
+ if (args->flags & ~PL111_BOT_MASK) {
+ DRM_ERROR("wrong flags: 0x%x\n", args->flags);
+ return -EINVAL;
+ }
+
+ args->pitch = ALIGN(args->width * bytes_pp, 64);
+ args->size = PAGE_ALIGN(args->pitch * args->height);
+
+ DRM_DEBUG_KMS("gem_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+ args->width, args->height, args->pitch, args->bpp,
+ bytes_pp, args->size, args->flags);
+
+ return pl111_gem_object_create(dev, args->size, args->flags, file_priv,
+ &args->handle);
+}
+
+int pl111_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+ uint32_t bytes_pp;
+
+ /* Round bpp up, to allow for case where bpp<8 */
+ bytes_pp = args->bpp >> 3;
+ if (args->bpp & ((1 << 3) - 1))
+ bytes_pp++;
+
+ if (args->flags) {
+ DRM_ERROR("flags must be zero: 0x%x\n", args->flags);
+ return -EINVAL;
+ }
+
+ args->pitch = ALIGN(args->width * bytes_pp, 64);
+ args->size = PAGE_ALIGN(args->pitch * args->height);
+
+ DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+ args->width, args->height, args->pitch, args->bpp,
+ bytes_pp, args->size, args->flags);
+
+ return pl111_gem_object_create(dev, args->size,
+ PL111_BOT_DMA | PL111_BOT_UNCACHED,
+ file_priv, &args->handle);
+}
+
+int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
+ uint32_t handle)
+{
+ DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+ file_priv, handle);
+ return drm_gem_handle_delete(file_priv, handle);
+}
+
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+ struct drm_device *dev, uint32_t handle,
+ uint64_t *offset)
+{
+ struct drm_gem_object *obj;
+ int ret = 0;
+ DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+ file_priv, handle);
+
+ /* GEM does all our handle to object mapping */
+ obj = drm_gem_object_lookup(dev, file_priv, handle);
+ if (obj == NULL) {
+ ret = -ENOENT;
+ goto fail;
+ }
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ if (obj->map_list.map == NULL) {
+ ret = drm_gem_create_mmap_offset(obj);
+ if (ret != 0) {
+ drm_gem_object_unreference_unlocked(obj);
+ goto fail;
+ }
+ }
+#else
+ ret = drm_gem_create_mmap_offset(obj);
+ if (ret != 0) {
+ drm_gem_object_unreference_unlocked(obj);
+ goto fail;
+ }
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+ *offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+#else
+ *offset = drm_vma_node_offset_addr(&obj->vma_node);
+ DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+#endif
+
+ drm_gem_object_unreference_unlocked(obj);
+fail:
+ return ret;
+}
+
+/* sync the buffer for DMA access */
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo)
+{
+ struct drm_device *dev = bo->gem_object.dev;
+
+ if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED)) {
+ int i, npages = bo->gem_object.size >> PAGE_SHIFT;
+ struct page **pages = bo->backing_data.shm.pages;
+ bool dirty = false;
+
+ for (i = 0; i < npages; i++) {
+ if (!bo->backing_data.shm.dma_addrs[i]) {
+ DRM_DEBUG("%s: dma map page=%d bo=%p\n", __func__, i, bo);
+ bo->backing_data.shm.dma_addrs[i] =
+ dma_map_page(dev->dev, pages[i], 0,
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dirty = true;
+ }
+ }
+
+ if (dirty) {
+ DRM_DEBUG("%s: zap ptes (and flush cache) bo=%p\n", __func__, bo);
+ /*
+ * TODO MIDEGL-1813
+ *
+ * Use flush_cache_page() and outer_flush_range() to
+ * flush only the user space mappings of the dirty pages
+ */
+ flush_cache_all();
+ outer_flush_all();
+ unmap_mapping_range(bo->gem_object.filp->f_mapping, 0,
+ bo->gem_object.size, 1);
+ }
+ }
+}
+
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff)
+{
+ struct drm_device *dev = bo->gem_object.dev;
+
+ /*
+ * TODO MIDEGL-1808
+ *
+ * The following check was meant to detect if the CPU is trying to access
+ * a buffer that is currently mapped for DMA accesses, which is illegal
+ * as described by the DMA-API.
+ *
+ * However, some of our tests are trying to do that, which triggers the message
+ * below and avoids dma-unmapping the pages not to annoy the DMA device but that
+ * leads to the test failing because of caches not being properly flushed.
+ */
+
+ /*
+ if (bo->sgt) {
+ DRM_ERROR("%s: the CPU is trying to access a dma-mapped buffer\n", __func__);
+ return;
+ }
+ */
+
+ if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED) &&
+ bo->backing_data.shm.dma_addrs[pgoff]) {
+ DRM_DEBUG("%s: unmap bo=%p (s=%d), paddr=%08x\n",
+ __func__, bo, bo->gem_object.size,
+ bo->backing_data.shm.dma_addrs[pgoff]);
+ dma_unmap_page(dev->dev, bo->backing_data.shm.dma_addrs[pgoff],
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ bo->backing_data.shm.dma_addrs[pgoff] = 0;
+ }
+}
+
+/* Based on omapdrm driver */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+ struct vm_area_struct *vma, size_t size)
+{
+ DRM_DEBUG("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p type=%08x\n",
+ __func__, obj, bo, bo->type);
+
+ vma->vm_flags &= ~VM_PFNMAP;
+ vma->vm_flags |= VM_MIXEDMAP;
+
+ if (bo->type & PL111_BOT_WC) {
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+ } else if (bo->type & PL111_BOT_CACHED) {
+ /*
+ * Objects that do not have a filp (DMA backed) can't be
+ * mapped as cached now. Write-combine should be enough.
+ */
+ if (WARN_ON(!obj->filp))
+ return -EINVAL;
+
+ /*
+ * As explained in Documentation/dma-buf-sharing.txt
+ * we need this trick so that we can manually zap ptes
+ * in order to fake coherency.
+ */
+ fput(vma->vm_file);
+ get_file(obj->filp);
+ vma->vm_file = obj->filp;
+
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ } else { /* PL111_BOT_UNCACHED */
+ vma->vm_page_prot =
+ pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+ }
+ return 0;
+}
+
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
+{
+ int ret;
+ struct drm_file *priv = file_priv->private_data;
+ struct drm_device *dev = priv->minor->dev;
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+ struct drm_gem_mm *mm = dev->mm_private;
+ struct drm_hash_item *hash;
+ struct drm_local_map *map = NULL;
+#else
+ struct drm_vma_offset_node *node;
+#endif
+ struct drm_gem_object *obj;
+ struct pl111_gem_bo *bo;
+
+ DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0))
+ drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash);
+ map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+ obj = map->handle;
+#else
+ node = drm_vma_offset_exact_lookup(dev->vma_offset_manager,
+ vma->vm_pgoff,
+ vma_pages(vma));
+ obj = container_of(node, struct drm_gem_object, vma_node);
+#endif
+ bo = PL111_BO_FROM_GEM(obj);
+
+ DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p bo->type 0x%08x\n", __func__, bo, bo->type);
+
+ /* for an imported buffer we let the exporter handle the mmap */
+ if (obj->import_attach)
+ return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0);
+
+ ret = drm_gem_mmap(file_priv, vma);
+ if (ret < 0) {
+ DRM_ERROR("failed to mmap\n");
+ return ret;
+ }
+
+ /* Our page fault handler uses the page offset calculated from the vma,
+ * so we need to remove the gem cookie offset specified in the call.
+ */
+ vma->vm_pgoff = 0;
+
+ return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/drivers/gpu/drm/pl111/pl111_drm_pl111.c
new file mode 100644
index 00000000000..1d613d0592a
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_pl111.c
@@ -0,0 +1,417 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_pl111.c
+ * PL111 specific functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/* This can't be called from IRQ context, due to clk_get() and board->enable */
+static int clcd_enable(struct drm_framebuffer *fb)
+{
+ __u32 cntl;
+ struct clcd_board *board;
+
+ pr_info("DRM %s\n", __func__);
+
+ clk_prepare_enable(priv.clk);
+
+ /* Enable and Power Up */
+ cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
+ DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
+ if (fb->bits_per_pixel == 16)
+ cntl |= CNTL_LCDBPP16_565;
+ else if (fb->bits_per_pixel == 32 && fb->depth == 24)
+ cntl |= CNTL_LCDBPP24;
+ else
+ BUG_ON(1);
+
+ cntl |= CNTL_BGR;
+
+ writel(cntl, priv.regs + CLCD_PL111_CNTL);
+
+ if (priv.amba_dev->dev.platform_data) {
+ board = priv.amba_dev->dev.platform_data;
+
+ if (board->enable)
+ board->enable(NULL);
+ }
+
+ /* Enable Interrupts */
+ writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
+
+ return 0;
+}
+
+int clcd_disable(struct drm_crtc *crtc)
+{
+ struct clcd_board *board;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ unsigned long flags;
+#endif
+
+ pr_info("DRM %s\n", __func__);
+
+ /* Disable Interrupts */
+ writel(0x00000000, priv.regs + CLCD_PL111_IENB);
+
+ if (priv.amba_dev->dev.platform_data) {
+ board = priv.amba_dev->dev.platform_data;
+
+ if (board->disable)
+ board->disable(NULL);
+ }
+
+ /* Disable and Power Down */
+ writel(0, priv.regs + CLCD_PL111_CNTL);
+
+ /* Disable clock */
+ clk_disable_unprepare(priv.clk);
+
+ pl111_crtc->last_bpp = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+ spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+ /* Release the previous buffers */
+ if (pl111_crtc->old_kds_res_set != NULL)
+ kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+
+ pl111_crtc->old_kds_res_set = NULL;
+ spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags);
+#endif
+ return 0;
+}
+
+/*
+ * To avoid a possible race where "pl111_crtc->current_update_res" has
+ * been updated (non NULL) but the corresponding scanout buffer has not been
+ * written to the base registers we must always call this function holding
+ * the "base_update_lock" spinlock with IRQs disabled (spin_lock_irqsave()).
+ */
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
+{
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+ struct drm_framebuffer *fb;
+ struct pl111_gem_bo *bo;
+ size_t min_size;
+ fb = flip_res->fb;
+ bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+
+
+
+ min_size = (fb->height - 1) * fb->pitches[0]
+ + fb->width * (fb->bits_per_pixel >> 3);
+
+ BUG_ON(bo->gem_object.size < min_size);
+
+ /* Don't even attempt PL111_BOT_SHM, it's not contiguous */
+ BUG_ON(bo->type != PL111_BOT_DMA);
+
+ /*
+ * Note the buffer for releasing after IRQ, and don't allow any more
+ * updates until then.
+ *
+ * This clcd controller latches the new address on next vsync. Address
+ * latching is indicated by CLCD_IRQ_NEXTBASE_UPDATE, and so we must
+ * wait for that before releasing the previous buffer's kds
+ * resources. Otherwise, we'll allow writers to write to the old buffer
+ * whilst it is still being displayed
+ */
+ pl111_crtc->current_update_res = flip_res;
+
+ DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
+ fb, bo, bo->backing_data.dma.fb_dev_addr);
+
+ if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0)
+ DRM_ERROR("Could not get vblank reference for crtc %d\n",
+ pl111_crtc->crtc_index);
+
+ /* Set the scanout buffer */
+ writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS);
+ writel(bo->backing_data.dma.fb_dev_addr +
+ ((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS);
+}
+
+void
+show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
+ struct drm_framebuffer *fb)
+{
+ unsigned long irq_flags;
+ struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+ spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+ if (list_empty(&pl111_crtc->update_queue) &&
+ !pl111_crtc->current_update_res) {
+ do_flip_to_res(flip_res);
+ } else {
+ /*
+ * Enqueue the update to occur on a future IRQ
+ * This only happens on triple-or-greater buffering
+ */
+ DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
+ fb);
+ list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
+ }
+ spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+ if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 ||
+ pl111_crtc->last_bpp != fb->bits_per_pixel ||
+ !drm_mode_equal(pl111_crtc->new_mode,
+ pl111_crtc->current_mode))) {
+ struct clcd_regs timing;
+
+ pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing);
+
+ DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
+ timing.tim0, timing.tim1, timing.tim2,
+ timing.tim3, timing.pixclock);
+
+ /* This is the actual mode setting part */
+ clk_set_rate(priv.clk, timing.pixclock);
+
+ writel(timing.tim0, priv.regs + CLCD_TIM0);
+ writel(timing.tim1, priv.regs + CLCD_TIM1);
+ writel(timing.tim2, priv.regs + CLCD_TIM2);
+ writel(timing.tim3, priv.regs + CLCD_TIM3);
+
+ clcd_enable(fb);
+ pl111_crtc->last_bpp = fb->bits_per_pixel;
+ }
+
+ if (!flip_res->page_flip) {
+ drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
+ pl111_crtc->current_mode = pl111_crtc->new_mode;
+ pl111_crtc->new_mode = NULL;
+ }
+
+ BUG_ON(!pl111_crtc->current_mode);
+
+ /*
+ * If IRQs weren't enabled before, they are now. This will eventually
+ * cause flip_res to be released via pl111_common_irq, which updates
+ * every time the Base Address is latched (i.e. every frame, regardless
+ * of whether we update the base address or not)
+ */
+}
+
+irqreturn_t pl111_irq(int irq, void *data)
+{
+ u32 irq_stat;
+ struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
+
+ irq_stat = readl(priv.regs + CLCD_PL111_MIS);
+
+ if (!irq_stat)
+ return IRQ_NONE;
+
+ if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE)
+ pl111_common_irq(pl111_crtc);
+
+ /* Clear the interrupt once done */
+ writel(irq_stat, priv.regs + CLCD_PL111_ICR);
+
+ return IRQ_HANDLED;
+}
+
+int pl111_device_init(struct drm_device *dev)
+{
+ struct pl111_drm_dev_private *priv = dev->dev_private;
+ int ret;
+
+ if (priv == NULL) {
+ pr_err("%s no private data\n", __func__);
+ return -EINVAL;
+ }
+
+ if (priv->amba_dev == NULL) {
+ pr_err("%s no amba device found\n", __func__);
+ return -EINVAL;
+ }
+
+ /* set up MMIO for register access */
+ priv->mmio_start = priv->amba_dev->res.start;
+ priv->mmio_len = resource_size(&priv->amba_dev->res);
+
+ DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
+ priv->mmio_len);
+
+ priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
+ if (priv->regs == NULL) {
+ pr_err("%s failed mmio\n", __func__);
+ return -EINVAL;
+ }
+
+ /* turn off interrupts */
+ writel(0, priv->regs + CLCD_PL111_IENB);
+
+ ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
+ "pl111_irq_handler", NULL);
+ if (ret != 0) {
+ pr_err("%s failed %d\n", __func__, ret);
+ goto out_mmio;
+ }
+
+ goto finish;
+
+out_mmio:
+ iounmap(priv->regs);
+finish:
+ DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
+ return ret;
+}
+
+void pl111_device_fini(struct drm_device *dev)
+{
+ struct pl111_drm_dev_private *priv = dev->dev_private;
+ u32 cntl;
+
+ if (priv == NULL || priv->regs == NULL)
+ return;
+
+ free_irq(priv->amba_dev->irq[0], NULL);
+
+ cntl = readl(priv->regs + CLCD_PL111_CNTL);
+
+ cntl &= ~CNTL_LCDEN;
+ writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+ cntl &= ~CNTL_LCDPWR;
+ writel(cntl, priv->regs + CLCD_PL111_CNTL);
+
+ iounmap(priv->regs);
+}
+
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
+{
+ struct clcd_board *board = dev->dev.platform_data;
+ int ret;
+ pr_info("DRM %s\n", __func__);
+
+ if (!board)
+ dev_warn(&dev->dev, "board data not available\n");
+
+ ret = amba_request_regions(dev, NULL);
+ if (ret != 0) {
+ DRM_ERROR("CLCD: unable to reserve regs region\n");
+ goto out;
+ }
+
+ priv.amba_dev = dev;
+
+ priv.clk = clk_get(&priv.amba_dev->dev, NULL);
+ if (IS_ERR(priv.clk)) {
+ DRM_ERROR("CLCD: unable to get clk.\n");
+ ret = PTR_ERR(priv.clk);
+ goto clk_err;
+ }
+
+ return 0;
+
+clk_err:
+ amba_release_regions(dev);
+out:
+ return ret;
+}
+
+int pl111_amba_remove(struct amba_device *dev)
+{
+ DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+ clk_put(priv.clk);
+
+ amba_release_regions(dev);
+
+ priv.amba_dev = NULL;
+
+ return 0;
+}
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+ struct clcd_regs *timing)
+{
+ unsigned int ppl, hsw, hfp, hbp;
+ unsigned int lpp, vsw, vfp, vbp;
+ unsigned int cpl;
+
+ memset(timing, 0, sizeof(struct clcd_regs));
+
+ ppl = (mode->hdisplay / 16) - 1;
+ hsw = mode->hsync_end - mode->hsync_start - 1;
+ hfp = mode->hsync_start - mode->hdisplay - 1;
+ hbp = mode->htotal - mode->hsync_end - 1;
+
+ lpp = mode->vdisplay - 1;
+ vsw = mode->vsync_end - mode->vsync_start - 1;
+ vfp = mode->vsync_start - mode->vdisplay;
+ vbp = mode->vtotal - mode->vsync_end;
+
+ cpl = mode->hdisplay - 1;
+
+ timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24);
+ timing->tim1 = lpp | (vsw << 10) | (vfp << 16) | (vbp << 24);
+ timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16);
+ timing->tim3 = 0;
+
+ timing->pixclock = mode->clock * 1000;
+}
+
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+ struct drm_display_mode *mode)
+{
+ unsigned int ppl, hsw, hfp, hbp;
+ unsigned int lpp, vsw, vfp, vbp;
+
+ ppl = (timing->tim0 >> 2) & 0x3f;
+ hsw = (timing->tim0 >> 8) & 0xff;
+ hfp = (timing->tim0 >> 16) & 0xff;
+ hbp = (timing->tim0 >> 24) & 0xff;
+
+ lpp = timing->tim1 & 0x3ff;
+ vsw = (timing->tim1 >> 10) & 0x3f;
+ vfp = (timing->tim1 >> 16) & 0xff;
+ vbp = (timing->tim1 >> 24) & 0xff;
+
+ mode->hdisplay = (ppl + 1) * 16;
+ mode->hsync_start = ((ppl + 1) * 16) + hfp + 1;
+ mode->hsync_end = ((ppl + 1) * 16) + hfp + hsw + 2;
+ mode->htotal = ((ppl + 1) * 16) + hfp + hsw + hbp + 3;
+ mode->hskew = 0;
+
+ mode->vdisplay = lpp + 1;
+ mode->vsync_start = lpp + vfp + 1;
+ mode->vsync_end = lpp + vfp + vsw + 2;
+ mode->vtotal = lpp + vfp + vsw + vbp + 2;
+
+ mode->flags = 0;
+
+ mode->width_mm = 0;
+ mode->height_mm = 0;
+
+ mode->clock = timing->pixclock / 1000;
+ mode->hsync = timing->pixclock / mode->htotal;
+ mode->vrefresh = mode->hsync / mode->vtotal;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_platform.c b/drivers/gpu/drm/pl111/pl111_drm_platform.c
new file mode 100644
index 00000000000..9d5ec0cb175
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_platform.c
@@ -0,0 +1,151 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_platform.c
+ * Implementation of the Linux platform device entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+static int pl111_platform_drm_suspend(struct platform_device *dev,
+ pm_message_t state)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
+
+static int pl111_platform_drm_resume(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
+
+int pl111_platform_drm_probe(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ return pl111_drm_init(dev);
+}
+
+static int pl111_platform_drm_remove(struct platform_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ pl111_drm_exit(dev);
+
+ return 0;
+}
+
+static struct amba_id pl111_id_table[] = {
+ {
+ .id = 0x00041110,
+ .mask = 0x000ffffe,
+ },
+ {0, 0},
+};
+
+static struct amba_driver pl111_amba_driver = {
+ .drv = {
+ .name = "clcd-pl11x",
+ },
+ .probe = pl111_amba_probe,
+ .remove = pl111_amba_remove,
+ .id_table = pl111_id_table,
+};
+
+static struct platform_driver platform_drm_driver = {
+ .probe = pl111_platform_drm_probe,
+ .remove = pl111_platform_drm_remove,
+ .suspend = pl111_platform_drm_suspend,
+ .resume = pl111_platform_drm_resume,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = DRIVER_NAME,
+ },
+};
+
+static const struct platform_device_info pl111_drm_pdevinfo = {
+ .name = DRIVER_NAME,
+ .id = -1,
+ .dma_mask = ~0UL
+};
+
+static struct platform_device *pl111_drm_device;
+
+static int __init pl111_platform_drm_init(void)
+{
+ int ret;
+
+ pr_info("DRM %s\n", __func__);
+
+ pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
+ if (pl111_drm_device == NULL) {
+ pr_err("DRM platform_device_register_full() failed\n");
+ return -ENOMEM;
+ }
+
+ ret = amba_driver_register(&pl111_amba_driver);
+ if (ret != 0) {
+ pr_err("DRM amba_driver_register() failed %d\n", ret);
+ goto err_amba_reg;
+ }
+
+ ret = platform_driver_register(&platform_drm_driver);
+ if (ret != 0) {
+ pr_err("DRM platform_driver_register() failed %d\n", ret);
+ goto err_pdrv_reg;
+ }
+
+ return 0;
+
+err_pdrv_reg:
+ amba_driver_unregister(&pl111_amba_driver);
+err_amba_reg:
+ platform_device_unregister(pl111_drm_device);
+
+ return ret;
+}
+
+static void __exit pl111_platform_drm_exit(void)
+{
+ pr_info("DRM %s\n", __func__);
+
+ platform_device_unregister(pl111_drm_device);
+ amba_driver_unregister(&pl111_amba_driver);
+ platform_driver_unregister(&platform_drm_driver);
+}
+
+#ifdef MODULE
+module_init(pl111_platform_drm_init);
+#else
+late_initcall(pl111_platform_drm_init);
+#endif
+module_exit(pl111_platform_drm_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE(DRIVER_LICENCE);
+MODULE_ALIAS(DRIVER_ALIAS);
diff --git a/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/drivers/gpu/drm/pl111/pl111_drm_suspend.c
new file mode 100644
index 00000000000..d033566a579
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_suspend.c
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_suspend.c
+ * Implementation of the suspend/resume functions for PL111 DRM
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
+
+int pl111_drm_resume(struct drm_device *dev)
+{
+ pr_info("DRM %s\n", __func__);
+ return 0;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_vma.c b/drivers/gpu/drm/pl111/pl111_drm_vma.c
new file mode 100644
index 00000000000..ff602efd6d6
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_vma.c
@@ -0,0 +1,308 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_vma.c
+ * Implementation of the VM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */
+/**
+ * drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ */
+static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+ bool dirty, bool accessed)
+{
+ int i, npages;
+ struct pl111_gem_bo *bo;
+ npages = obj->size >> PAGE_SHIFT;
+ bo = PL111_BO_FROM_GEM(obj);
+ for (i = 0; i < npages; i++) {
+ if (dirty)
+ set_page_dirty(pages[i]);
+ if (accessed)
+ mark_page_accessed(pages[i]);
+ /* Undo the reference we took when populating the table */
+ page_cache_release(pages[i]);
+ }
+ drm_free_large(pages);
+}
+
+void put_pages(struct drm_gem_object *obj, struct page **pages)
+{
+ int i, npages;
+ struct pl111_gem_bo *bo;
+ npages = obj->size >> PAGE_SHIFT;
+ bo = PL111_BO_FROM_GEM(obj);
+ _drm_gem_put_pages(obj, pages, true, true);
+ if (bo->backing_data.shm.dma_addrs) {
+ for (i = 0; i < npages; i++) {
+ /* Filter pages unmapped because of CPU accesses */
+ if (!bo->backing_data.shm.dma_addrs[i])
+ continue;
+ if (!dma_mapping_error(obj->dev->dev,
+ bo->backing_data.shm.dma_addrs[i])) {
+ dma_unmap_page(obj->dev->dev,
+ bo->backing_data.shm.dma_addrs[i],
+ PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+ }
+ kfree(bo->backing_data.shm.dma_addrs);
+ bo->backing_data.shm.dma_addrs = NULL;
+ }
+}
+
+/**
+ * drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ */
+static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
+ gfp_t gfpmask)
+{
+ struct inode *inode;
+ struct address_space *mapping;
+ struct page *p, **pages;
+ int i, npages;
+
+ /* This is the shared memory object that backs the GEM resource */
+ inode = obj->filp->f_path.dentry->d_inode;
+ mapping = inode->i_mapping;
+
+ npages = obj->size >> PAGE_SHIFT;
+
+ pages = drm_malloc_ab(npages, sizeof(struct page *));
+ if (pages == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ gfpmask |= mapping_gfp_mask(mapping);
+
+ for (i = 0; i < npages; i++) {
+ p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
+ if (IS_ERR(p))
+ goto fail;
+ pages[i] = p;
+
+ /*
+ * There is a hypothetical issue w/ drivers that require
+ * buffer memory in the low 4GB.. if the pages are un-
+ * pinned, and swapped out, they can end up swapped back
+ * in above 4GB. If pages are already in memory, then
+ * shmem_read_mapping_page_gfp will ignore the gfpmask,
+ * even if the already in-memory page disobeys the mask.
+ *
+ * It is only a theoretical issue today, because none of
+ * the devices with this limitation can be populated with
+ * enough memory to trigger the issue. But this BUG_ON()
+ * is here as a reminder in case the problem with
+ * shmem_read_mapping_page_gfp() isn't solved by the time
+ * it does become a real issue.
+ *
+ * See this thread: http://lkml.org/lkml/2011/7/11/238
+ */
+ BUG_ON((gfpmask & __GFP_DMA32) &&
+ (page_to_pfn(p) >= 0x00100000UL));
+ }
+
+ return pages;
+
+fail:
+ while (i--)
+ page_cache_release(pages[i]);
+
+ drm_free_large(pages);
+ return ERR_PTR(PTR_ERR(p));
+}
+
+struct page **get_pages(struct drm_gem_object *obj)
+{
+ struct pl111_gem_bo *bo;
+ bo = PL111_BO_FROM_GEM(obj);
+
+ if (bo->backing_data.shm.pages == NULL) {
+ struct page **p;
+ int npages = obj->size >> PAGE_SHIFT;
+ int i;
+
+ p = _drm_gem_get_pages(obj, GFP_KERNEL);
+ if (IS_ERR(p))
+ return ERR_PTR(-ENOMEM);
+
+ bo->backing_data.shm.pages = p;
+
+ if (bo->backing_data.shm.dma_addrs == NULL) {
+ bo->backing_data.shm.dma_addrs =
+ kzalloc(npages * sizeof(dma_addr_t),
+ GFP_KERNEL);
+ if (bo->backing_data.shm.dma_addrs == NULL)
+ goto error_out;
+ }
+
+ if (!(bo->type & PL111_BOT_CACHED)) {
+ for (i = 0; i < npages; ++i) {
+ bo->backing_data.shm.dma_addrs[i] =
+ dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(obj->dev->dev,
+ bo->backing_data.shm.dma_addrs[i]))
+ goto error_out;
+ }
+ }
+ }
+
+ return bo->backing_data.shm.pages;
+
+error_out:
+ put_pages(obj, bo->backing_data.shm.pages);
+ bo->backing_data.shm.pages = NULL;
+ return ERR_PTR(-ENOMEM);
+}
+
+/* END drivers/staging/omapdrm/omap_gem_helpers.c */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page **pages;
+ unsigned long pfn;
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+ struct drm_device *dev = obj->dev;
+ int ret;
+
+ mutex_lock(&dev->struct_mutex);
+
+ /*
+ * Our mmap calls setup a valid vma->vm_pgoff
+ * so we can use vmf->pgoff
+ */
+
+ if (bo->type & PL111_BOT_DMA) {
+ pfn = (bo->backing_data.dma.fb_dev_addr >> PAGE_SHIFT) +
+ vmf->pgoff;
+ } else { /* PL111_BOT_SHM */
+ pages = get_pages(obj);
+ if (IS_ERR(pages)) {
+ dev_err(obj->dev->dev,
+ "could not get pages: %ld\n", PTR_ERR(pages));
+ ret = PTR_ERR(pages);
+ goto error;
+ }
+ pfn = page_to_pfn(pages[vmf->pgoff]);
+ pl111_gem_sync_to_cpu(bo, vmf->pgoff);
+ }
+
+ DRM_DEBUG("bo=%p physaddr=0x%.8x for offset 0x%x\n",
+ bo, PFN_PHYS(pfn), PFN_PHYS(vmf->pgoff));
+
+ ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+error:
+ mutex_unlock(&dev->struct_mutex);
+
+ switch (ret) {
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
+ case -EBUSY:
+ return VM_FAULT_NOPAGE;
+ case -ENOMEM:
+ return VM_FAULT_OOM;
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+}
+
+/*
+ * The core drm_vm_ functions in kernel 3.4 are not ready
+ * to handle dma_buf cases where vma->vm_file->private_data
+ * cannot be accessed to get the device.
+ *
+ * We use these functions from 3.5 instead where the device
+ * pointer is passed explicitly.
+ *
+ * However they aren't exported from the kernel until 3.10
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+ struct vm_area_struct *vma)
+{
+ struct drm_vma_entry *vma_entry;
+
+ DRM_DEBUG("0x%08lx,0x%08lx\n",
+ vma->vm_start, vma->vm_end - vma->vm_start);
+ atomic_inc(&dev->vma_count);
+
+ vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
+ if (vma_entry) {
+ vma_entry->vma = vma;
+ vma_entry->pid = current->pid;
+ list_add(&vma_entry->head, &dev->vmalist);
+ }
+}
+
+void pl111_drm_vm_close_locked(struct drm_device *dev,
+ struct vm_area_struct *vma)
+{
+ struct drm_vma_entry *pt, *temp;
+
+ DRM_DEBUG("0x%08lx,0x%08lx\n",
+ vma->vm_start, vma->vm_end - vma->vm_start);
+ atomic_dec(&dev->vma_count);
+
+ list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
+ if (pt->vma == vma) {
+ list_del(&pt->head);
+ kfree(pt);
+ break;
+ }
+ }
+}
+
+void pl111_gem_vm_open(struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = vma->vm_private_data;
+
+ drm_gem_object_reference(obj);
+
+ mutex_lock(&obj->dev->struct_mutex);
+ pl111_drm_vm_open_locked(obj->dev, vma);
+ mutex_unlock(&obj->dev->struct_mutex);
+}
+
+void pl111_gem_vm_close(struct vm_area_struct *vma)
+{
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct drm_device *dev = obj->dev;
+
+ mutex_lock(&dev->struct_mutex);
+ pl111_drm_vm_close_locked(obj->dev, vma);
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+}
+#endif