Diffstat (limited to 'drivers/gpu')
205 files changed, 58813 insertions, 0 deletions
diff --git a/drivers/gpu/arm/Kbuild b/drivers/gpu/arm/Kbuild new file mode 100644 index 00000000000..19c7e9a2659 --- /dev/null +++ b/drivers/gpu/arm/Kbuild @@ -0,0 +1,17 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +obj-$(CONFIG_MALI_MIDGARD) += midgard/ diff --git a/drivers/gpu/arm/Kconfig b/drivers/gpu/arm/Kconfig new file mode 100644 index 00000000000..1f30eb541d6 --- /dev/null +++ b/drivers/gpu/arm/Kconfig @@ -0,0 +1,19 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +menu "ARM GPU Configuration" +source "drivers/gpu/arm/midgard/Kconfig" +endmenu diff --git a/drivers/gpu/arm/midgard/Kbuild b/drivers/gpu/arm/midgard/Kbuild new file mode 100644 index 00000000000..f4384983140 --- /dev/null +++ b/drivers/gpu/arm/midgard/Kbuild @@ -0,0 +1,241 @@ +# +# (C) COPYRIGHT 2012,2014 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +# Driver version string which is returned to userspace via an ioctl +MALI_RELEASE_NAME ?= "r6p0-02rel0" + +# Paths required for build +KBASE_PATH = $(src) +KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy +UMP_PATH = $(src)/../../../base + +ifeq ($(CONFIG_MALI_ERROR_INJECTION),y) +MALI_ERROR_INJECT_ON = 1 +endif + +# Set up defaults if not defined by build system +MALI_CUSTOMER_RELEASE ?= 1 +MALI_UNIT_TEST ?= 0 +MALI_KERNEL_TEST_API ?= 0 +MALI_ERROR_INJECT_ON ?= 0 +MALI_MOCK_TEST ?= 0 +MALI_COVERAGE ?= 0 +MALI_INSTRUMENTATION_LEVEL ?= 0 +# This workaround is for what seems to be a compiler bug we observed in +# GCC 4.7 on AOSP 4.3. The bug caused an intermittent failure compiling +# the "_Pragma" syntax, where an error message is returned: +# +# "internal compiler error: unspellable token PRAGMA" +# +# This regression has thus far only been seen on the GCC 4.7 compiler bundled +# with AOSP 4.3.0. So this makefile, intended for in-tree kernel builds +# which are not known to be used with AOSP, is hardcoded to disable the +# workaround, i.e. set the define to 0. 
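# (Illustration, not part of this commit: a define like this is typically consumed as a C preprocessor gate, e.g. an "#if MALI_GCC_WORKAROUND_MIDCOM_4598" block selecting an open-coded fallback, with the normal _Pragma() path in the "#else" branch, so that affected compilers never parse a _Pragma token.)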
+MALI_GCC_WORKAROUND_MIDCOM_4598 ?= 0 + +# Set up our defines, which will be passed to gcc +DEFINES = \ + -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -DMALI_KERNEL_TEST_API=$(MALI_KERNEL_TEST_API) \ + -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ + -DMALI_ERROR_INJECT_ON=$(MALI_ERROR_INJECT_ON) \ + -DMALI_MOCK_TEST=$(MALI_MOCK_TEST) \ + -DMALI_COVERAGE=$(MALI_COVERAGE) \ + -DMALI_INSTRUMENTATION_LEVEL=$(MALI_INSTRUMENTATION_LEVEL) \ + -DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \ + -DMALI_GCC_WORKAROUND_MIDCOM_4598=$(MALI_GCC_WORKAROUND_MIDCOM_4598) + +ifeq ($(KBUILD_EXTMOD),) +# in-tree +DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=../../$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME) +else +# out-of-tree +DEFINES +=-DMALI_KBASE_THIRDPARTY_PATH=$(src)/platform/$(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME) +endif + +DEFINES += -I$(srctree)/drivers/staging/android + +# Use our defines when compiling +ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux +subdir-ccflags-y += $(DEFINES) -I$(KBASE_PATH) -I$(KBASE_PLATFORM_PATH) -I$(OSK_PATH) -I$(UMP_PATH) -I$(srctree)/include/linux + +SRC := \ + mali_kbase_device.c \ + mali_kbase_cache_policy.c \ + mali_kbase_mem.c \ + mali_kbase_mmu.c \ + mali_kbase_jd.c \ + mali_kbase_jd_debugfs.c \ + mali_kbase_jm.c \ + mali_kbase_cpuprops.c \ + mali_kbase_gpuprops.c \ + mali_kbase_js.c \ + mali_kbase_js_ctx_attr.c \ + mali_kbase_event.c \ + mali_kbase_context.c \ + mali_kbase_pm.c \ + mali_kbase_config.c \ + mali_kbase_security.c \ + mali_kbase_instr.c \ + mali_kbase_vinstr.c \ + mali_kbase_softjobs.c \ + mali_kbase_10969_workaround.c \ + mali_kbase_hw.c \ + mali_kbase_utility.c \ + mali_kbase_debug.c \ + mali_kbase_trace_timeline.c \ + mali_kbase_gpu_memory_debugfs.c \ + mali_kbase_mem_linux.c \ + mali_kbase_core_linux.c \ + mali_kbase_sync.c \ + mali_kbase_sync_user.c \ + mali_kbase_replay.c \ + mali_kbase_mem_profile_debugfs.c \ + mali_kbase_mmu_mode_lpae.c \ + mali_kbase_disjoint_events.c \ + mali_kbase_gator_api.c \ + mali_kbase_debug_mem_view.c \ + mali_kbase_smc.c + +ifeq ($(CONFIG_MALI_MIPE_ENABLED),y) + SRC += mali_kbase_tlstream.c + ifeq ($(MALI_UNIT_TEST),1) + SRC += mali_kbase_tlstream_test.c + endif +endif + +# Job Scheduler Policy: Completely Fair Scheduler +SRC += mali_kbase_js_policy_cfs.c + +ifeq ($(CONFIG_MACH_MANTA),y) + SRC += mali_kbase_mem_alloc_carveout.c +else + SRC += mali_kbase_mem_alloc.c +endif + +ccflags-y += -I$(KBASE_PATH) + +# in-tree/out-of-tree logic needs to be slightly different to determine if a file is present +ifeq ($(KBUILD_EXTMOD),) +# in-tree +MALI_METRICS_PATH = $(srctree)/drivers/gpu/arm/midgard +else +# out-of-tree +MALI_METRICS_PATH = $(KBUILD_EXTMOD) +endif + +ifeq ($(CONFIG_MALI_PLATFORM_FAKE),y) + SRC += mali_kbase_platform_fake.c + + ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS),y) + SRC += platform/vexpress/mali_kbase_config_vexpress.c \ + platform/vexpress/mali_kbase_cpu_vexpress.c + ccflags-y += -I$(src)/platform/vexpress + endif + + ifeq ($(CONFIG_MALI_PLATFORM_RTSM_VE),y) + SRC += platform/rtsm_ve/mali_kbase_config_vexpress.c + ccflags-y += -I$(src)/platform/rtsm_ve + endif + + ifeq ($(CONFIG_MALI_PLATFORM_JUNO),y) + SRC += platform/juno/mali_kbase_config_vexpress.c + ccflags-y += -I$(src)/platform/juno + endif + + ifeq ($(CONFIG_MALI_PLATFORM_JUNO_SOC),y) + SRC += platform/juno_soc/mali_kbase_config_juno_soc.c + ccflags-y += -I$(src)/platform/juno_soc + endif + + ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_1XV7_A57),y) + SRC += 
platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c + ccflags-y += -I$(src)/platform/vexpress_1xv7_a57 + endif + + ifeq ($(CONFIG_MALI_PLATFORM_VEXPRESS_6XVIRTEX7_10MHZ),y) + SRC += platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c \ + platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c + ccflags-y += -I$(src)/platform/vexpress_6xvirtex7_10mhz + endif + + ifeq ($(CONFIG_MALI_PLATFORM_A7_KIPLING),y) + SRC += platform/a7_kipling/mali_kbase_config_a7_kipling.c \ + platform/a7_kipling/mali_kbase_cpu_a7_kipling.c + ccflags-y += -I$(src)/platform/a7_kipling + endif + + ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y) + # remove begin and end quotes from the Kconfig string type + platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)) + MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name) + ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR) + ifeq ($(CONFIG_MALI_MIDGARD),m) + include $(src)/platform/$(platform_name)/Kbuild + else ifeq ($(CONFIG_MALI_MIDGARD),y) + obj-$(CONFIG_MALI_MIDGARD) += platform/ + endif + endif +endif # CONFIG_MALI_PLATFORM_FAKE=y + +ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y) +# remove begin and end quotes from the Kconfig string type +platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME)) +MALI_PLATFORM_THIRDPARTY_DIR := platform/$(platform_name) +ccflags-y += -I$(src)/$(MALI_PLATFORM_THIRDPARTY_DIR) +ifeq ($(CONFIG_MALI_MIDGARD),m) +include $(src)/platform/$(platform_name)/Kbuild +else ifeq ($(CONFIG_MALI_MIDGARD),y) +obj-$(CONFIG_MALI_MIDGARD) += platform/ +endif +endif + +ifeq ($(CONFIG_MALI_PLATFORM_DEVICETREE),y) + SRC += platform/devicetree/mali_kbase_runtime_pm.c + SRC += platform/devicetree/mali_kbase_config_devicetree.c + ccflags-y += -I$(src)/platform/devicetree +endif + +# Tell the Linux build system from which .o file to create the kernel module +obj-$(CONFIG_MALI_MIDGARD) += mali_kbase.o + +# Tell the Linux build system to enable building of our .c files +mali_kbase-y := $(SRC:.c=.o) + +ifneq ($(wildcard $(src)/internal/Kbuild),) +ifeq ($(MALI_CUSTOMER_RELEASE),0) +# This include may set MALI_BACKEND_PATH and CONFIG_MALI_BACKEND_REAL +include $(src)/internal/Kbuild +mali_kbase-y += $(INTERNAL:.c=.o) +endif +endif + +MALI_BACKEND_PATH ?= backend +CONFIG_MALI_BACKEND ?= gpu +CONFIG_MALI_BACKEND_REAL ?= $(CONFIG_MALI_BACKEND) + +ifeq ($(MALI_MOCK_TEST),1) +ifeq ($(CONFIG_MALI_BACKEND_REAL),gpu) +# Test functionality +mali_kbase-y += tests/internal/src/mock/mali_kbase_pm_driver_mock.o +endif +endif + +include $(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL)/Kbuild +mali_kbase-y += $(BACKEND:.c=.o) + +ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL) +subdir-ccflags-y += -I$(src)/$(MALI_BACKEND_PATH)/$(CONFIG_MALI_BACKEND_REAL) diff --git a/drivers/gpu/arm/midgard/Kconfig b/drivers/gpu/arm/midgard/Kconfig new file mode 100644 index 00000000000..15430438004 --- /dev/null +++ b/drivers/gpu/arm/midgard/Kconfig @@ -0,0 +1,209 @@ +# +# (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. 
+# +# + + + +menuconfig MALI_MIDGARD + tristate "Mali Midgard series support" + default n + help + Enable this option to build support for an ARM Mali Midgard GPU. + + To compile this driver as a module, choose M here: + this will generate a single module, called mali_kbase. + +choice + prompt "Streamline support" + depends on MALI_MIDGARD + default MALI_TIMELINE_DISABLED + help + Select Streamline support configuration. + +config MALI_TIMELINE_DISABLED + bool "Streamline support disabled" + help + Disable support for ARM Streamline Performance Analyzer. + + Timeline support will not be included in + kernel code. + Debug stream will not be generated. + +config MALI_GATOR_SUPPORT + bool "Streamline support via Gator" + help + Adds diagnostic support for use with the ARM Streamline Performance Analyzer. + You will need the Gator device driver already loaded before loading this driver when enabling + Streamline debug support. + +config MALI_MIPE_ENABLED + bool "Streamline support via MIPE" + help + Adds diagnostic support for use with the ARM Streamline Performance Analyzer. + + The stream will be transmitted directly to the Mali GPU library. + A compatible version of the library is required to read the debug stream generated by the kernel. + +endchoice + +config MALI_MIDGARD_DVFS + bool "Enable DVFS" + depends on MALI_MIDGARD + default n + help + Choose this option to enable DVFS in the Mali Midgard DDK. + +config MALI_MIDGARD_RT_PM + bool "Enable Runtime power management" + depends on MALI_MIDGARD + depends on PM_RUNTIME + default n + help + Choose this option to enable runtime power management in the Mali Midgard DDK. + +config MALI_MIDGARD_ENABLE_TRACE + bool "Enable kbase tracing" + depends on MALI_MIDGARD + default n + help + Enables tracing in kbase. The trace log is available through + the "mali_trace" debugfs file when CONFIG_DEBUG_FS is enabled. + +config MALI_MIDGARD_DEBUG_SYS + bool "Enable sysfs for the Mali Midgard DDK" + depends on MALI_MIDGARD && SYSFS + default n + help + Enables sysfs for the Mali Midgard DDK. Set/monitor the Mali Midgard DDK. + +config MALI_DEVFREQ + bool "devfreq support for Mali" + depends on MALI_MIDGARD && PM_DEVFREQ + help + Support devfreq for Mali. + + Using the devfreq framework and, by default, the simpleondemand + governor, the frequency of Mali will be dynamically selected from the + available OPPs. + + +# MALI_EXPERT configuration options + +menuconfig MALI_EXPERT + depends on MALI_MIDGARD + bool "Enable Expert Settings" + default n + help + Enabling this option and modifying the default settings may produce a driver with performance or + other limitations. + +config MALI_DEBUG_SHADER_SPLIT_FS + bool "Allow mapping of shader cores via sysfs" + depends on MALI_MIDGARD && MALI_MIDGARD_DEBUG_SYS && MALI_EXPERT + default n + help + Select this option to provide a sysfs entry for runtime configuration of shader + core affinity masks. + +config MALI_PLATFORM_FAKE + bool "Enable fake platform device support" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + When you start to work with the Mali Midgard series device driver, the platform-specific code of + the Linux kernel for your platform may not be complete. In this situation the kernel device driver + supports creating the platform device outside of the Linux platform-specific code. + Enable this option if you would like to use a platform device configuration from within the device driver.
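A minimal sketch of what this option enables (an editor's illustration, not code from this commit; the device name "mali", the register base, and the IRQ number are placeholders): driver code can create the platform device itself with the stock Linux platform-device API when the board files do not provide one.

#include <linux/err.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/platform_device.h>

/* Placeholder resources: the real register window and IRQ are SoC-specific. */
static struct resource mali_fake_resources[] = {
	DEFINE_RES_MEM(0xfc010000, 0x4000),	/* GPU register window */
	DEFINE_RES_IRQ(68),			/* GPU interrupt line */
};

static int __init mali_fake_device_init(void)
{
	struct platform_device *pdev;

	/* Create the device from driver code instead of board files. */
	pdev = platform_device_register_simple("mali", 0, mali_fake_resources,
					       ARRAY_SIZE(mali_fake_resources));
	return PTR_ERR_OR_ZERO(pdev);
}
module_init(mali_fake_device_init);
MODULE_LICENSE("GPL");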
+ +choice + prompt "Platform configuration" + depends on MALI_MIDGARD && MALI_EXPERT + default MALI_PLATFORM_VEXPRESS + help + Select the SoC platform that contains a Mali Midgard GPU. + +config MALI_PLATFORM_VEXPRESS + depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4) + bool "Versatile Express" +config MALI_PLATFORM_VEXPRESS_VIRTEX7_40MHZ + depends on ARCH_VEXPRESS && (ARCH_VEXPRESS_CA9X4 || ARCH_VEXPRESS_CA15X4) + bool "Versatile Express w/Virtex7 @ 40MHz" +config MALI_PLATFORM_GOLDFISH + depends on ARCH_GOLDFISH + bool "Android Goldfish virtual CPU" +config MALI_PLATFORM_PBX + depends on ARCH_REALVIEW && REALVIEW_EB_A9MP && MACH_REALVIEW_PBX + bool "Realview PBX-A9" +config MALI_PLATFORM_THIRDPARTY + bool "Third Party Platform" +endchoice + +config MALI_PLATFORM_THIRDPARTY_NAME + depends on MALI_MIDGARD && MALI_PLATFORM_THIRDPARTY && MALI_EXPERT + string "Third party platform name" + help + Enter the name of a third party platform that is supported. The third party configuration + file must be in midgard/config/tpip/mali_kbase_config_xxx.c where xxx is the name + specified here. + +config MALI_DEBUG + bool "Debug build" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Select this option for increased checking and reporting of errors. + +config MALI_NO_MALI + bool "No Mali" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + This can be used to test the driver in a simulated environment + whereby the hardware is not physically present. If the hardware is physically + present it will not be used. This can be used to test the majority of the + driver without needing actual hardware or for software benchmarking. + All calls to the simulated hardware will complete immediately as if the hardware + completed the task. + +config MALI_ERROR_INJECT + bool "Error injection" + depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI + default n + help + Enables insertion of errors to test module failure and recovery mechanisms. + +config MALI_TRACE_TIMELINE + bool "Timeline tracing" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Enables timeline tracing through the kernel tracepoint system. + +config MALI_SYSTEM_TRACE + bool "Enable system event tracing support" + depends on MALI_MIDGARD && MALI_EXPERT + default n + help + Choose this option to enable system trace events for each + kbase event. This is typically used for debugging but has + minimal overhead when not in use. Enable only if you know what + you are doing. + +config MALI_GPU_TRACEPOINTS + bool "Enable GPU tracepoints" + depends on MALI_MIDGARD && ANDROID + select GPU_TRACEPOINTS + help + Enables GPU tracepoints using Android trace event definitions. + +source "drivers/gpu/arm/midgard/platform/Kconfig" diff --git a/drivers/gpu/arm/midgard/Makefile b/drivers/gpu/arm/midgard/Makefile new file mode 100644 index 00000000000..ded089243e6 --- /dev/null +++ b/drivers/gpu/arm/midgard/Makefile @@ -0,0 +1,37 @@ +# +# (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA.
+# +# + + + +KDIR ?= /lib/modules/$(shell uname -r)/build + +UMP_PATH_RELATIVE = $(CURDIR)/../../../base/ump +KBASE_PATH_RELATIVE = $(CURDIR) +KDS_PATH_RELATIVE = $(CURDIR)/../../../.. +EXTRA_SYMBOLS = $(UMP_PATH_RELATIVE)/src/Module.symvers + +ifeq ($(MALI_UNIT_TEST), 1) + EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/tests/internal/src/kernel_assert_module/linux/Module.symvers + EXTRA_SYMBOLS += $(KBASE_PATH_RELATIVE)/../../../../tests/kutf/Module.symvers +endif + +# GPL driver supports KDS +EXTRA_SYMBOLS += $(KDS_PATH_RELATIVE)/drivers/base/kds/Module.symvers + +# we get the symbols from modules using KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions +all: + $(MAKE) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../../include -I$(CURDIR)/../../../../tests/include $(SCONS_CFLAGS)" $(SCONS_CONFIGS) KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules + +clean: + $(MAKE) -C $(KDIR) M=$(CURDIR) clean diff --git a/drivers/gpu/arm/midgard/Makefile.kbase b/drivers/gpu/arm/midgard/Makefile.kbase new file mode 100644 index 00000000000..2bef9c25eae --- /dev/null +++ b/drivers/gpu/arm/midgard/Makefile.kbase @@ -0,0 +1,17 @@ +# +# (C) COPYRIGHT 2010 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + +EXTRA_CFLAGS += -I$(ROOT) -I$(KBASE_PATH) -I$(OSK_PATH)/src/linux/include -I$(KBASE_PATH)/platform_$(PLATFORM) + diff --git a/drivers/gpu/arm/midgard/backend/gpu/Kbuild b/drivers/gpu/arm/midgard/backend/gpu/Kbuild new file mode 100644 index 00000000000..05a7a826c5d --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/Kbuild @@ -0,0 +1,64 @@ +# +# (C) COPYRIGHT 2014 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. 
+# +# + + +BACKEND += \ + backend/gpu/mali_kbase_cache_policy_backend.c \ + backend/gpu/mali_kbase_device_hw.c \ + backend/gpu/mali_kbase_gpu.c \ + backend/gpu/mali_kbase_gpuprops_backend.c \ + backend/gpu/mali_kbase_irq_linux.c \ + backend/gpu/mali_kbase_instr_backend.c \ + backend/gpu/mali_kbase_jm_as.c \ + backend/gpu/mali_kbase_jm_hw.c \ + backend/gpu/mali_kbase_jm_rb.c \ + backend/gpu/mali_kbase_js_affinity.c \ + backend/gpu/mali_kbase_js_backend.c \ + backend/gpu/mali_kbase_mmu_hw_direct.c \ + backend/gpu/mali_kbase_pm_backend.c \ + backend/gpu/mali_kbase_pm_driver.c \ + backend/gpu/mali_kbase_pm_metrics.c \ + backend/gpu/mali_kbase_pm_ca.c \ + backend/gpu/mali_kbase_pm_ca_fixed.c \ + backend/gpu/mali_kbase_pm_always_on.c \ + backend/gpu/mali_kbase_pm_coarse_demand.c \ + backend/gpu/mali_kbase_pm_demand.c \ + backend/gpu/mali_kbase_pm_policy.c \ + backend/gpu/mali_kbase_time.c + +ifeq ($(MALI_CUSTOMER_RELEASE),0) +BACKEND += \ + backend/gpu/mali_kbase_pm_ca_random.c \ + backend/gpu/mali_kbase_pm_demand_always_powered.c \ + backend/gpu/mali_kbase_pm_fast_start.c +endif + +ifeq ($(CONFIG_MALI_DEVFREQ),y) +BACKEND += backend/gpu/mali_kbase_devfreq.c +endif + +ifeq ($(CONFIG_MALI_NO_MALI),y) + # Dummy model + BACKEND += backend/gpu/mali_kbase_model_dummy.c + BACKEND += backend/gpu/mali_kbase_model_linux.c + # HW error simulation + BACKEND += backend/gpu/mali_kbase_model_error_generator.c +endif + +ifeq ($(wildcard $(MALI_METRICS_PATH)/backend/gpu/mali_kbase_pm_metrics_linux.c),) + BACKEND += backend/gpu/mali_kbase_pm_metrics_dummy.c +else + BACKEND += backend/gpu/mali_kbase_pm_metrics_linux.c +endif + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h new file mode 100644 index 00000000000..c8ae87eb84a --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_backend_config.h @@ -0,0 +1,29 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * Backend specific configuration + */ + +#ifndef _KBASE_BACKEND_CONFIG_H_ +#define _KBASE_BACKEND_CONFIG_H_ + +/* Enable GPU reset API */ +#define KBASE_GPU_RESET_EN 1 + +#endif /* _KBASE_BACKEND_CONFIG_H_ */ + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c new file mode 100644 index 00000000000..92a14fa1bae --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.c @@ -0,0 +1,22 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#include "backend/gpu/mali_kbase_cache_policy_backend.h" +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <backend/gpu/mali_kbase_device_internal.h> + + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h new file mode 100644 index 00000000000..42069fc88a1 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_cache_policy_backend.h @@ -0,0 +1,26 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +#ifndef _KBASE_CACHE_POLICY_BACKEND_H_ +#define _KBASE_CACHE_POLICY_BACKEND_H_ + +#include "mali_kbase.h" +#include "mali_base_kernel.h" + + +#endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c new file mode 100644 index 00000000000..3791457f605 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.c @@ -0,0 +1,284 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase.h> +#include <mali_kbase_config_defaults.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +#include <linux/clk.h> +#include <linux/devfreq.h> +#ifdef CONFIG_DEVFREQ_THERMAL +#include <linux/devfreq_cooling.h> +#endif + +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) +#include <linux/pm_opp.h> +#else /* Linux >= 3.13 */ +/* In 3.13 the OPP include header file, types, and functions were all + * renamed. Use the old filename for the include, and define the new names to + * the old, when an old kernel is detected. 
*/ +#include <linux/opp.h> +#define dev_pm_opp opp +#define dev_pm_opp_get_voltage opp_get_voltage +#define dev_pm_opp_get_opp_count opp_get_opp_count +#define dev_pm_opp_find_freq_ceil opp_find_freq_ceil +#endif /* Linux >= 3.13 */ + + +static int +kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + struct dev_pm_opp *opp; + unsigned long freq = 0; + unsigned long voltage; + int err; + + freq = *target_freq; + + rcu_read_lock(); + opp = devfreq_recommended_opp(dev, &freq, flags); + if (IS_ERR_OR_NULL(opp)) { + rcu_read_unlock(); + dev_err(dev, "Failed to get opp (%ld)\n", PTR_ERR(opp)); + return PTR_ERR(opp); + } + voltage = dev_pm_opp_get_voltage(opp); + rcu_read_unlock(); + + /* + * Only update if there is a change of frequency + */ + if (kbdev->current_freq == freq) { + *target_freq = freq; + return 0; + } + +#ifdef CONFIG_REGULATOR + if (kbdev->regulator && kbdev->current_voltage != voltage + && kbdev->current_freq < freq) { + err = regulator_set_voltage(kbdev->regulator, voltage, voltage); + if (err) { + dev_err(dev, "Failed to increase voltage (%d)\n", err); + return err; + } + } +#endif + + err = clk_set_rate(kbdev->clock, freq); + if (err) { + dev_err(dev, "Failed to set clock %lu (target %lu)\n", + freq, *target_freq); + return err; + } + +#ifdef CONFIG_REGULATOR + if (kbdev->regulator && kbdev->current_voltage != voltage + && kbdev->current_freq > freq) { + err = regulator_set_voltage(kbdev->regulator, voltage, voltage); + if (err) { + dev_err(dev, "Failed to decrease voltage (%d)\n", err); + return err; + } + } +#endif + + *target_freq = freq; + kbdev->current_voltage = voltage; + kbdev->current_freq = freq; + + kbase_pm_reset_dvfs_utilisation(kbdev); + + return err; +} + +static int +kbase_devfreq_cur_freq(struct device *dev, unsigned long *freq) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + *freq = kbdev->current_freq; + + return 0; +} + +static int +kbase_devfreq_status(struct device *dev, struct devfreq_dev_status *stat) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + stat->current_frequency = kbdev->current_freq; + + kbase_pm_get_dvfs_utilisation(kbdev, + &stat->total_time, &stat->busy_time); + + stat->private_data = NULL; + +#ifdef CONFIG_DEVFREQ_THERMAL + memcpy(&kbdev->devfreq_cooling->last_status, stat, sizeof(*stat)); +#endif + + return 0; +} + +static int kbase_devfreq_init_freq_table(struct kbase_device *kbdev, + struct devfreq_dev_profile *dp) +{ + int count; + int i = 0; + unsigned long freq = 0; + struct dev_pm_opp *opp; + + rcu_read_lock(); + count = dev_pm_opp_get_opp_count(kbdev->dev); + if (count < 0) { + rcu_read_unlock(); + return count; + } + rcu_read_unlock(); + + dp->freq_table = kmalloc_array(count, sizeof(dp->freq_table[0]), + GFP_KERNEL); + if (!dp->freq_table) + return -ENOMEM; + + rcu_read_lock(); + for (i = 0; i < count; i++, freq++) { + opp = dev_pm_opp_find_freq_ceil(kbdev->dev, &freq); + if (IS_ERR(opp)) + break; + + dp->freq_table[i] = freq; + } + rcu_read_unlock(); + + if (count != i) + dev_warn(kbdev->dev, "Unable to enumerate all OPPs (%d != %d)\n", + count, i); + + dp->max_state = i; + + return 0; +} + +static void kbase_devfreq_term_freq_table(struct kbase_device *kbdev) +{ + struct devfreq_dev_profile *dp = kbdev->devfreq->profile; + + kfree(dp->freq_table); +} + +static void kbase_devfreq_exit(struct device *dev) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + + kbase_devfreq_term_freq_table(kbdev); +} + +int kbase_devfreq_init(struct
kbase_device *kbdev) +{ +#ifdef CONFIG_DEVFREQ_THERMAL + struct devfreq_cooling_ops *callbacks = POWER_MODEL_CALLBACKS; +#endif + struct devfreq_dev_profile *dp; + int err; + + dev_dbg(kbdev->dev, "Init Mali devfreq\n"); + + if (!kbdev->clock) + return -ENODEV; + + kbdev->current_freq = clk_get_rate(kbdev->clock); + + dp = &kbdev->devfreq_profile; + + dp->initial_freq = kbdev->current_freq; + dp->polling_ms = 100; + dp->target = kbase_devfreq_target; + dp->get_dev_status = kbase_devfreq_status; + dp->get_cur_freq = kbase_devfreq_cur_freq; + dp->exit = kbase_devfreq_exit; + + if (kbase_devfreq_init_freq_table(kbdev, dp)) + return -EFAULT; + + kbdev->devfreq = devfreq_add_device(kbdev->dev, dp, + "simple_ondemand", NULL); + if (IS_ERR(kbdev->devfreq)) { + kbase_devfreq_term_freq_table(kbdev); + return PTR_ERR(kbdev->devfreq); + } + + err = devfreq_register_opp_notifier(kbdev->dev, kbdev->devfreq); + if (err) { + dev_err(kbdev->dev, + "Failed to register OPP notifier (%d)\n", err); + goto opp_notifier_failed; + } + +#ifdef CONFIG_DEVFREQ_THERMAL + if (callbacks) { + + kbdev->devfreq_cooling = of_devfreq_cooling_register_power( + kbdev->dev->of_node, + kbdev->devfreq, + callbacks); + if (IS_ERR_OR_NULL(kbdev->devfreq_cooling)) { + err = PTR_ERR(kbdev->devfreq_cooling); + dev_err(kbdev->dev, + "Failed to register cooling device (%d)\n", + err); + goto cooling_failed; + } + } +#endif + + return 0; + +#ifdef CONFIG_DEVFREQ_THERMAL +cooling_failed: + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); +#endif /* CONFIG_DEVFREQ_THERMAL */ +opp_notifier_failed: + err = devfreq_remove_device(kbdev->devfreq); + if (err) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + else + kbdev->devfreq = NULL; + + return err; +} + +void kbase_devfreq_term(struct kbase_device *kbdev) +{ + int err; + + dev_dbg(kbdev->dev, "Term Mali devfreq\n"); + +#ifdef CONFIG_DEVFREQ_THERMAL + devfreq_cooling_unregister(kbdev->devfreq_cooling); +#endif + + devfreq_unregister_opp_notifier(kbdev->dev, kbdev->devfreq); + + err = devfreq_remove_device(kbdev->devfreq); + if (err) + dev_err(kbdev->dev, "Failed to terminate devfreq (%d)\n", err); + else + kbdev->devfreq = NULL; +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h new file mode 100644 index 00000000000..c0bf8b15b3b --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_devfreq.h @@ -0,0 +1,24 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _BASE_DEVFREQ_H_ +#define _BASE_DEVFREQ_H_ + +int kbase_devfreq_init(struct kbase_device *kbdev); +void kbase_devfreq_term(struct kbase_device *kbdev); + +#endif /* _BASE_DEVFREQ_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c new file mode 100644 index 00000000000..83d5ec9f7a9 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_hw.c @@ -0,0 +1,116 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * + */ +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_instr_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +#include <backend/gpu/mali_kbase_device_internal.h> + +#if !defined(CONFIG_MALI_NO_MALI) +void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, + struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + dev_dbg(kbdev->dev, "w: reg %04x val %08x", offset, value); + writel(value, kbdev->reg + offset); + if (kctx && kctx->jctx.tb) + kbase_device_trace_register_access(kctx, REG_WRITE, offset, + value); +} + +KBASE_EXPORT_TEST_API(kbase_reg_write); + +u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, + struct kbase_context *kctx) +{ + u32 val; + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered); + KBASE_DEBUG_ASSERT(kctx == NULL || kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kbdev->dev != NULL); + val = readl(kbdev->reg + offset); + dev_dbg(kbdev->dev, "r: reg %04x val %08x", offset, val); + if (kctx && kctx->jctx.tb) + kbase_device_trace_register_access(kctx, REG_READ, offset, val); + return val; +} + +KBASE_EXPORT_TEST_API(kbase_reg_read); +#endif /* !defined(CONFIG_MALI_NO_MALI) */ + +/** + * kbase_report_gpu_fault - Report a GPU fault. + * @kbdev: Kbase device pointer + * @multiple: Zero if only GPU_FAULT was raised, non-zero if MULTIPLE_GPU_FAULTS + * was also set + * + * This function is called from the interrupt handler when a GPU fault occurs. + * It reports the details of the fault using dev_warn(). + */ +static void kbase_report_gpu_fault(struct kbase_device *kbdev, int multiple) +{ + u32 status; + u64 address; + + status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL); + address = (u64) kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_HI), NULL) << 32; + address |= kbase_reg_read(kbdev, + GPU_CONTROL_REG(GPU_FAULTADDRESS_LO), NULL); + + dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) at 0x%016llx", + status & 0xFF, + kbase_exception_name(kbdev, status), + address); + if (multiple) + dev_warn(kbdev->dev, "There were multiple GPU faults - some have not been reported\n"); +} + +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) +{ + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ, NULL, NULL, 0u, val); + if (val & GPU_FAULT) + kbase_report_gpu_fault(kbdev, val & MULTIPLE_GPU_FAULTS); + + if (val & RESET_COMPLETED) + kbase_pm_reset_done(kbdev); + + if (val & PRFCNT_SAMPLE_COMPLETED) + kbase_instr_hwcnt_sample_done(kbdev); + + if (val & CLEAN_CACHES_COMPLETED) + kbase_clean_caches_done(kbdev); + + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val, NULL); + + /* kbase_pm_check_transitions must be called after the IRQ has been + * cleared. This is because it might trigger further power transitions + * and we don't want to miss the interrupt raised to notify us that + * these further transitions have finished. 
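+ * Hence the POWER_CHANGED_ALL check below is made only after the GPU_IRQ_CLEAR write above.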
+ */ + if (val & POWER_CHANGED_ALL) + kbase_pm_power_changed(kbdev); + + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, NULL, 0u, val); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h new file mode 100644 index 00000000000..5b20445932f --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_device_internal.h @@ -0,0 +1,67 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Backend-specific HW access device APIs + */ + +#ifndef _KBASE_DEVICE_INTERNAL_H_ +#define _KBASE_DEVICE_INTERNAL_H_ + +/** + * kbase_reg_write - write to GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * @value: Value to write + * @kctx: Kbase context pointer. May be NULL + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If + * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr + * != KBASEP_AS_NR_INVALID). + */ +void kbase_reg_write(struct kbase_device *kbdev, u16 offset, u32 value, + struct kbase_context *kctx); + +/** + * kbase_reg_read - read from GPU register + * @kbdev: Kbase device pointer + * @offset: Offset of register + * @kctx: Kbase context pointer. May be NULL + * + * Caller must ensure the GPU is powered (@kbdev->pm.gpu_powered != false). If + * @kctx is not NULL then the caller must ensure it is scheduled (@kctx->as_nr + * != KBASEP_AS_NR_INVALID). + * + * Return: Value in desired register + */ +u32 kbase_reg_read(struct kbase_device *kbdev, u16 offset, + struct kbase_context *kctx); + + +/** + * kbase_gpu_interrupt - GPU interrupt handler + * @kbdev: Kbase device pointer + * @val: The value of the GPU IRQ status register which triggered the call + * + * This function is called from the interrupt handler when a GPU irq is to be + * handled. + */ +void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val); + +#endif /* _KBASE_DEVICE_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c new file mode 100644 index 00000000000..72a98d0f795 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpu.c @@ -0,0 +1,124 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + +/* + * Register-based HW access backend APIs + */ +#include <mali_kbase.h> +#include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_hwaccess_backend.h> +#include <backend/gpu/mali_kbase_irq_internal.h> +#include <backend/gpu/mali_kbase_jm_internal.h> +#include <backend/gpu/mali_kbase_js_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +int kbase_backend_early_init(struct kbase_device *kbdev) +{ + int err; + + err = kbasep_platform_device_init(kbdev); + if (err) + return err; + + /* Ensure we can access the GPU registers */ + kbase_pm_register_access_enable(kbdev); + + /* Find out GPU properties based on the GPU feature registers */ + kbase_gpuprops_set(kbdev); + + /* We're done accessing the GPU registers for now. */ + kbase_pm_register_access_disable(kbdev); + + err = kbase_hwaccess_pm_init(kbdev); + if (err) + goto fail_pm; + + err = kbase_install_interrupts(kbdev); + if (err) + goto fail_interrupts; + + return 0; + +fail_interrupts: + kbase_hwaccess_pm_term(kbdev); +fail_pm: + kbasep_platform_device_term(kbdev); + + return err; +} + +void kbase_backend_early_term(struct kbase_device *kbdev) +{ + kbase_release_interrupts(kbdev); + kbase_hwaccess_pm_term(kbdev); + kbasep_platform_device_term(kbdev); +} + +int kbase_backend_late_init(struct kbase_device *kbdev) +{ + int err; + + err = kbase_hwaccess_pm_powerup(kbdev, PM_HW_ISSUES_DETECT); + if (err) + return err; + + err = kbase_backend_timer_init(kbdev); + if (err) + goto fail_timer; + +/* Currently disabled on the prototype */ +#ifdef CONFIG_MALI_DEBUG +#ifndef CONFIG_MALI_NO_MALI + if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { + dev_err(kbdev->dev, "Interrupt assignment check failed.\n"); + err = -EINVAL; + goto fail_interrupt_test; + } +#endif /* !CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_DEBUG */ + + err = kbase_job_slot_init(kbdev); + if (err) + goto fail_job_slot; + + init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); + + return 0; + +fail_job_slot: +/* Currently disabled on the prototype */ +#ifdef CONFIG_MALI_DEBUG +#ifndef CONFIG_MALI_NO_MALI +fail_interrupt_test: +#endif /* !CONFIG_MALI_NO_MALI */ +#endif /* CONFIG_MALI_DEBUG */ + kbase_backend_timer_term(kbdev); +fail_timer: + kbase_hwaccess_pm_halt(kbdev); + + return err; +} + +void kbase_backend_late_term(struct kbase_device *kbdev) +{ + kbase_job_slot_halt(kbdev); + kbase_job_slot_term(kbdev); + kbase_backend_timer_term(kbdev); + kbase_hwaccess_pm_halt(kbdev); +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c new file mode 100644 index 00000000000..bc7235f5833 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_gpuprops_backend.c @@ -0,0 +1,85 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA.
+ * + */ + + + + + +/* + * Base kernel property query backend APIs + */ + +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <mali_kbase_hwaccess_gpuprops.h> + +void kbase_backend_gpuprops_get(struct kbase_device *kbdev, + struct kbase_gpuprops_regdump *regdump) +{ + int i; + + /* Fill regdump with the content of the relevant registers */ + regdump->gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID), NULL); + + regdump->l2_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_FEATURES), NULL); + regdump->suspend_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SUSPEND_SIZE), NULL); + regdump->tiler_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_FEATURES), NULL); + regdump->mem_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MEM_FEATURES), NULL); + regdump->mmu_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(MMU_FEATURES), NULL); + regdump->as_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(AS_PRESENT), NULL); + regdump->js_present = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_PRESENT), NULL); + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + regdump->js_features[i] = kbase_reg_read(kbdev, + GPU_CONTROL_REG(JS_FEATURES_REG(i)), NULL); + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + regdump->texture_features[i] = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TEXTURE_FEATURES_REG(i)), NULL); + + regdump->thread_max_threads = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_THREADS), NULL); + regdump->thread_max_workgroup_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_WORKGROUP_SIZE), + NULL); + regdump->thread_max_barrier_size = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_MAX_BARRIER_SIZE), NULL); + regdump->thread_features = kbase_reg_read(kbdev, + GPU_CONTROL_REG(THREAD_FEATURES), NULL); + + regdump->shader_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_LO), NULL); + regdump->shader_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(SHADER_PRESENT_HI), NULL); + + regdump->tiler_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_PRESENT_LO), NULL); + regdump->tiler_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(TILER_PRESENT_HI), NULL); + + regdump->l2_present_lo = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_LO), NULL); + regdump->l2_present_hi = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_PRESENT_HI), NULL); +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c new file mode 100644 index 00000000000..2c987071a77 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_backend.c @@ -0,0 +1,536 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * GPU backend instrumentation APIs. 
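+ * These functions drive the dump/clean state machine described by the kbase_instr_state enum in mali_kbase_instr_defs.h.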
*/ + +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> +#include <backend/gpu/mali_kbase_instr_internal.h> + +/** + * kbasep_instr_hwcnt_cacheclean - Issue Cache Clean & Invalidate command to + * hardware + * + * @kbdev: Kbase device + */ +static void kbasep_instr_hwcnt_cacheclean(struct kbase_device *kbdev) +{ + unsigned long flags; + unsigned long pm_flags; + u32 irq_mask; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Wait for any reset to complete */ + while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.cache_clean_wait, + kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_RESETTING); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_REQUEST_CLEAN); + + /* Enable interrupt */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask | CLEAN_CACHES_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + + /* clean&invalidate the caches so we're sure the mmu tables for the dump + * buffer are valid */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES, NULL); + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANING; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + +int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_uk_hwcnt_setup *setup) +{ + unsigned long flags, pm_flags; + int err = -EINVAL; + struct kbasep_js_device_data *js_devdata; + u32 irq_mask; + int ret; + u64 shader_cores_needed; + + KBASE_DEBUG_ASSERT(NULL == kbdev->hwcnt.suspended_kctx); + + shader_cores_needed = kbase_pm_get_present_cores(kbdev, + KBASE_PM_CORE_SHADER); + + js_devdata = &kbdev->js_data; + + /* alignment failure */ + if ((setup->dump_buffer == 0ULL) || (setup->dump_buffer & (2048 - 1))) + goto out_err; + + /* Override core availability policy to ensure all cores are available + */ + kbase_pm_ca_instr_enable(kbdev); + + /* Request the cores early on synchronously - we'll release them on any + * errors (e.g.
instrumentation already active) */ + kbase_pm_request_cores_sync(kbdev, true, shader_cores_needed); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is already enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out_unrequest_cores; + } + + /* Enable interrupt */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask | + PRFCNT_SAMPLE_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + + /* In use, this context is the owner */ + kbdev->hwcnt.kctx = kctx; + /* Remember the dump address so we can reprogram it later */ + kbdev->hwcnt.addr = setup->dump_buffer; + /* Remember all the settings for suspend/resume */ + if (&kbdev->hwcnt.suspended_state != setup) + memcpy(&kbdev->hwcnt.suspended_state, setup, + sizeof(kbdev->hwcnt.suspended_state)); + + /* Request the clean */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + kbdev->hwcnt.backend.triggered = 0; + /* Clean&invalidate the caches so we're sure the mmu tables for the dump + * buffer are valid */ + ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, + &kbdev->hwcnt.backend.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Wait for cacheclean to complete */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_IDLE); + + kbase_pm_request_l2_caches(kbdev); + + /* Configure */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) + | PRFCNT_CONFIG_MODE_OFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + setup->dump_buffer & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + setup->dump_buffer >> 32, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), + setup->jm_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), + setup->shader_bm, kctx); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), + setup->mmu_l2_bm, kctx); + /* Due to PRLAM-8186 we need to disable the Tiler before we enable the + * HW counter dump.
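+ * (The tiler counters are re-enabled further down, once PRFCNT_CONFIG has been switched to manual mode.)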
*/ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), 0, + kctx); + else + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), + setup->tiler_bm, kctx); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), + (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) | + PRFCNT_CONFIG_MODE_MANUAL, kctx); + + /* If HW has PRLAM-8186 we can now re-enable the tiler HW counters dump + */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186)) + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), + setup->tiler_bm, kctx); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + err = 0; + + dev_dbg(kbdev->dev, "HW counters dumping set-up for context %p", kctx); + return err; + out_unrequest_cores: + kbase_pm_unrequest_cores(kbdev, true, shader_cores_needed); + out_err: + return err; +} + +int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx) +{ + unsigned long flags, pm_flags; + int err = -EINVAL; + u32 irq_mask; + struct kbase_device *kbdev = kctx->kbdev; + + while (1) { + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DISABLED) { + /* Instrumentation is not enabled */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out; + } + + if (kbdev->hwcnt.kctx != kctx) { + /* Instrumentation has been setup for another context */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + goto out; + } + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) + break; + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + /* Ongoing dump/setup - wait for its completion */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + } + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + kbdev->hwcnt.backend.triggered = 0; + + /* Disable interrupt */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~PRFCNT_SAMPLE_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + + /* Disable the counters */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG), 0, kctx); + + kbdev->hwcnt.kctx = NULL; + kbdev->hwcnt.addr = 0ULL; + + kbase_pm_ca_instr_disable(kbdev); + + kbase_pm_unrequest_cores(kbdev, true, + kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER)); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + kbase_pm_release_l2_caches(kbdev); + + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", + kctx); + + err = 0; + + out: + return err; +} + +int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx) +{ + unsigned long flags; + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.kctx != kctx) { + /* The instrumentation has been setup for another context */ + goto unlock; + } + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_IDLE) { + /* HW counters are disabled or another dump is 
ongoing, or we're + * resetting */ + goto unlock; + } + + kbdev->hwcnt.backend.triggered = 0; + + /* Mark that we're dumping - the PF handler can signal that we faulted + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DUMPING; + + /* Reconfigure the dump address */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), + kbdev->hwcnt.addr & 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), + kbdev->hwcnt.addr >> 32, NULL); + + /* Start dumping */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_SAMPLE, NULL, NULL, + kbdev->hwcnt.addr, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_SAMPLE, kctx); + + dev_dbg(kbdev->dev, "HW counters dumping done for context %p", kctx); + + err = 0; + + unlock: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_request_dump); + +bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx, + bool * const success) +{ + unsigned long flags; + bool complete = false; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_IDLE) { + *success = true; + complete = true; + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + *success = false; + complete = true; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return complete; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete); + +void kbasep_cache_clean_worker(struct work_struct *data) +{ + struct kbase_device *kbdev; + unsigned long flags; + + kbdev = container_of(data, struct kbase_device, + hwcnt.backend.cache_clean_work); + + mutex_lock(&kbdev->cacheclean_lock); + kbasep_instr_hwcnt_cacheclean(kbdev); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Wait for our condition, and any reset to complete */ + while (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING || + kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_CLEANING) { + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.cache_clean_wait, + (kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_RESETTING && + kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_CLEANING)); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_CLEANED); + + /* All finished and idle */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + mutex_unlock(&kbdev->cacheclean_lock); +} + +void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + kbdev->hwcnt.backend.triggered = 1; + wake_up(&kbdev->hwcnt.backend.wait); + } else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) { + int ret; + /* Always clean and invalidate the cache after a successful dump + */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN; + ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq, + &kbdev->hwcnt.backend.cache_clean_work); + KBASE_DEBUG_ASSERT(ret); + } + /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a reset, + * and the instrumentation state hasn't been restored yet - + * kbasep_reset_timeout_worker() will do the rest of the work */ + + 
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); +} + +void kbase_clean_caches_done(struct kbase_device *kbdev) +{ + u32 irq_mask; + + if (kbdev->hwcnt.backend.state != KBASE_INSTR_STATE_DISABLED) { + unsigned long flags; + unsigned long pm_flags; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + /* Disable interrupt */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, pm_flags); + irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), + irq_mask & ~CLEAN_CACHES_COMPLETED, NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, pm_flags); + + /* Wakeup... */ + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_CLEANING) { + /* Only wake if we weren't resetting */ + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED; + wake_up(&kbdev->hwcnt.backend.cache_clean_wait); + } + /* NOTE: In the state KBASE_INSTR_STATE_RESETTING, We're in a + * reset, and the instrumentation state hasn't been restored yet + * - kbasep_reset_timeout_worker() will do the rest of the work + */ + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + } +} + +int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + unsigned long flags; + int err; + + /* Wait for dump & cacheclean to complete */ + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_FAULT) { + err = -EINVAL; + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE; + } else { + /* Dump done */ + KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_IDLE); + err = 0; + } + + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + + return err; +} + +int kbase_instr_hwcnt_clear(struct kbase_context *kctx) +{ + unsigned long flags; + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + + if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) { + /* GPU is being reset */ + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + wait_event(kbdev->hwcnt.backend.wait, + kbdev->hwcnt.backend.triggered != 0); + spin_lock_irqsave(&kbdev->hwcnt.lock, flags); + } + + /* Check it's the context previously set up and we're not already + * dumping */ + if (kbdev->hwcnt.kctx != kctx || kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_IDLE) + goto out; + + /* Clear the counters */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_PRFCNT_CLEAR, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_PRFCNT_CLEAR, kctx); + + err = 0; + +out: + spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags); + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_clear); + +int kbase_instr_backend_init(struct kbase_device *kbdev) +{ + int ret = 0; + + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED; + + init_waitqueue_head(&kbdev->hwcnt.backend.wait); + init_waitqueue_head(&kbdev->hwcnt.backend.cache_clean_wait); + INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work, + kbasep_cache_clean_worker); + kbdev->hwcnt.backend.triggered = 0; + + kbdev->hwcnt.backend.cache_clean_wq = + alloc_workqueue("Mali cache cleaning workqueue", 0, 1); + if (NULL 
== kbdev->hwcnt.backend.cache_clean_wq) + ret = -EINVAL; + + return ret; +} + +void kbase_instr_backend_term(struct kbase_device *kbdev) +{ + destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq); +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h new file mode 100644 index 00000000000..23bd80a5a15 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_defs.h @@ -0,0 +1,62 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * Backend-specific instrumentation definitions + */ + +#ifndef _KBASE_INSTR_DEFS_H_ +#define _KBASE_INSTR_DEFS_H_ + +/* + * Instrumentation State Machine States + */ +enum kbase_instr_state { + /* State where instrumentation is not active */ + KBASE_INSTR_STATE_DISABLED = 0, + /* State machine is active and ready for a command. */ + KBASE_INSTR_STATE_IDLE, + /* Hardware is currently dumping a frame. */ + KBASE_INSTR_STATE_DUMPING, + /* We've requested a clean to occur on a workqueue */ + KBASE_INSTR_STATE_REQUEST_CLEAN, + /* Hardware is currently cleaning and invalidating caches. */ + KBASE_INSTR_STATE_CLEANING, + /* Cache clean completed, and either a) a dump is complete, or + * b) instrumentation can now be set up. */ + KBASE_INSTR_STATE_CLEANED, + /* kbasep_reset_timeout_worker() has started (but not completed) a + * reset. This generally indicates the current action should be aborted, + * and kbasep_reset_timeout_worker() will handle the cleanup */ + KBASE_INSTR_STATE_RESETTING, + /* An error has occurred during DUMPING (page fault). */ + KBASE_INSTR_STATE_FAULT +}; + +/* Structure used for instrumentation and HW counters dumping */ +struct kbase_instr_backend { + wait_queue_head_t wait; + int triggered; + + enum kbase_instr_state state; + wait_queue_head_t cache_clean_wait; + struct workqueue_struct *cache_clean_wq; + struct work_struct cache_clean_work; +}; + +#endif /* _KBASE_INSTR_DEFS_H_ */ + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h new file mode 100644 index 00000000000..e96aeae786e --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_instr_internal.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA.
+ * + */ + + + + + +/* + * Backend-specific HW access instrumentation APIs + */ + +#ifndef _KBASE_INSTR_INTERNAL_H_ +#define _KBASE_INSTR_INTERNAL_H_ + +/** + * kbasep_cache_clean_worker() - Workqueue for handling cache cleaning + * @data: a &struct work_struct + */ +void kbasep_cache_clean_worker(struct work_struct *data); + +/** + * kbase_clean_caches_done() - Cache clean interrupt received + * @kbdev: Kbase device + */ +void kbase_clean_caches_done(struct kbase_device *kbdev); + +/** + * kbase_instr_hwcnt_sample_done() - Dump complete interrupt received + * @kbdev: Kbase device + */ +void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev); + +#endif /* _KBASE_INSTR_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h new file mode 100644 index 00000000000..8781561e73d --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_internal.h @@ -0,0 +1,39 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * Backend specific IRQ APIs + */ + +#ifndef _KBASE_IRQ_INTERNAL_H_ +#define _KBASE_IRQ_INTERNAL_H_ + +int kbase_install_interrupts(struct kbase_device *kbdev); + +void kbase_release_interrupts(struct kbase_device *kbdev); + +/** + * kbase_synchronize_irqs - Ensure that all IRQ handlers have completed + * execution + * @kbdev: The kbase device + */ +void kbase_synchronize_irqs(struct kbase_device *kbdev); + +int kbasep_common_test_interrupt_handlers( + struct kbase_device * const kbdev); + +#endif /* _KBASE_IRQ_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c new file mode 100644 index 00000000000..49c72f90aac --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_irq_linux.c @@ -0,0 +1,471 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#include <mali_kbase.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_irq_internal.h> + +#include <linux/interrupt.h> + +#if !defined(CONFIG_MALI_NO_MALI) + +/* GPU IRQ Tags */ +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 + + +static void *kbase_tag(void *ptr, u32 tag) +{ + return (void *)(((uintptr_t) ptr) | tag); +} + +static void *kbase_untag(void *ptr) +{ + return (void *)(((uintptr_t) ptr) & ~3); +} + + + + +static irqreturn_t kbase_job_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_job_done(kbdev, val); + + return IRQ_HANDLED; +} + +KBASE_EXPORT_TEST_API(kbase_job_irq_handler); + +static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + atomic_inc(&kbdev->faults_pending); + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_warn(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) { + atomic_dec(&kbdev->faults_pending); + return IRQ_NONE; + } + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_mmu_interrupt(kbdev, val); + + atomic_dec(&kbdev->faults_pending); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_gpu_irq_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_STATUS), NULL); + +#ifdef CONFIG_MALI_DEBUG + if (!kbdev->pm.backend.driver_ready_for_irqs) + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x before driver is ready\n", + __func__, irq, val); +#endif /* CONFIG_MALI_DEBUG */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbase_gpu_interrupt(kbdev, val); + + return IRQ_HANDLED; +} +static irq_handler_t kbase_handler_table[] = { + [JOB_IRQ_TAG] = kbase_job_irq_handler, + [MMU_IRQ_TAG] = 
kbase_mmu_irq_handler, + [GPU_IRQ_TAG] = kbase_gpu_irq_handler, +}; + + +#ifdef CONFIG_MALI_DEBUG +#define JOB_IRQ_HANDLER JOB_IRQ_TAG +#define MMU_IRQ_HANDLER MMU_IRQ_TAG +#define GPU_IRQ_HANDLER GPU_IRQ_TAG + +/** + * kbase_set_custom_irq_handler - Set a custom IRQ handler + * @kbdev: Device for which the handler is to be registered + * @custom_handler: Handler to be registered + * @irq_type: Interrupt type + * + * Registers the given interrupt handler for the requested interrupt type. + * If no custom handler is specified, the default handler is registered. + * + * Return: 0 on success, error code otherwise + */ +int kbase_set_custom_irq_handler(struct kbase_device *kbdev, + irq_handler_t custom_handler, + int irq_type) +{ + int result = 0; + irq_handler_t requested_irq_handler = NULL; + + KBASE_DEBUG_ASSERT((JOB_IRQ_HANDLER <= irq_type) && + (GPU_IRQ_HANDLER >= irq_type)); + + /* Release previous handler */ + if (kbdev->irqs[irq_type].irq) + free_irq(kbdev->irqs[irq_type].irq, kbase_tag(kbdev, irq_type)); + + requested_irq_handler = (NULL != custom_handler) ? custom_handler : + kbase_handler_table[irq_type]; + + if (0 != request_irq(kbdev->irqs[irq_type].irq, + requested_irq_handler, + kbdev->irqs[irq_type].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, irq_type))) { + result = -EINVAL; + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[irq_type].irq, irq_type); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + } + + return result; +} + +KBASE_EXPORT_TEST_API(kbase_set_custom_irq_handler); + +/* Test correct interrupt assignment and reception by the CPU */ +struct kbasep_irq_test { + struct hrtimer timer; + wait_queue_head_t wait; + int triggered; + u32 timeout; +}; + +static struct kbasep_irq_test kbasep_irq_test_data; + +#define IRQ_TEST_TIMEOUT 500 + +static irqreturn_t kbase_job_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS), NULL); + + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val, NULL); + + return IRQ_HANDLED; +} + +static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) +{ + unsigned long flags; + struct kbase_device *kbdev = kbase_untag(data); + u32 val; + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!kbdev->pm.backend.gpu_powered) { + /* GPU is turned off - IRQ is not for us */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return IRQ_NONE; + } + + val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS), NULL); + + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (!val) + return IRQ_NONE; + + dev_dbg(kbdev->dev, "%s: irq %d irqstatus 0x%x\n", __func__, irq, val); + + kbasep_irq_test_data.triggered = 1; + wake_up(&kbasep_irq_test_data.wait); + +
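/* Acknowledge the raw status bit forced on by + * kbasep_common_test_interrupt() so the test interrupt does not fire + * again */ +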
kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val, NULL); + + return IRQ_HANDLED; +} + +static enum hrtimer_restart kbasep_test_interrupt_timeout(struct hrtimer *timer) +{ + struct kbasep_irq_test *test_data = container_of(timer, + struct kbasep_irq_test, timer); + + test_data->timeout = 1; + test_data->triggered = 1; + wake_up(&test_data->wait); + return HRTIMER_NORESTART; +} + +static int kbasep_common_test_interrupt( + struct kbase_device * const kbdev, u32 tag) +{ + int err = 0; + irq_handler_t test_handler; + + u32 old_mask_val; + u16 mask_offset; + u16 rawstat_offset; + + switch (tag) { + case JOB_IRQ_TAG: + test_handler = kbase_job_irq_test_handler; + rawstat_offset = JOB_CONTROL_REG(JOB_IRQ_RAWSTAT); + mask_offset = JOB_CONTROL_REG(JOB_IRQ_MASK); + break; + case MMU_IRQ_TAG: + test_handler = kbase_mmu_irq_test_handler; + rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); + mask_offset = MMU_REG(MMU_IRQ_MASK); + break; + case GPU_IRQ_TAG: + /* already tested by pm_driver - bail out */ + default: + return 0; + } + + /* store old mask */ + old_mask_val = kbase_reg_read(kbdev, mask_offset, NULL); + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + + if (kbdev->irqs[tag].irq) { + /* release original handler and install test handler */ + if (kbase_set_custom_irq_handler(kbdev, test_handler, tag) != 0) { + err = -EINVAL; + } else { + kbasep_irq_test_data.timeout = 0; + hrtimer_init(&kbasep_irq_test_data.timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + kbasep_irq_test_data.timer.function = + kbasep_test_interrupt_timeout; + + /* trigger interrupt */ + kbase_reg_write(kbdev, mask_offset, 0x1, NULL); + kbase_reg_write(kbdev, rawstat_offset, 0x1, NULL); + + hrtimer_start(&kbasep_irq_test_data.timer, + HR_TIMER_DELAY_MSEC(IRQ_TEST_TIMEOUT), + HRTIMER_MODE_REL); + + wait_event(kbasep_irq_test_data.wait, + kbasep_irq_test_data.triggered != 0); + + if (kbasep_irq_test_data.timeout != 0) { + dev_err(kbdev->dev, "Interrupt %d (index %d) didn't reach CPU.\n", + kbdev->irqs[tag].irq, tag); + err = -EINVAL; + } else { + dev_dbg(kbdev->dev, "Interrupt %d (index %d) reached CPU.\n", + kbdev->irqs[tag].irq, tag); + } + + hrtimer_cancel(&kbasep_irq_test_data.timer); + kbasep_irq_test_data.triggered = 0; + + /* mask interrupts */ + kbase_reg_write(kbdev, mask_offset, 0x0, NULL); + + /* release test handler */ + free_irq(kbdev->irqs[tag].irq, kbase_tag(kbdev, tag)); + } + + /* restore original interrupt */ + if (request_irq(kbdev->irqs[tag].irq, kbase_handler_table[tag], + kbdev->irqs[tag].flags | IRQF_SHARED, + dev_name(kbdev->dev), kbase_tag(kbdev, tag))) { + dev_err(kbdev->dev, "Can't restore original interrupt %d (index %d)\n", + kbdev->irqs[tag].irq, tag); + err = -EINVAL; + } + } + /* restore old mask */ + kbase_reg_write(kbdev, mask_offset, old_mask_val, NULL); + + return err; +} + +int kbasep_common_test_interrupt_handlers( + struct kbase_device * const kbdev) +{ + int err; + + init_waitqueue_head(&kbasep_irq_test_data.wait); + kbasep_irq_test_data.triggered = 0; + + /* A suspend won't happen during startup/insmod */ + kbase_pm_context_active(kbdev); + + err = kbasep_common_test_interrupt(kbdev, JOB_IRQ_TAG); + if (err) { + dev_err(kbdev->dev, "Interrupt JOB_IRQ didn't reach CPU. Check interrupt assignments.\n"); + goto out; + } + + err = kbasep_common_test_interrupt(kbdev, MMU_IRQ_TAG); + if (err) { + dev_err(kbdev->dev, "Interrupt MMU_IRQ didn't reach CPU. 
Check interrupt assignments.\n"); + goto out; + } + + dev_dbg(kbdev->dev, "Interrupts are correctly assigned.\n"); + + out: + kbase_pm_context_idle(kbdev); + + return err; +} +#endif /* CONFIG_MALI_DEBUG */ + +int kbase_install_interrupts(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + int err; + u32 i; + + for (i = 0; i < nr; i++) { + err = request_irq(kbdev->irqs[i].irq, kbase_handler_table[i], + kbdev->irqs[i].flags | IRQF_SHARED, + dev_name(kbdev->dev), + kbase_tag(kbdev, i)); + if (err) { + dev_err(kbdev->dev, "Can't request interrupt %d (index %d)\n", + kbdev->irqs[i].irq, i); +#ifdef CONFIG_SPARSE_IRQ + dev_err(kbdev->dev, "You have CONFIG_SPARSE_IRQ support enabled - is the interrupt number correct for this configuration?\n"); +#endif /* CONFIG_SPARSE_IRQ */ + goto release; + } + } + + return 0; + + release: + while (i-- > 0) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + + return err; +} + +void kbase_release_interrupts(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + free_irq(kbdev->irqs[i].irq, kbase_tag(kbdev, i)); + } +} + +void kbase_synchronize_irqs(struct kbase_device *kbdev) +{ + u32 nr = ARRAY_SIZE(kbase_handler_table); + u32 i; + + for (i = 0; i < nr; i++) { + if (kbdev->irqs[i].irq) + synchronize_irq(kbdev->irqs[i].irq); + } +} + +#endif /* !defined(CONFIG_MALI_NO_MALI) */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c new file mode 100644 index 00000000000..f2167887229 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_as.c @@ -0,0 +1,385 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register backend context / address space management + */ + +#include <mali_kbase.h> +#include <mali_kbase_hwaccess_jm.h> + +/** + * assign_and_activate_kctx_addr_space - Assign an AS to a context + * @kbdev: Kbase device + * @kctx: Kbase context + * @current_as: Address Space to assign + * + * Assign an Address Space (AS) to a context, and add the context to the Policy. + * + * This includes + * setting up the global runpool_irq structure and the context on the AS, + * Activating the MMU on the AS, + * Allowing jobs to be submitted on the AS. 
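+ * + * (Lock-ordering sketch, inferred from the callers in this file rather than + * stated in the original documentation: jsctx_mutex, then runpool_mutex, + * then the AS transaction mutex, then the runpool IRQ lock - the same set + * checked by the lockdep assertions in the function body.)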
+ * + * Context: + * kbasep_js_kctx_info.jsctx_mutex held, + * kbasep_js_device_data.runpool_mutex held, + * AS transaction mutex held, + * Runpool IRQ lock held + */ +static void assign_and_activate_kctx_addr_space(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_as *current_as) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbasep_js_per_as_data *js_per_as_data; + int as_nr = current_as->number; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(¤t_as->transaction_mutex); + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + /* Attribute handling */ + kbasep_js_ctx_attr_runpool_retain_ctx(kbdev, kctx); + + /* Assign addr space */ + kctx->as_nr = as_nr; + + /* If the GPU is currently powered, activate this address space on the + * MMU */ + if (kbdev->pm.backend.gpu_powered) + kbase_mmu_update(kctx); + /* If the GPU was not powered then the MMU will be reprogrammed on the + * next pm_context_active() */ + + /* Allow it to run jobs */ + kbasep_js_set_submit_allowed(js_devdata, kctx); + + /* Book-keeping */ + js_per_as_data->kctx = kctx; + js_per_as_data->as_busy_refcount = 0; + + kbase_js_runpool_inc_context_count(kbdev, kctx); +} + +/** + * release_addr_space - Release an address space + * @kbdev: Kbase device + * @kctx_as_nr: Address space of context to release + * @kctx: Context being released + * + * Context: kbasep_js_device_data.runpool_mutex must be held + * + * Release an address space, making it available for being picked again. + */ +static void release_addr_space(struct kbase_device *kbdev, int kctx_as_nr, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + u16 as_bit = (1u << kctx_as_nr); + + js_devdata = &kbdev->js_data; + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* The address space must not already be free */ + KBASE_DEBUG_ASSERT(!(js_devdata->as_free & as_bit)); + + js_devdata->as_free |= as_bit; + + kbase_js_runpool_dec_context_count(kbdev, kctx); +} + +bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + int i; + + if (kbdev->hwaccess.active_kctx == kctx) { + /* Context is already active */ + return true; + } + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbasep_js_per_as_data *js_per_as_data = + &kbdev->js_data.runpool_irq.per_as_data[i]; + + if (js_per_as_data->kctx == kctx) { + /* Context already has ASID - mark as active */ + return true; + } + } + + /* Context does not have address space assigned */ + return false; +} + +void kbase_backend_release_ctx_irq(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_per_as_data *js_per_as_data; + int as_nr = kctx->as_nr; + + if (as_nr == KBASEP_AS_NR_INVALID) { + WARN(1, "Attempting to release context without ASID\n"); + return; + } + + lockdep_assert_held(&kbdev->as[as_nr].transaction_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr]; + if (js_per_as_data->as_busy_refcount != 0) { + WARN(1, "Attempting to release active ASID\n"); + return; + } + + /* Release context from address space */ + js_per_as_data->kctx = NULL; + + kbasep_js_clear_submit_allowed(&kbdev->js_data, kctx); + /* If the GPU is currently powered, de-activate this address space on + * the MMU */ + if (kbdev->pm.backend.gpu_powered) + 
kbase_mmu_disable(kctx); + /* If the GPU was not powered then the MMU will be reprogrammed on the + * next pm_context_active() */ + + release_addr_space(kbdev, as_nr, kctx); + kctx->as_nr = KBASEP_AS_NR_INVALID; +} + +void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ +} + +void kbase_backend_release_free_address_space(struct kbase_device *kbdev, + int as_nr) +{ + struct kbasep_js_device_data *js_devdata; + + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&js_devdata->runpool_mutex); + + js_devdata->as_free |= (1 << as_nr); +} + +/** + * check_is_runpool_full - check whether the runpool is full for a specified + * context + * @kbdev: Kbase device + * @kctx: Kbase context + * + * If kctx == NULL, then this makes the least restrictive check on the + * runpool. A specific context that is supplied immediately after could fail + * the check, even under the same conditions. + * + * Therefore, once a context is obtained you \b must re-check it with this + * function, since the return value could change to false. + * + * Context: + * In all cases, the caller must hold kbasep_js_device_data.runpool_mutex. + * When kctx != NULL the caller must hold the + * kbasep_js_kctx_info.ctx.jsctx_mutex. + * When kctx == NULL, then the caller need not hold any jsctx_mutex locks (but + * it doesn't do any harm to do so). + * + * Return: true if the runpool is full + */ +static bool check_is_runpool_full(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + bool is_runpool_full; + + js_devdata = &kbdev->js_data; + lockdep_assert_held(&js_devdata->runpool_mutex); + + /* Regardless of whether a context is submitting or not, can't have more + * than there are HW address spaces */ + is_runpool_full = (bool) (js_devdata->nr_all_contexts_running >= + kbdev->nr_hw_address_spaces); + + if (kctx != NULL && (kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) { + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + /* Contexts that submit might use less of the address spaces + * available, due to HW workarounds. In which case, the runpool + * is also full when the number of submitting contexts exceeds + * the number of submittable address spaces. + * + * Both checks must be made: can have nr_user_address_spaces == + * nr_hw_address spaces, and at the same time can have + * nr_user_contexts_running < nr_all_contexts_running. 
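+ * + * (Worked example, not part of the original comment: with 4 HW address + * spaces of which only 2 are usable by submitting contexts, 3 contexts + * running with 2 of them submitting already counts as full for a + * submitting context, even though nr_all_contexts_running < + * nr_hw_address_spaces.)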
*/ + is_runpool_full |= (bool) + (js_devdata->nr_user_contexts_running >= + kbdev->nr_user_address_spaces); + } + + return is_runpool_full; +} + +int kbase_backend_find_free_address_space(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + int i; + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + /* First try to find a free address space */ + if (check_is_runpool_full(kbdev, kctx)) + i = -1; + else + i = ffs(js_devdata->as_free) - 1; + + if (i >= 0 && i < kbdev->nr_hw_address_spaces) { + js_devdata->as_free &= ~(1 << i); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return i; + } + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* No address space currently free, see if we can release one */ + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbasep_js_per_as_data *js_per_as_data; + struct kbasep_js_kctx_info *as_js_kctx_info; + struct kbase_context *as_kctx; + + js_per_as_data = &kbdev->js_data.runpool_irq.per_as_data[i]; + as_kctx = js_per_as_data->kctx; + as_js_kctx_info = &as_kctx->jctx.sched_info; + + /* Don't release privileged or active contexts, or contexts with + * jobs running */ + if (as_kctx && !(as_kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_PRIVILEGED) && + js_per_as_data->as_busy_refcount == 0) { + if (!kbasep_js_runpool_retain_ctx_nolock(kbdev, + as_kctx)) { + WARN(1, "Failed to retain active context\n"); + + spin_unlock_irqrestore( + &js_devdata->runpool_irq.lock, + flags); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return KBASEP_AS_NR_INVALID; + } + + kbasep_js_clear_submit_allowed(js_devdata, as_kctx); + + /* Drop and retake locks to take the jsctx_mutex on the + * context we're about to release without violating lock + * ordering + */ + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + + /* Release context from address space */ + mutex_lock(&as_js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + kbasep_js_runpool_release_ctx_nolock(kbdev, as_kctx); + + if (!as_js_kctx_info->ctx.is_scheduled) { + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, + as_kctx, + true); + + js_devdata->as_free &= ~(1 << i); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + + return i; + } + + /* Context was retained while locks were dropped, + * continue looking for free AS */ + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&as_js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + } + } + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return KBASEP_AS_NR_INVALID; +} + +bool kbase_backend_use_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, + int as_nr) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + struct kbase_as *new_address_space = NULL; + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + if 
(kbdev->hwaccess.active_kctx == kctx || + kctx->as_nr != KBASEP_AS_NR_INVALID || + as_nr == KBASEP_AS_NR_INVALID) { + WARN(1, "Invalid parameters to use_ctx()\n"); + return false; + } + + new_address_space = &kbdev->as[as_nr]; + + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&new_address_space->transaction_mutex); + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + assign_and_activate_kctx_addr_space(kbdev, kctx, new_address_space); + + if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_PRIVILEGED) != 0) { + /* We need to retain it to keep the corresponding address space + */ + kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + } + + return true; +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h new file mode 100644 index 00000000000..1598fce4c28 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_defs.h @@ -0,0 +1,112 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register-based HW access backend specific definitions + */ + +#ifndef _KBASE_HWACCESS_GPU_DEFS_H_ +#define _KBASE_HWACCESS_GPU_DEFS_H_ + +/* SLOT_RB_SIZE must be < 256 */ +#define SLOT_RB_SIZE 2 +#define SLOT_RB_MASK (SLOT_RB_SIZE - 1) + +/** + * struct rb_entry - Ringbuffer entry + * @katom: Atom associated with this entry + */ +struct rb_entry { + struct kbase_jd_atom *katom; +}; + +/** + * struct slot_rb - Slot ringbuffer + * @entries: Ringbuffer entries + * @last_context: The last context to submit a job on this slot + * @read_idx: Current read index of buffer + * @write_idx: Current write index of buffer + * @job_chain_flag: Flag used to implement jobchain disambiguation + */ +struct slot_rb { + struct rb_entry entries[SLOT_RB_SIZE]; + + struct kbase_context *last_context; + + u8 read_idx; + u8 write_idx; + + u8 job_chain_flag; +}; + +/** + * struct kbase_backend_data - GPU backend specific data for HW access layer + * @slot_rb: Slot ringbuffers + * @rmu_workaround_flag: When PRLAM-8987 is present, this flag determines + * whether slots 0/1 or slot 2 are currently being + * pulled from + * @scheduling_timer: The timer tick used for rescheduling jobs + * @timer_running: Is the timer running? The runpool_mutex must be + * held whilst modifying this. 
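+ * + * (Illustrative note, assuming the usual power-of-two ring-buffer idiom + * rather than quoting code outside this patch: a &struct slot_rb entry is + * addressed as entries[idx & SLOT_RB_MASK], which is why SLOT_RB_SIZE must + * be a power of two and, per the comment above, stay below 256 so the u8 + * read/write indices suffice.)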
+ * + * The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when + * accessing this structure + */ +struct kbase_backend_data { + struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS]; + + bool rmu_workaround_flag; + + struct hrtimer scheduling_timer; + + bool timer_running; + + /* Set when we're about to reset the GPU */ + atomic_t reset_gpu; + +/* The GPU reset isn't pending */ +#define KBASE_RESET_GPU_NOT_PENDING 0 +/* kbase_prepare_to_reset_gpu has been called */ +#define KBASE_RESET_GPU_PREPARED 1 +/* kbase_reset_gpu has been called - the reset will now definitely happen + * within the timeout period */ +#define KBASE_RESET_GPU_COMMITTED 2 +/* The GPU reset process is currently occurring (timeout has expired or + * kbasep_try_reset_gpu_early was called) */ +#define KBASE_RESET_GPU_HAPPENING 3 + + /* Work queue and work item for performing the reset in */ + struct workqueue_struct *reset_workq; + struct work_struct reset_work; + wait_queue_head_t reset_wait; + struct hrtimer reset_timer; +}; + +/** + * struct kbase_jd_atom_backend - GPU backend specific katom data + */ +struct kbase_jd_atom_backend { +}; + +/** + * struct kbase_context_backend - GPU backend specific context data + */ +struct kbase_context_backend { +}; + +#endif /* _KBASE_HWACCESS_GPU_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c new file mode 100644 index 00000000000..508394ea025 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_hw.c @@ -0,0 +1,1477 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Base kernel job manager APIs + */ + +#include <mali_kbase.h> +#include <mali_kbase_config.h> +#include <mali_midg_regmap.h> +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#include <mali_kbase_gator.h> +#endif +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include <mali_kbase_tlstream.h> +#endif +#include <mali_kbase_hw.h> +#include <mali_kbase_config_defaults.h> +#include <mali_kbase_hwaccess_jm.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_irq_internal.h> +#include <backend/gpu/mali_kbase_js_affinity.h> +#include <backend/gpu/mali_kbase_jm_internal.h> + +#define beenthere(kctx, f, a...)
\ + dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS +u64 mali_js0_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; +u64 mali_js1_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; +u64 mali_js2_affinity_mask = 0xFFFFFFFFFFFFFFFFULL; +#endif + +#if KBASE_GPU_RESET_EN +static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev); +static void kbasep_reset_timeout_worker(struct work_struct *data); +static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer); +#endif /* KBASE_GPU_RESET_EN */ + +static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, int js, + struct kbase_context *kctx) +{ + return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), kctx); +} + +void kbase_job_hw_submit(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js) +{ + struct kbase_context *kctx; + u32 cfg; + u64 jc_head = katom->jc; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(katom); + + kctx = katom->kctx; + + /* Command register must be available */ + KBASE_DEBUG_ASSERT(kbasep_jm_is_js_free(kbdev, js, kctx)); + /* Affinity is not violating */ + kbase_js_debug_log_current_affinities(kbdev); + KBASE_DEBUG_ASSERT(!kbase_js_affinity_would_violate(kbdev, js, + katom->affinity)); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), + jc_head & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), + jc_head >> 32, kctx); + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS + { + u64 mask; + u32 value; + + if (0 == js) + mask = mali_js0_affinity_mask; + else if (1 == js) + mask = mali_js1_affinity_mask; + else + mask = mali_js2_affinity_mask; + + value = katom->affinity & (mask & 0xFFFFFFFF); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), + value, kctx); + + value = (katom->affinity >> 32) & ((mask>>32) & 0xFFFFFFFF); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), + value, kctx); + } +#else + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_LO), + katom->affinity & 0xFFFFFFFF, kctx); + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_AFFINITY_NEXT_HI), + katom->affinity >> 32, kctx); +#endif + + /* start MMU, medium priority, cache clean/flush on end, clean/flush on + * start */ + cfg = kctx->as_nr; + + cfg |= JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE; + cfg |= JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE; + + cfg |= JS_CONFIG_START_MMU; + cfg |= JS_CONFIG_THREAD_PRI(8); + + if (kbase_hw_has_feature(kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + if (!kbdev->hwaccess.backend.slot_rb[js].job_chain_flag) { + cfg |= JS_CONFIG_JOB_CHAIN_FLAG; + katom->atom_flags |= KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = + true; + } else { + katom->atom_flags &= ~KBASE_KATOM_FLAGS_JOBCHAIN; + kbdev->hwaccess.backend.slot_rb[js].job_chain_flag = + false; + } + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_CONFIG_NEXT), cfg, kctx); + + + /* Write an approximate start timestamp. + * It's approximate because there might be a job in the HEAD register. + * In such cases, we'll try to make a better approximation in the IRQ + * handler (up to the KBASE_JS_IRQ_THROTTLE_TIME_US). */ + katom->start_timestamp = ktime_get(); + + /* GO ! 
*/ + dev_dbg(kbdev->dev, "JS: Submitting atom %p from ctx %p to js[%d] with head=0x%llx, affinity=0x%llx", + katom, kctx, js, jc_head, katom->affinity); + + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_SUBMIT, kctx, katom, jc_head, js, + (u32) katom->affinity); + +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_job_slots_event( + GATOR_MAKE_EVENT(GATOR_JOB_SLOT_START, js), + kctx, kbase_jd_atom_id(kctx, katom)); +#endif +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_ret_atom_lpu( + katom, + &kbdev->gpu_props.props.raw_props.js_features[js]); +#endif +#ifdef CONFIG_GPU_TRACEPOINTS + if (kbase_backend_nr_atoms_submitted(kbdev, js) == 1) { + /* If this is the only job on the slot, trace it as starting */ + char js_string[16]; + + trace_gpu_sched_switch( + kbasep_make_job_slot_string(js, js_string), + ktime_to_ns(katom->start_timestamp), + (u32)katom->kctx, 0, katom->work_id); + kbdev->hwaccess.backend.slot_rb[js].last_context = katom->kctx; + } +#endif + kbase_timeline_job_slot_submit(kbdev, kctx, katom, js); + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), + JS_COMMAND_START, katom->kctx); +} + +/** + * kbasep_job_slot_update_head_start_timestamp - Update timestamp + * @kbdev: kbase device + * @js: job slot + * @end_timestamp: timestamp + * + * Update the start_timestamp of the job currently in the HEAD, based on the + * fact that we got an IRQ for the previous set of completed jobs. + * + * The estimate also takes into account the %KBASE_JS_IRQ_THROTTLE_TIME_US and + * the time the job was submitted, to work out the best estimate (which might + * still result in an over-estimate to the calculated time spent) + */ +static void kbasep_job_slot_update_head_start_timestamp( + struct kbase_device *kbdev, + int js, + ktime_t end_timestamp) +{ + if (kbase_backend_nr_atoms_on_slot(kbdev, js) > 0) { + struct kbase_jd_atom *katom; + ktime_t new_timestamp; + ktime_t timestamp_diff; + /* The atom in the HEAD */ + katom = kbase_gpu_inspect(kbdev, js, 0); + + KBASE_DEBUG_ASSERT(katom != NULL); + + /* Account for any IRQ Throttle time - makes an overestimate of + * the time spent by the job */ + new_timestamp = ktime_sub_ns(end_timestamp, + KBASE_JS_IRQ_THROTTLE_TIME_US * 1000); + timestamp_diff = ktime_sub(new_timestamp, + katom->start_timestamp); + if (ktime_to_ns(timestamp_diff) >= 0) { + /* Only update the timestamp if it's a better estimate + * than what's currently stored. 
This is because our + * estimate that accounts for the throttle time may be + * too much of an overestimate */ + katom->start_timestamp = new_timestamp; + } + } +} + +void kbase_job_done(struct kbase_device *kbdev, u32 done) +{ + unsigned long flags; + int i; + u32 count = 0; + ktime_t end_timestamp = ktime_get(); + struct kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + + KBASE_TRACE_ADD(kbdev, JM_IRQ, NULL, NULL, 0, done); + + memset(&kbdev->slot_submit_count_irq[0], 0, + sizeof(kbdev->slot_submit_count_irq)); + + /* write irq throttle register, this will prevent irqs from occurring + * until the given number of gpu clock cycles have passed */ + { + int irq_throttle_cycles = + atomic_read(&kbdev->irq_throttle_cycles); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), + irq_throttle_cycles, NULL); + } + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + while (done) { + u32 failed = done >> 16; + + /* treat failed slots as finished slots */ + u32 finished = (done & 0xFFFF) | failed; + + /* Note: This is inherently unfair, as we always check + * for lower numbered interrupts before the higher + * numbered ones.*/ + i = ffs(finished) - 1; + KBASE_DEBUG_ASSERT(i >= 0); + + do { + int nr_done; + u32 active; + u32 completion_code = BASE_JD_EVENT_DONE;/* assume OK */ + u64 job_tail = 0; + + if (failed & (1u << i)) { + /* read out the job slot status code if the job + * slot reported failure */ + completion_code = kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_STATUS), NULL); + + switch (completion_code) { + case BASE_JD_EVENT_STOPPED: +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_job_slots_event( + GATOR_MAKE_EVENT( + GATOR_JOB_SLOT_SOFT_STOPPED, i), + NULL, 0); +#endif +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_job_softstop(i); +#endif + /* Soft-stopped job - read the value of + * JS<n>_TAIL so that the job chain can + * be resumed */ + job_tail = (u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_LO), + NULL) | + ((u64)kbase_reg_read(kbdev, + JOB_SLOT_REG(i, JS_TAIL_HI), + NULL) << 32); + break; + case BASE_JD_EVENT_NOT_STARTED: + /* PRLAM-10673 can cause a TERMINATED + * job to come back as NOT_STARTED, but + * the error interrupt helps us detect + * it */ + completion_code = + BASE_JD_EVENT_TERMINATED; + /* fall through */ + default: + dev_warn(kbdev->dev, "error detected from slot %d, job status 0x%08x (%s)", + i, completion_code, + kbase_exception_name + (kbdev, + completion_code)); + } + + kbase_gpu_irq_evict(kbdev, i); + } + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), + done & ((1 << i) | (1 << (i + 16))), + NULL); + active = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_JS_STATE), + NULL); + + if (((active >> i) & 1) == 0 && + (((done >> (i + 16)) & 1) == 0)) { + /* There is a potential race we must work + * around: + * + * 1. A job slot has a job in both current and + * next registers + * 2. The job in current completes + * successfully, the IRQ handler reads + * RAWSTAT and calls this function with the + * relevant bit set in "done" + * 3. The job in the next registers becomes the + * current job on the GPU + * 4. Sometime before the JOB_IRQ_CLEAR line + * above the job on the GPU _fails_ + * 5. The IRQ_CLEAR clears the done bit but not + * the failed bit. This atomically sets + * JOB_IRQ_JS_STATE. However since both jobs + * have now completed the relevant bits for + * the slot are set to 0. 
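+ * + * (Bit-layout reminder: in the JOB_IRQ_* registers, bit i + * reports completion of the job on slot i and bit i+16 + * reports its failure, which is what the done >> 16 + * arithmetic above relies on.)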
+ * + * If we now did nothing then we'd incorrectly + * assume that _both_ jobs had completed + * successfully (since we haven't yet observed + * the fail bit being set in RAWSTAT). + * + * So at this point if there are no active jobs + * left we check to see if RAWSTAT has a failure + * bit set for the job slot. If it does we know + * that there has been a new failure that we + * didn't previously know about, so we make sure + * that we record this in active (but we wait + * for the next loop to deal with it). + * + * If we were handling a job failure (i.e. done + * has the relevant high bit set) then we know + * that the value read back from + * JOB_IRQ_JS_STATE is the correct number of + * remaining jobs because the failed job will + * have prevented any further jobs from starting + * execution. + */ + u32 rawstat = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); + + if ((rawstat >> (i + 16)) & 1) { + /* There is a failed job that we've + * missed - add it back to active */ + active |= (1u << i); + } + } + + dev_dbg(kbdev->dev, "Job ended with status 0x%08X\n", + completion_code); + + nr_done = kbase_backend_nr_atoms_submitted(kbdev, i); + nr_done -= (active >> i) & 1; + nr_done -= (active >> (i + 16)) & 1; + + if (nr_done <= 0) { + dev_warn(kbdev->dev, "Spurious interrupt on slot %d", + i); + + goto spurious; + } + + count += nr_done; + + while (nr_done) { + if (nr_done == 1) { + kbase_gpu_complete_hw(kbdev, i, + completion_code, + job_tail, + &end_timestamp); + kbase_jm_try_kick_all(kbdev); + } else { + /* More than one job has completed. + * Since this is not the last job being + * reported this time it must have + * passed. This is because the hardware + * will not allow further jobs in a job + * slot to complete until the failed job + * is cleared from the IRQ status.
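+ * + * (Concrete example, not in the original + * comment: with nr_done == 3, the first two + * completions are reported as + * BASE_JD_EVENT_DONE and only the final one + * carries completion_code and job_tail.)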
+ */ + kbase_gpu_complete_hw(kbdev, i, + BASE_JD_EVENT_DONE, + 0, + &end_timestamp); + } + nr_done--; + } + spurious: + done = kbase_reg_read(kbdev, + JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10883)) { + /* Workaround for missing interrupt caused by + * PRLAM-10883 */ + if (((active >> i) & 1) && (0 == + kbase_reg_read(kbdev, + JOB_SLOT_REG(i, + JS_STATUS), NULL))) { + /* Force job slot to be processed again + */ + done |= (1u << i); + } + } + + failed = done >> 16; + finished = (done & 0xFFFF) | failed; + } while (finished & (1 << i)); + + kbasep_job_slot_update_head_start_timestamp(kbdev, i, + end_timestamp); + } + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); +#if KBASE_GPU_RESET_EN + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) == + KBASE_RESET_GPU_COMMITTED) { + /* If we're trying to reset the GPU then we might be able to do + * it early (without waiting for a timeout) because some jobs + * have completed + */ + kbasep_try_reset_gpu_early(kbdev); + } +#endif /* KBASE_GPU_RESET_EN */ + KBASE_TRACE_ADD(kbdev, JM_IRQ_END, NULL, NULL, 0, count); +} +KBASE_EXPORT_TEST_API(kbase_job_done); + +static bool kbasep_soft_stop_allowed(struct kbase_device *kbdev, + u16 core_reqs) +{ + bool soft_stops_allowed = true; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) { + if ((core_reqs & BASE_JD_REQ_T) != 0) + soft_stops_allowed = false; + } + return soft_stops_allowed; +} + +static bool kbasep_hard_stop_allowed(struct kbase_device *kbdev, + u16 core_reqs) +{ + bool hard_stops_allowed = true; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8394)) { + if ((core_reqs & BASE_JD_REQ_T) != 0) + hard_stops_allowed = false; + } + return hard_stops_allowed; +} + +void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev, + int js, + u32 action, + u16 core_reqs, + struct kbase_jd_atom *target_katom) +{ + struct kbase_context *kctx = target_katom->kctx; +#if KBASE_TRACE_ENABLE + u32 status_reg_before; + u64 job_in_head_before; + u32 status_reg_after; + + KBASE_DEBUG_ASSERT(!(action & (~JS_COMMAND_MASK))); + + /* Check the head pointer */ + job_in_head_before = ((u64) kbase_reg_read(kbdev, + JOB_SLOT_REG(js, JS_HEAD_LO), NULL)) + | (((u64) kbase_reg_read(kbdev, + JOB_SLOT_REG(js, JS_HEAD_HI), NULL)) + << 32); + status_reg_before = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), + NULL); +#endif + + if (action == JS_COMMAND_SOFT_STOP) { + bool soft_stop_allowed = kbasep_soft_stop_allowed(kbdev, + core_reqs); + + if (!soft_stop_allowed) { +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kbdev->dev, "Attempt made to soft-stop a job that cannot be soft-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); +#endif /* CONFIG_MALI_DEBUG */ + return; + } + + /* We are about to issue a soft stop, so mark the atom as having + * been soft stopped */ + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED; + } + + if (action == JS_COMMAND_HARD_STOP) { + bool hard_stop_allowed = kbasep_hard_stop_allowed(kbdev, + core_reqs); + + if (!hard_stop_allowed) { + /* Jobs can be hard-stopped for the following reasons: + * * CFS decides the job has been running too long (and + * soft-stop has not occurred). In this case the GPU + * will be reset by CFS if the job remains on the + * GPU. + * + * * The context is destroyed, kbase_jd_zap_context + * will attempt to hard-stop the job. However it also + * has a watchdog which will cause the GPU to be + * reset if the job remains on the GPU. + * + * * An (unhandled) MMU fault occurred. 
As long as + * BASE_HW_ISSUE_8245 is defined then the GPU will be + * reset. + * + * All three cases result in the GPU being reset if the + * hard-stop fails, so it is safe to just return and + * ignore the hard-stop request. + */ + dev_warn(kbdev->dev, "Attempt made to hard-stop a job that cannot be hard-stopped. core_reqs = 0x%X", + (unsigned int)core_reqs); + return; + } + target_katom->atom_flags |= KBASE_KATOM_FLAG_BEEN_HARD_STOPPED; + } + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316) && + action == JS_COMMAND_SOFT_STOP) { + int i; + + for (i = 0; i < kbase_backend_nr_atoms_submitted(kbdev, js); + i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + + KBASE_DEBUG_ASSERT(katom); + + /* For HW_ISSUE_8316, only 'bad' jobs attacking the + * system can cause this issue: normally, all memory + * should be allocated in multiples of 4 pages, and + * growable memory should be changed size in multiples + * of 4 pages. + * + * Whilst such 'bad' jobs can be cleared by a GPU reset, + * the locking up of a uTLB entry caused by the bad job + * could also stall other ASs, meaning that other ASs' + * jobs don't complete in the 'grace' period before the + * reset. We don't want to lose other ASs' jobs when + * they would normally complete fine, so we must 'poke' + * the MMU regularly to help other ASs complete */ + kbase_as_poking_timer_retain_atom(kbdev, katom->kctx, + katom); + } + } + + if (kbase_hw_has_feature(kbdev, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION)) { + if (action == JS_COMMAND_SOFT_STOP) + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_SOFT_STOP_1 : + JS_COMMAND_SOFT_STOP_0; + else + action = (target_katom->atom_flags & + KBASE_KATOM_FLAGS_JOBCHAIN) ? + JS_COMMAND_HARD_STOP_1 : + JS_COMMAND_HARD_STOP_0; + } + + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND), action, kctx); + +#if KBASE_TRACE_ENABLE + status_reg_after = kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_STATUS), + NULL); + if (status_reg_after == BASE_JD_EVENT_ACTIVE) { + struct kbase_jd_atom *head; + struct kbase_context *head_kctx; + + head = kbase_gpu_inspect(kbdev, js, 0); + head_kctx = head->kctx; + + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, head_kctx, + head, job_in_head_before, js); + else + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + 0, js); + + switch (action) { + case JS_COMMAND_SOFT_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_SOFT_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_SOFT_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, head_kctx, + head, head->jc, js); + break; + case JS_COMMAND_HARD_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, head_kctx, + head, head->jc, js); + break; + default: + BUG(); + break; + } + } else { + if (status_reg_before == BASE_JD_EVENT_ACTIVE) + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + job_in_head_before, js); + else + KBASE_TRACE_ADD_SLOT(kbdev, JM_CHECK_HEAD, NULL, NULL, + 0, js); + + switch (action) { + case JS_COMMAND_SOFT_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP, NULL, NULL, 0, + js); + break; + case JS_COMMAND_SOFT_STOP_0: + 
KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_0, NULL, NULL, + 0, js); + break; + case JS_COMMAND_SOFT_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_SOFTSTOP_1, NULL, NULL, + 0, js); + break; + case JS_COMMAND_HARD_STOP: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP, NULL, NULL, 0, + js); + break; + case JS_COMMAND_HARD_STOP_0: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_0, NULL, NULL, + 0, js); + break; + case JS_COMMAND_HARD_STOP_1: + KBASE_TRACE_ADD_SLOT(kbdev, JM_HARDSTOP_1, NULL, NULL, + 0, js); + break; + default: + BUG(); + break; + } + } +#endif +} + +void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx) +{ + unsigned long flags; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + int i; + + KBASE_DEBUG_ASSERT(kctx != NULL); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + /* Cancel any remaining running jobs for this kctx */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* Invalidate all jobs in context, to prevent re-submitting */ + for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { + if (!work_pending(&kctx->jctx.atoms[i].work)) + kctx->jctx.atoms[i].event_code = + BASE_JD_EVENT_JOB_CANCELLED; + } + + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_hardstop(kctx, i, NULL); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); +} + +void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx, + struct kbase_jd_atom *target_katom) +{ + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + int js = target_katom->slot_nr; + int priority = target_katom->sched_priority; + int i; + + KBASE_DEBUG_ASSERT(kctx != NULL); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + for (i = 0; i < kbase_backend_nr_atoms_on_slot(kbdev, js); i++) { + struct kbase_jd_atom *katom; + + katom = kbase_gpu_inspect(kbdev, js, i); + if (!katom) + continue; + + if (katom->kctx != kctx) + continue; + + if (katom->sched_priority > priority) + kbase_job_slot_softstop(kbdev, js, katom); + } +} + +struct zap_reset_data { + /* The stages are: + * 1. The timer has never been called + * 2. The zap has timed out, all slots are soft-stopped - the GPU reset + * will happen. The GPU has been reset when + * kbdev->hwaccess.backend.reset_wait is signalled + * + * (-1 - The timer has been cancelled) + */ + int stage; + struct kbase_device *kbdev; + struct hrtimer timer; + spinlock_t lock; /* protects updates to stage member */ +}; + +static enum hrtimer_restart zap_timeout_callback(struct hrtimer *timer) +{ + struct zap_reset_data *reset_data = container_of(timer, + struct zap_reset_data, timer); + struct kbase_device *kbdev = reset_data->kbdev; + unsigned long flags; + + spin_lock_irqsave(&reset_data->lock, flags); + + if (reset_data->stage == -1) + goto out; + +#if KBASE_GPU_RESET_EN + if (kbase_prepare_to_reset_gpu(kbdev)) { + dev_err(kbdev->dev, "Issuing GPU soft-reset because jobs failed to be killed (within %d ms) as part of context termination (e.g.
+ ZAP_TIMEOUT);
+ kbase_reset_gpu(kbdev);
+ }
+#endif /* KBASE_GPU_RESET_EN */
+ reset_data->stage = 2;
+
+ out:
+ spin_unlock_irqrestore(&reset_data->lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct zap_reset_data reset_data;
+ unsigned long flags;
+
+ hrtimer_init_on_stack(&reset_data.timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ reset_data.timer.function = zap_timeout_callback;
+
+ spin_lock_init(&reset_data.lock);
+
+ reset_data.kbdev = kbdev;
+ reset_data.stage = 1;
+
+ hrtimer_start(&reset_data.timer, HR_TIMER_DELAY_MSEC(ZAP_TIMEOUT),
+ HRTIMER_MODE_REL);
+
+ /* Wait for all jobs to finish, and for the context to be not-scheduled
+ * (due to kbase_job_zap_context(), we also guarantee it's not in the JS
+ * policy queue either) */
+ wait_event(kctx->jctx.zero_jobs_wait, kctx->jctx.job_nr == 0);
+ wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
+ kctx->jctx.sched_info.ctx.is_scheduled == false);
+
+ spin_lock_irqsave(&reset_data.lock, flags);
+ if (reset_data.stage == 1) {
+ /* The timer hasn't run yet - so cancel it */
+ reset_data.stage = -1;
+ }
+ spin_unlock_irqrestore(&reset_data.lock, flags);
+
+ hrtimer_cancel(&reset_data.timer);
+
+ if (reset_data.stage == 2) {
+ /* The reset has already started.
+ * Wait for the reset to complete
+ */
+ wait_event(kbdev->hwaccess.backend.reset_wait,
+ atomic_read(&kbdev->hwaccess.backend.reset_gpu)
+ == KBASE_RESET_GPU_NOT_PENDING);
+ }
+ destroy_hrtimer_on_stack(&reset_data.timer);
+
+ dev_dbg(kbdev->dev, "Zap: Finished Context %p", kctx);
+
+ /* Ensure that the signallers of the waitqs have finished */
+ mutex_lock(&kctx->jctx.lock);
+ mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
+ mutex_unlock(&kctx->jctx.lock);
+}
+
+int kbase_job_slot_init(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+ kbdev->hwaccess.backend.reset_workq = alloc_workqueue(
+ "Mali reset workqueue", 0, 1);
+ if (NULL == kbdev->hwaccess.backend.reset_workq)
+ return -EINVAL;
+
+ KBASE_DEBUG_ASSERT(0 ==
+ object_is_on_stack(&kbdev->hwaccess.backend.reset_work));
+ INIT_WORK(&kbdev->hwaccess.backend.reset_work,
+ kbasep_reset_timeout_worker);
+
+ hrtimer_init(&kbdev->hwaccess.backend.reset_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ kbdev->hwaccess.backend.reset_timer.function =
+ kbasep_reset_timer_callback;
+#endif
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_init);
+
+void kbase_job_slot_halt(struct kbase_device *kbdev)
+{
+ CSTD_UNUSED(kbdev);
+}
+
+void kbase_job_slot_term(struct kbase_device *kbdev)
+{
+#if KBASE_GPU_RESET_EN
+ destroy_workqueue(kbdev->hwaccess.backend.reset_workq);
+#endif
+}
+KBASE_EXPORT_TEST_API(kbase_job_slot_term);
+
+/**
+ * kbase_job_slot_softstop_swflags - Soft-stop a job with flags
+ * @kbdev: The kbase device
+ * @js: The job slot to soft-stop
+ * @target_katom: The job that should be soft-stopped (or NULL for any job)
+ * @sw_flags: Flags to pass in about the soft-stop
+ *
+ * Context:
+ * The job slot lock must be held when calling this function.
+ * The job slot must not already be in the process of being soft-stopped.
+ *
+ * Soft-stop the specified job slot, with extra information about the stop
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
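+ *
+ * As a usage sketch: kbase_job_slot_softstop() below is implemented in terms
+ * of this function, passing no extra software flags:
+ *
+ *   kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+ *
+ * Any bits set in @sw_flags must lie outside JS_COMMAND_MASK, as they are
+ * OR'd with JS_COMMAND_SOFT_STOP (this is asserted on entry).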
+ */
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom, u32 sw_flags)
+{
+ KBASE_DEBUG_ASSERT(!(sw_flags & JS_COMMAND_MASK));
+ kbase_backend_soft_hard_stop_slot(kbdev, NULL, js, target_katom,
+ JS_COMMAND_SOFT_STOP | sw_flags);
+}
+
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ * @kbdev: The kbase device
+ * @js: The job slot to soft-stop
+ * @target_katom: The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ * The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+ struct kbase_jd_atom *target_katom)
+{
+ kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
+}
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx: The kbase context that contains the job(s) that should
+ * be hard-stopped
+ * @js: The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ * jobs from the context)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+ struct kbase_jd_atom *target_katom)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ bool stopped = kbase_backend_soft_hard_stop_slot(kbdev, kctx, js,
+ target_katom,
+ JS_COMMAND_HARD_STOP);
+#if KBASE_GPU_RESET_EN
+ if (stopped && (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_8401) ||
+ kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_9510) ||
+ (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_T76X_3542) &&
+ (target_katom == NULL || target_katom->core_req &
+ BASE_JD_REQ_FS_AFBC)))) {
+ /* MIDBASE-2916: if a fragment job with AFBC encoding is
+ * hard-stopped, also perform a soft reset in order to
+ * clear the GPU status.
+ * The workaround for HW issue 8401 has an issue of its own, so
+ * after hard-stopping just reset the GPU. This ensures that the
+ * jobs leave the GPU. */
+ if (kbase_prepare_to_reset_gpu_locked(kbdev)) {
+ dev_err(kbdev->dev, "Issuing GPU soft-reset after hard stopping due to hardware issue");
+ kbase_reset_gpu_locked(kbdev);
+ }
+ }
+#endif
+}
+
+/**
+ * kbase_job_check_enter_disjoint - potentially enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft/hard-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+ u16 core_reqs, struct kbase_jd_atom *target_katom)
+{
+ u32 hw_action = action & JS_COMMAND_MASK;
+
+ /* For hard-stop, don't enter if hard-stop not allowed */
+ if (hw_action == JS_COMMAND_HARD_STOP &&
+ !kbasep_hard_stop_allowed(kbdev, core_reqs))
+ return;
+
+ /* For soft-stop, don't enter if soft-stop not allowed, or isn't
+ * causing disjoint */
+ if (hw_action == JS_COMMAND_SOFT_STOP &&
+ !(kbasep_soft_stop_allowed(kbdev, core_reqs) &&
+ (action & JS_COMMAND_SW_CAUSES_DISJOINT)))
+ return;
+
+ /* Nothing to do if already logged disjoint state on this atom */
+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT)
+ return;
+
+ target_katom->atom_flags |= KBASE_KATOM_FLAG_IN_DISJOINT;
+ kbase_disjoint_state_up(kbdev);
+}
+
+/**
+ * kbase_job_check_leave_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * previously passed to kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+ struct kbase_jd_atom *target_katom)
+{
+ if (target_katom->atom_flags & KBASE_KATOM_FLAG_IN_DISJOINT) {
+ target_katom->atom_flags &= ~KBASE_KATOM_FLAG_IN_DISJOINT;
+ kbase_disjoint_state_down(kbdev);
+ }
+}
+
+
+#if KBASE_GPU_RESET_EN
+static void kbase_debug_dump_registers(struct kbase_device *kbdev)
+{
+ int i;
+
+ dev_err(kbdev->dev, "Register state:");
+ dev_err(kbdev->dev, " GPU_IRQ_RAWSTAT=0x%08x GPU_STATUS=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS), NULL));
+ dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x JOB_IRQ_JS_STATE=0x%08x JOB_IRQ_THROTTLE=0x%08x",
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_JS_STATE), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_THROTTLE), NULL));
+ for (i = 0; i < 3; i++) {
+ dev_err(kbdev->dev, " JS%d_STATUS=0x%08x JS%d_HEAD_LO=0x%08x",
+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_STATUS),
+ NULL),
+ i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO),
+ NULL));
+ }
+ dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS), NULL));
+ dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), NULL),
+ kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), NULL),
+ kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL));
+ dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1), NULL));
+ dev_err(kbdev->dev, " SHADER_CONFIG=0x%08x L2_MMU_CONFIG=0x%08x",
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), NULL),
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), NULL));
+}
+
+static void kbasep_save_hwcnt_setup(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *hwcnt_setup)
+{
+ hwcnt_setup->dump_buffer =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO), kctx) &
+ 0xffffffff;
+ hwcnt_setup->dump_buffer |= (u64)
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI), kctx) <<
+ 32;
+ hwcnt_setup->jm_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN), kctx);
+ hwcnt_setup->shader_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN), kctx);
+ hwcnt_setup->tiler_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN), kctx);
+ hwcnt_setup->mmu_l2_bm =
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN), kctx);
+}
+
+static void kbasep_reset_timeout_worker(struct work_struct *data)
+{
+ unsigned long flags;
+ struct kbase_device *kbdev;
+ int i;
+ ktime_t end_timestamp = ktime_get();
+ struct kbasep_js_device_data *js_devdata;
+ struct kbase_uk_hwcnt_setup hwcnt_setup = { {0} };
+ enum kbase_instr_state bckp_state;
+ bool try_schedule = false;
+ bool restore_hwc = false;
+
+ u32 mmu_irq_mask;
+
+ KBASE_DEBUG_ASSERT(data);
+
+ kbdev = container_of(data, struct kbase_device,
+ hwaccess.backend.reset_work);
+
+ KBASE_DEBUG_ASSERT(kbdev);
+ js_devdata = &kbdev->js_data;
+
+ KBASE_TRACE_ADD(kbdev, JM_BEGIN_RESET_WORKER, NULL, NULL, 0u, 0);
+
+ /* Make sure the timer has completed - this cannot be done from
+ * interrupt context, so this cannot be done within
+ * kbasep_try_reset_gpu_early. */
+ hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
+ if (kbase_pm_context_active_handle_suspend(kbdev,
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
+ /* This would re-activate the GPU. Since it's already idle,
+ * there's no need to reset it */
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_NOT_PENDING);
+ kbase_disjoint_state_down(kbdev);
+ wake_up(&kbdev->hwaccess.backend.reset_wait);
+ return;
+ }
+
+ mutex_lock(&kbdev->pm.lock);
+ /* We hold the pm lock, so there ought to be a current policy */
+ KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy);
+
+ /* All slots have been soft-stopped and we've waited
+ * SOFT_STOP_RESET_TIMEOUT for the slots to clear; at this point we
+ * assume that anything still left on the GPU is stuck there and
+ * we'll kill it when we reset the GPU */
+
+ dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
+ RESET_TIMEOUT);
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+
+ if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_RESETTING) {
+ /* the same interrupt handler preempted itself */
+ /* GPU is being reset */
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ wait_event(kbdev->hwcnt.backend.wait,
+ kbdev->hwcnt.backend.triggered != 0);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ }
+ /* Save the HW counters setup */
+ if (kbdev->hwcnt.kctx != NULL) {
+ struct kbase_context *kctx = kbdev->hwcnt.kctx;
+
+ if (kctx->jctx.sched_info.ctx.is_scheduled) {
+ kbasep_save_hwcnt_setup(kbdev, kctx, &hwcnt_setup);
+
+ restore_hwc = true;
+ }
+ }
+
+ /* Output the state of some interesting registers to help in the
+ * debugging of GPU resets */
+ kbase_debug_dump_registers(kbdev);
+
+ bckp_state = kbdev->hwcnt.backend.state;
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_RESETTING;
+ kbdev->hwcnt.backend.triggered = 0;
+
+ mmu_irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL);
+ /* Disable IRQs to prevent IRQ handlers from kicking in after releasing
+ * the spinlock; this also clears any outstanding interrupts */
+ kbase_pm_disable_interrupts(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Ensure that any IRQ handlers have finished. This must be done
+ * without holding any locks that IRQ handlers may take */
+ kbase_synchronize_irqs(kbdev);
+
+ /* Reset the GPU */
+ kbase_pm_init_hw(kbdev, 0);
+ kbase_pm_enable_interrupts_mmu_mask(kbdev, mmu_irq_mask);
+ /* IRQs were re-enabled by kbase_pm_init_hw, and GPU is still powered */
+
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ /* Restore the HW counters setup */
+ if (restore_hwc) {
+ struct kbase_context *kctx = kbdev->hwcnt.kctx;
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+ (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
+ PRFCNT_CONFIG_MODE_OFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_LO),
+ hwcnt_setup.dump_buffer & 0xFFFFFFFF, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_BASE_HI),
+ hwcnt_setup.dump_buffer >> 32, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
+ hwcnt_setup.jm_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
+ hwcnt_setup.shader_bm, kctx);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_MMU_L2_EN),
+ hwcnt_setup.mmu_l2_bm, kctx);
+
+ /* Due to PRLAM-8186 we need to disable the Tiler before we
+ * enable the HW counter dump. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ 0, kctx);
+ else
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ hwcnt_setup.tiler_bm, kctx);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_CONFIG),
+ (kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT) |
+ PRFCNT_CONFIG_MODE_MANUAL, kctx);
+
+ /* If HW has PRLAM-8186 we can now re-enable the tiler HW
+ * counters dump */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8186))
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_TILER_EN),
+ hwcnt_setup.tiler_bm, kctx);
+ }
+ kbdev->hwcnt.backend.state = bckp_state;
+ switch (kbdev->hwcnt.backend.state) {
+ /* Cases for waking kbasep_cache_clean_worker worker */
+ case KBASE_INSTR_STATE_CLEANED:
+ /* Cache-clean IRQ occurred, but we reset:
+ * Wake up in case the waiter saw RESETTING */
+ case KBASE_INSTR_STATE_REQUEST_CLEAN:
+ /* After a clean was requested, but before the regs were
+ * written:
+ * Wake up in case the waiter saw RESETTING */
+ wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+ break;
+ case KBASE_INSTR_STATE_CLEANING:
+ /* Either:
+ * 1) We've not got the Cache-clean IRQ yet: it was lost, or:
+ * 2) We got it whilst resetting: it was voluntarily lost
+ *
+ * So, move to the next state and wakeup: */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_CLEANED;
+ wake_up(&kbdev->hwcnt.backend.cache_clean_wait);
+ break;
+
+ /* Cases for waking anyone else */
+ case KBASE_INSTR_STATE_DUMPING:
+ /* If dumping, abort the dump, because we may have lost the
+ * IRQ */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+ break;
+ case KBASE_INSTR_STATE_DISABLED:
+ case KBASE_INSTR_STATE_IDLE:
+ case KBASE_INSTR_STATE_FAULT:
+ /* Every other reason: wakeup in that state */
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
+ break;
+
+ /* Unhandled cases */
+ case KBASE_INSTR_STATE_RESETTING:
+ default:
+ BUG();
+ break;
+ }
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+
+ /* Complete any jobs that were still on the GPU */
+ spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+ kbase_backend_reset(kbdev, &end_timestamp);
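+ /* kbase_backend_reset() has now emptied the slot ringbuffers: each
+ * atom has either been completed as cancelled or handed back to the
+ * job scheduler, so it is safe to drop the IRQ lock */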
spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + mutex_unlock(&kbdev->pm.lock); + + mutex_lock(&js_devdata->runpool_mutex); + + /* Reprogram the GPU's MMU */ + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbase_as *as = &kbdev->as[i]; + + mutex_lock(&as->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + if (js_devdata->runpool_irq.per_as_data[i].kctx) + kbase_mmu_update( + js_devdata->runpool_irq.per_as_data[i].kctx); + else + kbase_mmu_disable_as(kbdev, i); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&as->transaction_mutex); + } + + atomic_set(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING); + + kbase_disjoint_state_down(kbdev); + + wake_up(&kbdev->hwaccess.backend.reset_wait); + dev_err(kbdev->dev, "Reset complete"); + + if (js_devdata->nr_contexts_pullable > 0 && !kbdev->poweroff_pending) + try_schedule = true; + + mutex_unlock(&js_devdata->runpool_mutex); + + mutex_lock(&kbdev->pm.lock); + + /* Find out what cores are required now */ + kbase_pm_update_cores_state(kbdev); + + /* Synchronously request and wait for those cores, because if + * instrumentation is enabled it would need them immediately. */ + kbase_pm_check_transitions_sync(kbdev); + + mutex_unlock(&kbdev->pm.lock); + + /* Try submitting some jobs to restart processing */ + if (try_schedule) { + KBASE_TRACE_ADD(kbdev, JM_SUBMIT_AFTER_RESET, NULL, NULL, 0u, + 0); + kbase_js_sched_all(kbdev); + } + + kbase_pm_context_idle(kbdev); + KBASE_TRACE_ADD(kbdev, JM_END_RESET_WORKER, NULL, NULL, 0u, 0); +} + +static enum hrtimer_restart kbasep_reset_timer_callback(struct hrtimer *timer) +{ + struct kbase_device *kbdev = container_of(timer, struct kbase_device, + hwaccess.backend.reset_timer); + + KBASE_DEBUG_ASSERT(kbdev); + + /* Reset still pending? */ + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) == + KBASE_RESET_GPU_COMMITTED) + queue_work(kbdev->hwaccess.backend.reset_workq, + &kbdev->hwaccess.backend.reset_work); + + return HRTIMER_NORESTART; +} + +/* + * If all jobs are evicted from the GPU then we can reset the GPU + * immediately instead of waiting for the timeout to elapse + */ + +static void kbasep_try_reset_gpu_early_locked(struct kbase_device *kbdev) +{ + int i; + int pending_jobs = 0; + + KBASE_DEBUG_ASSERT(kbdev); + + /* Count the number of jobs */ + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + pending_jobs += kbase_backend_nr_atoms_submitted(kbdev, i); + + if (pending_jobs > 0) { + /* There are still jobs on the GPU - wait */ + return; + } + + /* Check that the reset has been committed to (i.e. 
kbase_reset_gpu has + * been called), and that no other thread beat this thread to starting + * the reset */ + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_COMMITTED, KBASE_RESET_GPU_HAPPENING) != + KBASE_RESET_GPU_COMMITTED) { + /* Reset has already occurred */ + return; + } + + queue_work(kbdev->hwaccess.backend.reset_workq, + &kbdev->hwaccess.backend.reset_work); +} + +static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + + js_devdata = &kbdev->js_data; + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbasep_try_reset_gpu_early_locked(kbdev); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); +} + +/** + * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU + * @kbdev: kbase device + * + * This function just soft-stops all the slots to ensure that as many jobs as + * possible are saved. + * + * Return: + * The function returns a boolean which should be interpreted as follows: + * true - Prepared for reset, kbase_reset_gpu should be called. + * false - Another thread is performing a reset, kbase_reset_gpu should + * not be called. + */ +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev) +{ + int i; + + KBASE_DEBUG_ASSERT(kbdev); + + if (atomic_cmpxchg(&kbdev->hwaccess.backend.reset_gpu, + KBASE_RESET_GPU_NOT_PENDING, + KBASE_RESET_GPU_PREPARED) != + KBASE_RESET_GPU_NOT_PENDING) { + /* Some other thread is already resetting the GPU */ + return false; + } + + kbase_disjoint_state_up(kbdev); + + for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) + kbase_job_slot_softstop(kbdev, i, NULL); + + return true; +} + +bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev) +{ + unsigned long flags; + bool ret; + struct kbasep_js_device_data *js_devdata; + + js_devdata = &kbdev->js_data; + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + ret = kbase_prepare_to_reset_gpu_locked(kbdev); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return ret; +} +KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu); + +/* + * This function should be called after kbase_prepare_to_reset_gpu if it + * returns true. It should never be called without a corresponding call to + * kbase_prepare_to_reset_gpu. + * + * After this function is called (or not called if kbase_prepare_to_reset_gpu + * returned false), the caller should wait for + * kbdev->hwaccess.backend.reset_waitq to be signalled to know when the reset + * has completed. 
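+ *
+ * A minimal usage sketch (this is the pattern used by
+ * zap_timeout_callback() above):
+ *
+ *   if (kbase_prepare_to_reset_gpu(kbdev))
+ *           kbase_reset_gpu(kbdev);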
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Note this is an assert/atomic_set because it is a software issue for
+ * a race to be occurring here */
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+ KBASE_RESET_GPU_PREPARED);
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_COMMITTED);
+
+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
+ kbdev->reset_timeout_ms);
+
+ hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+ HRTIMER_MODE_REL);
+
+ /* Try resetting early */
+ kbasep_try_reset_gpu_early(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_reset_gpu);
+
+void kbase_reset_gpu_locked(struct kbase_device *kbdev)
+{
+ KBASE_DEBUG_ASSERT(kbdev);
+
+ /* Note this is an assert/atomic_set because it is a software issue for
+ * a race to be occurring here */
+ KBASE_DEBUG_ASSERT(atomic_read(&kbdev->hwaccess.backend.reset_gpu) ==
+ KBASE_RESET_GPU_PREPARED);
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu,
+ KBASE_RESET_GPU_COMMITTED);
+
+ dev_err(kbdev->dev, "Preparing to soft-reset GPU: Waiting (up to %d ms) for all jobs to complete soft-stop\n",
+ kbdev->reset_timeout_ms);
+ hrtimer_start(&kbdev->hwaccess.backend.reset_timer,
+ HR_TIMER_DELAY_MSEC(kbdev->reset_timeout_ms),
+ HRTIMER_MODE_REL);
+
+ /* Try resetting early */
+ kbasep_try_reset_gpu_early_locked(kbdev);
+}
+#endif /* KBASE_GPU_RESET_EN */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
new file mode 100644
index 00000000000..3b959d5b447
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_internal.h
@@ -0,0 +1,185 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Manager backend-specific low-level APIs.
+ */
+
+#ifndef _KBASE_JM_HWACCESS_H_
+#define _KBASE_JM_HWACCESS_H_
+
+#include <mali_kbase_hw.h>
+#include <mali_kbase_debug.h>
+#include <linux/atomic.h>
+
+#include <backend/gpu/mali_kbase_jm_rb.h>
+
+/**
+ * kbase_job_submit_nolock() - Submit a job to a certain job-slot
+ * @kbdev: Device pointer
+ * @katom: Atom to submit
+ * @js: Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_job_submit_nolock(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom, int js);
+
+/**
+ * kbase_job_done_slot() - Complete the head job on a particular job-slot
+ * @kbdev: Device pointer
+ * @s: Job slot
+ * @completion_code: Completion code of job reported by GPU
+ * @job_tail: Job tail address reported by GPU
+ * @end_timestamp: Timestamp of job completion
+ */
+void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
+ u64 job_tail, ktime_t *end_timestamp);
+
+#ifdef CONFIG_GPU_TRACEPOINTS
+static inline char *kbasep_make_job_slot_string(int js, char *js_string)
+{
+ sprintf(js_string, "job_slot_%i", js);
+ return js_string;
+}
+#endif
+
+/**
+ * kbase_job_hw_submit() - Submit a job to the GPU
+ * @kbdev: Device pointer
+ * @katom: Atom to submit
+ * @js: Job slot to submit on
+ *
+ * The caller must check kbasep_jm_is_submit_slots_free() != false before
+ * calling this.
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_job_hw_submit(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ int js);
+
+/**
+ * kbasep_job_slot_soft_or_hard_stop_do_action() - Perform a soft or hard stop
+ * on the specified atom
+ * @kbdev: Device pointer
+ * @js: Job slot to stop on
+ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or
+ * JSn_COMMAND_SOFT_STOP
+ * @core_reqs: Core requirements of atom to stop
+ * @target_katom: Atom to stop
+ *
+ * The following locking conditions are made on the caller:
+ * - it must hold the kbasep_js_device_data::runpool_irq::lock
+ */
+void kbasep_job_slot_soft_or_hard_stop_do_action(struct kbase_device *kbdev,
+ int js,
+ u32 action,
+ u16 core_reqs,
+ struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_soft_hard_stop_slot() - Soft or hard stop jobs on a given job
+ * slot belonging to a given context.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer. May be NULL
+ * @js: Job slot to stop
+ * @katom: Specific atom to stop. May be NULL
+ * @action: The action to perform, either JSn_COMMAND_HARD_STOP or
+ * JSn_COMMAND_SOFT_STOP
+ *
+ * If no context is provided then all jobs on the slot will be soft or hard
+ * stopped.
+ *
+ * If a katom is provided then only that specific atom will be stopped. In this
+ * case the kctx parameter is ignored.
+ *
+ * Jobs that are on the slot but are not yet on the GPU will be unpulled and
+ * returned to the job scheduler.
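+ *
+ * As a usage sketch, kbase_job_slot_hardstop() hard-stops all jobs from a
+ * context on one slot with:
+ *
+ *   kbase_backend_soft_hard_stop_slot(kbdev, kctx, js, NULL,
+ *                                     JS_COMMAND_HARD_STOP);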
+ * + * Return: true if an atom was stopped, false otherwise + */ +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js, + struct kbase_jd_atom *katom, + u32 action); + +/** + * kbase_job_slot_init - Initialise job slot framework + * @kbdev: Device pointer + * + * Called on driver initialisation + * + * Return: 0 on success + */ +int kbase_job_slot_init(struct kbase_device *kbdev); + +/** + * kbase_job_slot_halt - Halt the job slot framework + * @kbdev: Device pointer + * + * Should prevent any further job slot processing + */ +void kbase_job_slot_halt(struct kbase_device *kbdev); + +/** + * kbase_job_slot_term - Terminate job slot framework + * @kbdev: Device pointer + * + * Called on driver termination + */ +void kbase_job_slot_term(struct kbase_device *kbdev); + +#if KBASE_GPU_RESET_EN +/** + * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU. + * @kbdev: Device pointer + * + * This function just soft-stops all the slots to ensure that as many jobs as + * possible are saved. + * + * Return: a boolean which should be interpreted as follows: + * - true - Prepared for reset, kbase_reset_gpu should be called. + * - false - Another thread is performing a reset, kbase_reset_gpu should + * not be called. + */ +bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev); + +/** + * kbase_reset_gpu_locked - Reset the GPU + * @kbdev: Device pointer + * + * This function should be called after kbase_prepare_to_reset_gpu if it + * returns true. It should never be called without a corresponding call to + * kbase_prepare_to_reset_gpu. + * + * After this function is called (or not called if kbase_prepare_to_reset_gpu + * returned false), the caller should wait for kbdev->reset_waitq to be + * signalled to know when the reset has completed. + */ +void kbase_reset_gpu_locked(struct kbase_device *kbdev); +#endif + +#endif /* _KBASE_JM_HWACCESS_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c new file mode 100644 index 00000000000..1bf9c709752 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.c @@ -0,0 +1,1407 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register-based HW access backend specific APIs + */ + +#include <mali_kbase.h> +#include <mali_kbase_hwaccess_jm.h> +#include <mali_kbase_jm.h> +#include <mali_kbase_js.h> +#include <mali_kbase_10969_workaround.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_jm_internal.h> +#include <backend/gpu/mali_kbase_js_affinity.h> +#include <backend/gpu/mali_kbase_js_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +/* Return whether the specified ringbuffer is empty. HW access lock must be + * held */ +#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx) +/* Return number of atoms currently in the specified ringbuffer. 
HW access lock + * must be held */ +#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx) + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_gpu_enqueue_atom - Enqueue an atom in the HW access ringbuffer + * @kbdev: Device pointer + * @katom: Atom to enqueue + * + * Context: Caller must hold the HW access lock + */ +static void kbase_gpu_enqueue_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[katom->slot_nr]; + + WARN_ON(SLOT_RB_ENTRIES(rb) >= SLOT_RB_SIZE); + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + rb->entries[rb->write_idx & SLOT_RB_MASK].katom = katom; + rb->write_idx++; + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; +} + +/** + * kbase_gpu_dequeue_atom - Remove an atom from the HW access ringbuffer, once + * it has been completed + * @kbdev: Device pointer + * @js: Job slot to remove atom from + * + * Context: Caller must hold the HW access lock + * + * Return: Atom removed from ringbuffer + */ +static struct kbase_jd_atom *kbase_gpu_dequeue_atom(struct kbase_device *kbdev, + int js) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + struct kbase_jd_atom *katom; + + if (SLOT_RB_EMPTY(rb)) { + WARN(1, "GPU ringbuffer unexpectedly empty\n"); + return NULL; + } + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + katom = rb->entries[rb->read_idx & SLOT_RB_MASK].katom; + + kbase_gpu_release_atom(kbdev, katom); + + rb->read_idx++; + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB; + + kbase_js_debug_log_current_affinities(kbdev); + + return katom; +} + +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, + int idx) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + if ((SLOT_RB_ENTRIES(rb) - 1) < idx) + return NULL; /* idx out of range */ + + return rb->entries[(rb->read_idx + idx) & SLOT_RB_MASK].katom; +} + +struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev, + int js) +{ + return kbase_gpu_inspect(kbdev, js, 0); +} + +struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev, + int js) +{ + struct slot_rb *rb = &kbdev->hwaccess.backend.slot_rb[js]; + + if (SLOT_RB_EMPTY(rb)) + return NULL; + + return rb->entries[(rb->write_idx - 1) & SLOT_RB_MASK].katom; +} + +/** + * kbase_gpu_atoms_submitted - Inspect whether a slot has any atoms currently + * on the GPU + * @kbdev: Device pointer + * @js: Job slot to inspect + * + * Return: true if there are atoms on the GPU for slot js, + * false otherwise + */ +static bool kbase_gpu_atoms_submitted(struct kbase_device *kbdev, int js) +{ + int i; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (!katom) + return false; + if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED || + katom->gpu_rb_state == KBASE_ATOM_GPU_RB_READY) + return true; + } + + return false; +} + +/** + * kbase_gpu_atoms_submitted_any() - Inspect whether there are any atoms + * currently on the GPU + * @kbdev: Device pointer + * + * Return: true if there are any atoms on the GPU, false otherwise + */ +static bool kbase_gpu_atoms_submitted_any(struct kbase_device *kbdev) +{ + int js; + int i; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + for (js = 0; js < 
kbdev->gpu_props.num_job_slots; js++) { + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + return true; + } + } + return false; +} + +int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && (katom->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED)) + nr++; + } + + return nr; +} + +int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + if (kbase_gpu_inspect(kbdev, js, i)) + nr++; + } + + return nr; +} + +static int kbase_gpu_nr_atoms_on_slot_min(struct kbase_device *kbdev, int js, + enum kbase_atom_gpu_rb_state min_rb_state) +{ + int nr = 0; + int i; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + for (i = 0; i < SLOT_RB_SIZE; i++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, i); + + if (katom && (katom->gpu_rb_state >= min_rb_state)) + nr++; + } + + return nr; +} + +int kbase_backend_slot_free(struct kbase_device *kbdev, int js) +{ + if (atomic_read(&kbdev->hwaccess.backend.reset_gpu) != + KBASE_RESET_GPU_NOT_PENDING) { + /* The GPU is being reset - so prevent submission */ + return 0; + } + + return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); +} + + +static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +static bool kbasep_js_job_check_ref_cores(struct kbase_device *kbdev, + int js, + struct kbase_jd_atom *katom) +{ + /* The most recently checked affinity. Having this at this scope allows + * us to guarantee that we've checked the affinity in this function + * call. + */ + u64 recently_chosen_affinity = 0; + bool chosen_affinity = false; + bool retry; + + do { + retry = false; + + /* NOTE: The following uses a number of FALLTHROUGHs to optimize + * the calls to this function. 
Ending of the function is + * indicated by BREAK OUT */ + switch (katom->coreref_state) { + /* State when job is first attempted to be run */ + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + KBASE_DEBUG_ASSERT(katom->affinity == 0); + + /* Compute affinity */ + if (false == kbase_js_choose_affinity( + &recently_chosen_affinity, kbdev, katom, + js)) { + /* No cores are currently available */ + /* *** BREAK OUT: No state transition *** */ + break; + } + + chosen_affinity = true; + + /* Request the cores */ + kbase_pm_request_cores(kbdev, + katom->core_req & BASE_JD_REQ_T, + recently_chosen_affinity); + + katom->affinity = recently_chosen_affinity; + + /* Proceed to next state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + { + enum kbase_pm_cores_ready cores_ready; + + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + + cores_ready = kbase_pm_register_inuse_cores( + kbdev, + katom->core_req & BASE_JD_REQ_T, + katom->affinity); + if (cores_ready == KBASE_NEW_AFFINITY) { + /* Affinity no longer valid - return to + * previous state */ + kbasep_js_job_check_deref_cores(kbdev, + katom); + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_INUSE_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: Return to previous + * state, retry *** */ + retry = true; + break; + } + if (cores_ready == KBASE_CORES_NOT_READY) { + /* Stay in this state and return, to + * retry at this state later */ + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_INUSE_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: No state transition + * *** */ + break; + } + /* Proceed to next state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + } + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + + /* Optimize out choosing the affinity twice in the same + * function call */ + if (chosen_affinity == false) { + /* See if the affinity changed since a previous + * call. 
*/ + if (false == kbase_js_choose_affinity( + &recently_chosen_affinity, + kbdev, katom, js)) { + /* No cores are currently available */ + kbasep_js_job_check_deref_cores(kbdev, + katom); + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REQUEST_ON_RECHECK_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) recently_chosen_affinity); + /* *** BREAK OUT: Transition to lower + * state *** */ + break; + } + chosen_affinity = true; + } + + /* Now see if this requires a different set of cores */ + if (recently_chosen_affinity != katom->affinity) { + enum kbase_pm_cores_ready cores_ready; + + kbase_pm_request_cores(kbdev, + katom->core_req & BASE_JD_REQ_T, + recently_chosen_affinity); + + /* Register new cores whilst we still hold the + * old ones, to minimize power transitions */ + cores_ready = + kbase_pm_register_inuse_cores(kbdev, + katom->core_req & BASE_JD_REQ_T, + recently_chosen_affinity); + kbasep_js_job_check_deref_cores(kbdev, katom); + + /* Fixup the state that was reduced by + * deref_cores: */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + katom->affinity = recently_chosen_affinity; + if (cores_ready == KBASE_NEW_AFFINITY) { + /* Affinity no longer valid - return to + * previous state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + + kbasep_js_job_check_deref_cores(kbdev, + katom); + + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_INUSE_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: Return to previous + * state, retry *** */ + retry = true; + break; + } + /* Now might be waiting for powerup again, with + * a new affinity */ + if (cores_ready == KBASE_CORES_NOT_READY) { + /* Return to previous state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES; + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_REGISTER_ON_RECHECK_FAILED, + katom->kctx, katom, + katom->jc, js, + (u32) katom->affinity); + /* *** BREAK OUT: Transition to lower + * state *** */ + break; + } + } + /* Proceed to next state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + case KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS: + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + KBASE_DEBUG_ASSERT(katom->affinity == + recently_chosen_affinity); + + /* Note: this is where the caller must've taken the + * runpool_irq.lock */ + + /* Check for affinity violations - if there are any, + * then we just ask the caller to requeue and try again + * later */ + if (kbase_js_affinity_would_violate(kbdev, js, + katom->affinity) != false) { + /* Return to previous state */ + katom->coreref_state = + KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY; + /* *** BREAK OUT: Transition to lower state *** + */ + KBASE_TRACE_ADD_SLOT_INFO(kbdev, + JS_CORE_REF_AFFINITY_WOULD_VIOLATE, + katom->kctx, katom, katom->jc, js, + (u32) katom->affinity); + break; + } + + /* No affinity violations would result, so the cores are + * ready */ + katom->coreref_state = KBASE_ATOM_COREREF_STATE_READY; + /* *** BREAK OUT: Cores Ready *** */ + break; + + default: + KBASE_DEBUG_ASSERT_MSG(false, + "Unhandled kbase_atom_coreref_state %d", + katom->coreref_state); + break; + } + } while (retry != false); + + return (katom->coreref_state == KBASE_ATOM_COREREF_STATE_READY); +} + +static void kbasep_js_job_check_deref_cores(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + 
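+ /* Undo the core references taken by kbasep_js_job_check_ref_cores():
+ * depending on how far through the coreref state machine the atom
+ * progressed, the cores are either released (already registered as
+ * in-use) or unrequested (requested but never registered) */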
KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + + switch (katom->coreref_state) { + case KBASE_ATOM_COREREF_STATE_READY: + /* State where atom was submitted to the HW - just proceed to + * power-down */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + + /* *** FALLTHROUGH *** */ + + case KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY: + /* State where cores were registered */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + kbase_pm_release_cores(kbdev, katom->core_req & BASE_JD_REQ_T, + katom->affinity); + + break; + + case KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES: + /* State where cores were requested, but not registered */ + KBASE_DEBUG_ASSERT(katom->affinity != 0 || + (katom->core_req & BASE_JD_REQ_T)); + kbase_pm_unrequest_cores(kbdev, katom->core_req & BASE_JD_REQ_T, + katom->affinity); + break; + + case KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED: + /* Initial state - nothing required */ + KBASE_DEBUG_ASSERT(katom->affinity == 0); + break; + + default: + KBASE_DEBUG_ASSERT_MSG(false, + "Unhandled coreref_state: %d", + katom->coreref_state); + break; + } + + katom->affinity = 0; + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; +} + + +static void kbase_gpu_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + switch (katom->gpu_rb_state) { + case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: + /* Should be impossible */ + WARN(1, "Attempting to release atom not in ringbuffer\n"); + break; + + case KBASE_ATOM_GPU_RB_SUBMITTED: + /* Inform power management at start/finish of atom + * so it can update its GPU utilisation metrics. */ + kbase_pm_metrics_release_atom(kbdev, katom); + + if (katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter(kbdev); + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_READY: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: + kbase_js_affinity_release_slot_cores(kbdev, katom->slot_nr, + katom->affinity); + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + kbasep_js_job_check_deref_cores(kbdev, katom); + break; + + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: + /* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */ + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + break; + } + + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_WAITING_BLOCKED; +} + +static void kbase_gpu_mark_atom_for_return(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + kbase_gpu_release_atom(kbdev, katom); + katom->gpu_rb_state = KBASE_ATOM_GPU_RB_RETURN_TO_JS; +} + +static inline bool kbase_gpu_rmu_workaround(struct kbase_device *kbdev, int js) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + bool slot_busy[3]; + + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) + return true; + slot_busy[0] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 0, + KBASE_ATOM_GPU_RB_WAITING_AFFINITY); + slot_busy[1] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 1, + KBASE_ATOM_GPU_RB_WAITING_AFFINITY); + slot_busy[2] = kbase_gpu_nr_atoms_on_slot_min(kbdev, 2, + KBASE_ATOM_GPU_RB_WAITING_AFFINITY); + + if ((js == 2 && !(slot_busy[0] || slot_busy[1])) || + (js != 2 && !slot_busy[2])) + return true; + + /* Don't submit slot 2 atom while GPU has jobs on slots 0/1 */ + if (js == 2 && (kbase_gpu_atoms_submitted(kbdev, 0) || + 
kbase_gpu_atoms_submitted(kbdev, 1) || + backend->rmu_workaround_flag)) + return false; + + /* Don't submit slot 0/1 atom while GPU has jobs on slot 2 */ + if (js != 2 && (kbase_gpu_atoms_submitted(kbdev, 2) || + !backend->rmu_workaround_flag)) + return false; + + backend->rmu_workaround_flag = !backend->rmu_workaround_flag; + + return true; +} + +static bool kbase_gpu_in_secure_mode(struct kbase_device *kbdev) +{ + return kbdev->js_data.runpool_irq.secure_mode; +} + +static int kbase_gpu_secure_mode_enable(struct kbase_device *kbdev) +{ + int err = -EINVAL; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + WARN_ONCE(!kbdev->secure_ops, + "Cannot enable secure mode: secure callbacks not specified.\n"); + + if (kbdev->secure_ops) { + /* Switch GPU to secure mode */ + err = kbdev->secure_ops->secure_mode_enable(kbdev); + + if (err) + dev_warn(kbdev->dev, "Failed to enable secure mode: %d\n", err); + else + kbdev->js_data.runpool_irq.secure_mode = true; + } + + return err; +} + +static int kbase_gpu_secure_mode_disable(struct kbase_device *kbdev) +{ + int err = -EINVAL; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + WARN_ONCE(!kbdev->secure_ops, + "Cannot disable secure mode: secure callbacks not specified.\n"); + + if (kbdev->secure_ops) { + /* Switch GPU to non-secure mode */ + err = kbdev->secure_ops->secure_mode_disable(kbdev); + + if (err) + dev_warn(kbdev->dev, "Failed to disable secure mode: %d\n", err); + else + kbdev->js_data.runpool_irq.secure_mode = false; + } + + return err; +} + +void kbase_gpu_slot_update(struct kbase_device *kbdev) +{ + int js; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + struct kbase_jd_atom *katom[2]; + int idx; + + katom[0] = kbase_gpu_inspect(kbdev, js, 0); + katom[1] = kbase_gpu_inspect(kbdev, js, 1); + WARN_ON(katom[1] && !katom[0]); + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + bool cores_ready; + + if (!katom[idx]) + continue; + + switch (katom[idx]->gpu_rb_state) { + case KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB: + /* Should be impossible */ + WARN(1, "Attempting to update atom not in ringbuffer\n"); + break; + + case KBASE_ATOM_GPU_RB_WAITING_BLOCKED: + if (katom[idx]->atom_flags & + KBASE_KATOM_FLAG_X_DEP_BLOCKED) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE: + cores_ready = + kbasep_js_job_check_ref_cores(kbdev, js, + katom[idx]); + + if (katom[idx]->event_code == + BASE_JD_EVENT_PM_EVENT) { + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_RETURN_TO_JS; + break; + } + + if (!cores_ready) + break; + + kbase_js_affinity_retain_slot_cores(kbdev, js, + katom[idx]->affinity); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_AFFINITY; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_AFFINITY: + if (!kbase_gpu_rmu_workaround(kbdev, js)) + break; + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE: + if (kbase_gpu_in_secure_mode(kbdev) != kbase_jd_katom_is_secure(katom[idx])) { + int err = 0; + + /* Not in correct mode, take action */ + if (kbase_gpu_atoms_submitted_any(kbdev)) { + /* + * We are not in the correct + * GPU mode for this job, and + * we can't switch now because + * there are jobs already + * running. 
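+ * The atom stays in the
+ * WAITING_SECURE_MODE state and the switch
+ * is retried on a later
+ * kbase_gpu_slot_update() call, once the
+ * submitted jobs have completed.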
+ */ + break; + } + + /* No jobs running, so we can switch GPU mode right now */ + if (kbase_jd_katom_is_secure(katom[idx])) { + err = kbase_gpu_secure_mode_enable(kbdev); + } else { + err = kbase_gpu_secure_mode_disable(kbdev); + } + + if (err) { + /* Failed to switch secure mode, fail atom */ + katom[idx]->event_code = BASE_JD_EVENT_JOB_INVALID; + kbase_gpu_mark_atom_for_return(kbdev, katom[idx]); + break; + } + } + + /* Secure mode sanity checks */ + KBASE_DEBUG_ASSERT_MSG( + kbase_jd_katom_is_secure(katom[idx]) == kbase_gpu_in_secure_mode(kbdev), + "Secure mode of atom (%d) doesn't match secure mode of GPU (%d)", + kbase_jd_katom_is_secure(katom[idx]), kbase_gpu_in_secure_mode(kbdev)); + KBASE_DEBUG_ASSERT_MSG( + (kbase_jd_katom_is_secure(katom[idx]) && js == 0) || + !kbase_jd_katom_is_secure(katom[idx]), + "Secure atom on JS%d not supported", js); + + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_READY; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_READY: + /* Only submit if head atom or previous atom + * already submitted */ + if (idx == 1 && + (katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_SUBMITTED && + katom[0]->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) + break; + + /* Check if this job needs the cycle counter + * enabled before submission */ + if (katom[idx]->core_req & BASE_JD_REQ_PERMON) + kbase_pm_request_gpu_cycle_counter_l2_is_on( + kbdev); + + /* Inform power management at start/finish of + * atom so it can update its GPU utilisation + * metrics. */ + kbase_pm_metrics_run_atom(kbdev, katom[idx]); + + kbase_job_hw_submit(kbdev, katom[idx], js); + katom[idx]->gpu_rb_state = + KBASE_ATOM_GPU_RB_SUBMITTED; + + /* ***FALLTHROUGH: TRANSITION TO HIGHER STATE*** */ + + case KBASE_ATOM_GPU_RB_SUBMITTED: + /* Atom submitted to HW, nothing else to do */ + break; + + case KBASE_ATOM_GPU_RB_RETURN_TO_JS: + /* Only return if head atom or previous atom + * already removed - as atoms must be returned + * in order */ + if (idx == 0 || katom[0]->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + kbase_gpu_dequeue_atom(kbdev, js); + kbase_jm_return_atom_to_js(kbdev, + katom[idx]); + } + break; + } + } + } + + /* Warn if PRLAM-8987 affinity restrictions are violated */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) + WARN_ON((kbase_gpu_atoms_submitted(kbdev, 0) || + kbase_gpu_atoms_submitted(kbdev, 1)) && + kbase_gpu_atoms_submitted(kbdev, 2)); +} + + +void kbase_backend_run_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + kbase_gpu_enqueue_atom(kbdev, katom); + kbase_gpu_slot_update(kbdev); +} + +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js) +{ + struct kbase_jd_atom *katom; + struct kbase_jd_atom *next_katom; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + katom = kbase_gpu_inspect(kbdev, js, 0); + next_katom = kbase_gpu_inspect(kbdev, js, 1); + + if (next_katom && katom->kctx == next_katom->kctx && + next_katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED && + (kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_LO), NULL) + != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI), NULL) + != 0)) { + kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), + JS_COMMAND_NOP, NULL); + next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; + return true; + } + + return false; +} + +void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + u32 completion_code, + u64 job_tail, + ktime_t *end_timestamp) +{ + struct 
kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0); + struct kbase_context *kctx = katom->kctx; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6787) && + completion_code != BASE_JD_EVENT_DONE && + !(completion_code & BASE_JD_SW_EVENT)) { + katom->need_cache_flush_cores_retained = katom->affinity; + kbase_pm_request_cores(kbdev, false, katom->affinity); + } else if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10676)) { + if (kbdev->gpu_props.num_core_groups > 1 && + !(katom->affinity & + kbdev->gpu_props.props.coherency_info.group[0].core_mask + ) && + (katom->affinity & + kbdev->gpu_props.props.coherency_info.group[1].core_mask + )) { + dev_info(kbdev->dev, "JD: Flushing cache due to PRLAM-10676\n"); + katom->need_cache_flush_cores_retained = + katom->affinity; + kbase_pm_request_cores(kbdev, false, + katom->affinity); + } + } + + katom = kbase_gpu_dequeue_atom(kbdev, js); + + kbase_timeline_job_slot_done(kbdev, katom->kctx, katom, js, 0); + + if (completion_code == BASE_JD_EVENT_STOPPED) { + struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js, + 0); + + /* + * Dequeue next atom from ringbuffers on same slot if required. + * This atom will already have been removed from the NEXT + * registers by kbase_gpu_soft_hard_stop_slot(), to ensure that + * the atoms on this slot are returned in the correct order. + */ + if (next_katom && katom->kctx == next_katom->kctx) { + kbase_gpu_dequeue_atom(kbdev, js); + kbase_jm_return_atom_to_js(kbdev, next_katom); + } + } else if (completion_code != BASE_JD_EVENT_DONE) { + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + int i; + +#if KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR != 0 + KBASE_TRACE_DUMP(kbdev); +#endif + kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); + + /* + * Remove all atoms on the same context from ringbuffers. This + * will not remove atoms that are already on the GPU, as these + * are guaranteed not to have fail dependencies on the failed + * atom. 
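+ *
+ * Atoms are removed in ringbuffer order: the atom at index 1 is
+ * only dequeued here once the atom at index 0 has been dequeued;
+ * otherwise it is just marked for return and dequeued when the
+ * atom at index 0 completes (see the idx0/idx1 handling below).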
+ */
+ for (i = 0; i < kbdev->gpu_props.num_job_slots; i++) {
+ struct kbase_jd_atom *katom_idx0 =
+ kbase_gpu_inspect(kbdev, i, 0);
+ struct kbase_jd_atom *katom_idx1 =
+ kbase_gpu_inspect(kbdev, i, 1);
+
+ if (katom_idx0 && katom_idx0->kctx == katom->kctx &&
+ katom_idx0->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Dequeue katom_idx0 from ringbuffer */
+ kbase_gpu_dequeue_atom(kbdev, i);
+
+ if (katom_idx1 &&
+ katom_idx1->kctx == katom->kctx &&
+ katom_idx1->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Dequeue katom_idx1 from ringbuffer */
+ kbase_gpu_dequeue_atom(kbdev, i);
+
+ katom_idx1->event_code =
+ BASE_JD_EVENT_STOPPED;
+ kbase_jm_return_atom_to_js(kbdev,
+ katom_idx1);
+ }
+ katom_idx0->event_code = BASE_JD_EVENT_STOPPED;
+ kbase_jm_return_atom_to_js(kbdev, katom_idx0);
+
+ } else if (katom_idx1 &&
+ katom_idx1->kctx == katom->kctx &&
+ katom_idx1->gpu_rb_state !=
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ /* Cannot dequeue this atom yet - it will be
+ * dequeued when the atom at idx0 completes */
+ katom_idx1->event_code = BASE_JD_EVENT_STOPPED;
+ kbase_gpu_mark_atom_for_return(kbdev,
+ katom_idx1);
+ }
+ }
+ }
+
+ KBASE_TRACE_ADD_SLOT_INFO(kbdev, JM_JOB_DONE, kctx, katom, katom->jc,
+ js, completion_code);
+
+ if (job_tail != 0 && job_tail != katom->jc) {
+ bool was_updated = (job_tail != katom->jc);
+
+ /* Some of the job has been executed, so we update the job chain
+ * address to where we should resume from */
+ katom->jc = job_tail;
+ if (was_updated)
+ KBASE_TRACE_ADD_SLOT(kbdev, JM_UPDATE_HEAD, katom->kctx,
+ katom, job_tail, js);
+ }
+
+ /* Only update the event code for jobs that weren't cancelled */
+ if (katom->event_code != BASE_JD_EVENT_JOB_CANCELLED)
+ katom->event_code = (base_jd_event_code)completion_code;
+
+ kbase_device_trace_register_access(kctx, REG_WRITE,
+ JOB_CONTROL_REG(JOB_IRQ_CLEAR),
+ 1 << js);
+
+ /* Complete the job, and start new ones
+ *
+ * Also defer remaining work onto the workqueue:
+ * - Re-queue Soft-stopped jobs
+ * - For any other jobs, queue the job back into the dependency system
+ * - Schedule out the parent context if necessary, and schedule a new
+ * one in.
+ */
+#ifdef CONFIG_GPU_TRACEPOINTS
+ {
+ /* The atom in the HEAD */
+ struct kbase_jd_atom *next_katom = kbase_gpu_inspect(kbdev, js,
+ 0);
+
+ if (next_katom && next_katom->gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_SUBMITTED) {
+ char js_string[16];
+
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+ js_string),
+ ktime_to_ns(*end_timestamp),
+ (u32)next_katom->kctx, 0,
+ next_katom->work_id);
+ kbdev->hwaccess.backend.slot_rb[js].last_context =
+ next_katom->kctx;
+ } else {
+ char js_string[16];
+
+ trace_gpu_sched_switch(kbasep_make_job_slot_string(js,
+ js_string),
+ ktime_to_ns(ktime_get()), 0, 0,
+ 0);
+ kbdev->hwaccess.backend.slot_rb[js].last_context = 0;
+ }
+ }
+#endif
+
+ if (completion_code == BASE_JD_EVENT_STOPPED)
+ kbase_jm_return_atom_to_js(kbdev, katom);
+ else
+ kbase_jm_complete(kbdev, katom, end_timestamp);
+
+ /* Job completion may have unblocked other atoms.
Try to update all job + * slots */ + kbase_gpu_slot_update(kbdev); +} + +void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp) +{ + int js; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int idx; + + for (idx = 0; idx < 2; idx++) { + /* Always inspect index 0: the dequeue below shifts the + * ringbuffer, so the former index 1 atom is at index 0 + * on the next pass */ + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, 0); + + if (katom) { + enum kbase_atom_gpu_rb_state gpu_rb_state = + katom->gpu_rb_state; + + kbase_gpu_release_atom(kbdev, katom); + kbase_gpu_dequeue_atom(kbdev, js); + + if (gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + katom->event_code = + BASE_JD_EVENT_JOB_CANCELLED; + kbase_jm_complete(kbdev, katom, + end_timestamp); + } else { + katom->event_code = + BASE_JD_EVENT_STOPPED; + kbase_jm_return_atom_to_js(kbdev, + katom); + } + } + } + } +} + +static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, + int js, + struct kbase_jd_atom *katom, + u32 action) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + u32 hw_action = action & JS_COMMAND_MASK; + + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, katom); + kbasep_job_slot_soft_or_hard_stop_do_action(kbdev, js, hw_action, + katom->core_req, katom); + kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); +} + +static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + u32 action, + bool disjoint) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_gpu_mark_atom_for_return(kbdev, katom); + kbasep_js_clear_submit_allowed(js_devdata, katom->kctx); + + if (disjoint) + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, + katom); +} + +static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) +{ + if (katom->x_post_dep) { + struct kbase_jd_atom *dep_atom = katom->x_post_dep; + + if (dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB && + dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_RETURN_TO_JS) + return dep_atom->slot_nr; + } + return -1; +} + +bool kbase_backend_soft_hard_stop_slot(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js, + struct kbase_jd_atom *katom, + u32 action) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + struct kbase_jd_atom *katom_idx0; + struct kbase_jd_atom *katom_idx1; + + bool katom_idx0_valid, katom_idx1_valid; + + bool ret = false; + + int stop_x_dep_idx0 = -1, stop_x_dep_idx1 = -1; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + katom_idx0 = kbase_gpu_inspect(kbdev, js, 0); + katom_idx1 = kbase_gpu_inspect(kbdev, js, 1); + + if (katom) { + katom_idx0_valid = (katom_idx0 == katom); + /* If idx0 is to be removed and idx1 is on the same context, + * then idx1 must also be removed otherwise the atoms might be + * returned out of order */ + if (katom_idx1) + katom_idx1_valid = (katom_idx1 == katom) || + (katom_idx0_valid && + (katom_idx0->kctx == + katom_idx1->kctx)); + else + katom_idx1_valid = false; + } else { + katom_idx0_valid = (katom_idx0 && + (!kctx || katom_idx0->kctx == kctx)); + katom_idx1_valid = (katom_idx1 && + (!kctx || katom_idx1->kctx == kctx)); + } + + if (katom_idx0_valid) + stop_x_dep_idx0 = should_stop_x_dep_slot(katom_idx0); + if (katom_idx1_valid) + stop_x_dep_idx1 = should_stop_x_dep_slot(katom_idx1); + + if (katom_idx0_valid) { + if (katom_idx0->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Simple case - just dequeue and return */ + 
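+ /* idx0 has not been submitted to the HW (and therefore neither + * has idx1), so both can be removed without touching the job + * slot registers. */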
kbase_gpu_dequeue_atom(kbdev, js); + if (katom_idx1_valid) { + kbase_gpu_dequeue_atom(kbdev, js); + katom_idx1->event_code = + BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_jm_return_atom_to_js(kbdev, katom_idx1); + kbasep_js_clear_submit_allowed(js_devdata, + katom_idx1->kctx); + } + + katom_idx0->event_code = + BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_jm_return_atom_to_js(kbdev, katom_idx0); + kbasep_js_clear_submit_allowed(js_devdata, + katom_idx0->kctx); + } else { + /* katom_idx0 is on GPU */ + if (katom_idx1 && katom_idx1->gpu_rb_state == + KBASE_ATOM_GPU_RB_SUBMITTED) { + /* katom_idx0 and katom_idx1 are on GPU */ + + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), NULL) == 0) { + /* idx0 has already completed - stop + * idx1 if needed*/ + if (katom_idx1_valid) { + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } else { + /* idx1 is in NEXT registers - attempt + * to remove */ + kbase_reg_write(kbdev, + JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP, NULL); + + if (kbase_reg_read(kbdev, + JOB_SLOT_REG(js, + JS_HEAD_NEXT_LO), NULL) + != 0 || + kbase_reg_read(kbdev, + JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI), NULL) + != 0) { + /* idx1 removed successfully, + * will be handled in IRQ */ + kbase_gpu_remove_atom(kbdev, + katom_idx1, + action, true); + stop_x_dep_idx1 = + should_stop_x_dep_slot(katom_idx1); + + /* stop idx0 if still on GPU */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx0, + action); + ret = true; + } else if (katom_idx1_valid) { + /* idx0 has already completed, + * stop idx1 if needed */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } + } else if (katom_idx1_valid) { + /* idx1 not on GPU but must be dequeued*/ + + /* idx1 will be handled in IRQ */ + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + false); + /* stop idx0 */ + /* This will be repeated for anything removed + * from the next registers, since their normal + * flow was also interrupted, and this function + * might not enter disjoint state e.g. 
if we + * don't actually do a hard stop on the head + * atom */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; + } else { + /* no atom in idx1 */ + /* just stop idx0 */ + kbase_gpu_stop_atom(kbdev, js, katom_idx0, + action); + ret = true; + } + } + } else if (katom_idx1_valid) { + if (katom_idx1->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED) { + /* Mark for return */ + /* idx1 will be returned once idx0 completes */ + kbase_gpu_remove_atom(kbdev, katom_idx1, action, + false); + } else { + /* idx1 is on GPU */ + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), NULL) == 0) { + /* idx0 has already completed - stop idx1 */ + kbase_gpu_stop_atom(kbdev, js, katom_idx1, + action); + ret = true; + } else { + /* idx1 is in NEXT registers - attempt to + * remove */ + kbase_reg_write(kbdev, JOB_SLOT_REG(js, + JS_COMMAND_NEXT), + JS_COMMAND_NOP, NULL); + + if (kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_LO), NULL) != 0 || + kbase_reg_read(kbdev, JOB_SLOT_REG(js, + JS_HEAD_NEXT_HI), NULL) != 0) { + /* idx1 removed successfully, will be + * handled in IRQ once idx0 completes */ + kbase_gpu_remove_atom(kbdev, katom_idx1, + action, + false); + } else { + /* idx0 has already completed - stop + * idx1 */ + kbase_gpu_stop_atom(kbdev, js, + katom_idx1, + action); + ret = true; + } + } + } + } + + + if (stop_x_dep_idx0 != -1) + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx0, + NULL, action); + + if (stop_x_dep_idx1 != -1) + kbase_backend_soft_hard_stop_slot(kbdev, kctx, stop_x_dep_idx1, + NULL, action); + + return ret; +} + +static void kbasep_gpu_cacheclean(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + /* Limit the number of loops to avoid a hang if the interrupt is missed + */ + u32 max_loops = KBASE_CLEAN_CACHE_MAX_LOOPS; + + mutex_lock(&kbdev->cacheclean_lock); + + /* use GPU_COMMAND completion solution */ + /* clean & invalidate the caches */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_CLEAN_INV_CACHES, NULL); + + /* wait for cache flush to complete before continuing */ + while (--max_loops && + (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & + CLEAN_CACHES_COMPLETED) == 0) + ; + + /* clear the CLEAN_CACHES_COMPLETED irq */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, NULL, 0u, + CLEAN_CACHES_COMPLETED); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), + CLEAN_CACHES_COMPLETED, NULL); + KBASE_DEBUG_ASSERT_MSG(kbdev->hwcnt.backend.state != + KBASE_INSTR_STATE_CLEANING, + "Instrumentation code was cleaning caches, but Job Management code cleared their IRQ - Instrumentation code will now hang."); + + mutex_unlock(&kbdev->cacheclean_lock); + + kbase_pm_unrequest_cores(kbdev, false, + katom->need_cache_flush_cores_retained); +} + +void kbase_backend_complete_wq(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + /* + * If cache flush required due to HW workaround then perform the flush + * now + */ + if (katom->need_cache_flush_cores_retained) { + kbasep_gpu_cacheclean(kbdev, katom); + katom->need_cache_flush_cores_retained = 0; + } + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10969) && + (katom->core_req & BASE_JD_REQ_FS) && + katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT && + (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED) && + !(katom->atom_flags & KBASE_KATOM_FLAGS_RERUN)) { + dev_dbg(kbdev->dev, "Soft-stopped fragment shader job got a TILE_RANGE_FAULT. 
Possible HW issue, trying SW workaround\n"); + if (kbasep_10969_workaround_clamp_coordinates(katom)) { + /* The job had a TILE_RANGE_FAULT after it was soft-stopped. + * Due to a HW issue we try to execute the job again. + */ + dev_dbg(kbdev->dev, + "Clamping has been executed, try to rerun the job\n" + ); + katom->event_code = BASE_JD_EVENT_STOPPED; + katom->atom_flags |= KBASE_KATOM_FLAGS_RERUN; + } + } +} + +void kbase_gpu_dump_slots(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + unsigned long flags; + int js; + + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n"); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + int idx; + + for (idx = 0; idx < SLOT_RB_SIZE; idx++) { + struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, + js, + idx); + + if (katom) + dev_info(kbdev->dev, + " js%d idx%d : katom=%p gpu_rb_state=%d\n", + js, idx, katom, katom->gpu_rb_state); + else + dev_info(kbdev->dev, " js%d idx%d : empty\n", + js, idx); + } + } + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); +} + + + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h new file mode 100644 index 00000000000..13d600c47b1 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_jm_rb.h @@ -0,0 +1,83 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/** + * @file mali_kbase_jm_rb.h + * Register-based HW access backend specific APIs + */ + +#ifndef _KBASE_HWACCESS_GPU_H_ +#define _KBASE_HWACCESS_GPU_H_ + +#include <backend/gpu/mali_kbase_pm_internal.h> + +/** + * Evict the atom in the NEXT slot for the specified job slot. This function is + * called from the job complete IRQ handler when the previous job has failed. + * + * @param[in] kbdev Device pointer + * @param[in] js Job slot to evict from + * @return true if job evicted from NEXT registers + * false otherwise + */ +bool kbase_gpu_irq_evict(struct kbase_device *kbdev, int js); + +/** + * Complete an atom on job slot js + * + * @param[in] kbdev Device pointer + * @param[in] js Job slot that has completed + * @param[in] completion_code Completion code from the job that has completed + * @param[in] job_tail Job tail address, if the job was only partially + * completed + * @param[in] end_timestamp Time of completion + */ +void kbase_gpu_complete_hw(struct kbase_device *kbdev, int js, + u32 completion_code, + u64 job_tail, + ktime_t *end_timestamp); + +/** + * Inspect the contents of the HW access ringbuffer + * + * @param[in] kbdev Device pointer + * @param[in] js Job slot to inspect + * @param[in] idx Index into ringbuffer. 0 is the job currently running on + * the slot, 1 is the job waiting, all other values are + * invalid. + * @return The atom at that position in the ringbuffer + * NULL if no atom present + */ +struct kbase_jd_atom *kbase_gpu_inspect(struct kbase_device *kbdev, int js, + int idx); + +/** + * Inspect the jobs in the slot ringbuffers and update state. 
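+ * Atoms are moved through the ringbuffer state machine as far as their + * dependencies and the current power state allow.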
+ * + * This will cause jobs to be submitted to hardware if they are unblocked + * + * @param[in] kbdev Device pointer + */ +void kbase_gpu_slot_update(struct kbase_device *kbdev); + +/** + * Print the contents of the slot ringbuffers + * + * @param[in] kbdev Device pointer + */ +void kbase_gpu_dump_slots(struct kbase_device *kbdev); + +#endif /* _KBASE_HWACCESS_GPU_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c new file mode 100644 index 00000000000..1e29d5fb223 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c @@ -0,0 +1,296 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js_affinity.c + * Base kernel affinity manager APIs + */ + +#include <mali_kbase.h> +#include "mali_kbase_js_affinity.h" + +#include <backend/gpu/mali_kbase_pm_internal.h> + + +bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, + int js) +{ + /* + * Here are the reasons for using job slot 2: + * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose) + * - In absence of the above, then: + * - Atoms with BASE_JD_REQ_COHERENT_GROUP + * - But, only when there aren't contexts with + * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on + * all cores on slot 1 could be blocked by those using a coherent group + * on slot 2 + * - And, only when you actually have 2 or more coregroups - if you + * only have 1 coregroup, then having jobs for slot 2 implies they'd + * also be for slot 1, meaning you'll get interference from them. Jobs + * able to run on slot 2 could also block jobs that can only run on + * slot 1 (tiler jobs) + */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) + return true; + + if (js != 2) + return true; + + /* Only deal with js==2 now: */ + if (kbdev->gpu_props.num_core_groups > 1) { + /* Only use slot 2 in the 2+ coregroup case */ + if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev, + KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) == + false) { + /* ...But only when we *don't* have atoms that run on + * all cores */ + + /* No specific check for BASE_JD_REQ_COHERENT_GROUP + * atoms - the policy will sort that out */ + return true; + } + } + + /* If the above checks failed, we shouldn't use slot 2 */ + return false; +} + +/* + * Until the planned deeper rework of the job scheduler, power manager + * and affinity manager is decided on and implemented, this function is + * just an intermediate step that assumes: + * - all working cores will be powered on when this is called. + * - largest current configuration is 2 core groups. + * - It has been decided not to have hardcoded values so the low + * and high cores in a core split will be evenly distributed. + * - Odd combinations of core requirements have been filtered out + * and do not get to this function (e.g. CS+T+NSS is not + * supported here). 
+ * - This function is frequently called and can be optimized, + * (see notes in loops), but as the functionality will likely + * be modified, optimization has not been addressed. +*/ +bool kbase_js_choose_affinity(u64 * const affinity, + struct kbase_device *kbdev, + struct kbase_jd_atom *katom, int js) +{ + base_jd_core_req core_req = katom->core_req; + unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; + u64 core_availability_mask; + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + core_availability_mask = kbase_pm_ca_get_core_mask(kbdev); + + /* + * If no cores are currently available (core availability policy is + * transitioning) then fail. + */ + if (0 == core_availability_mask) { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + *affinity = 0; + return false; + } + + KBASE_DEBUG_ASSERT(js >= 0); + + if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) == + BASE_JD_REQ_T) { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + /* Tiler only job, bit 0 needed to enable tiler but no shader + * cores required */ + *affinity = 1; + return true; + } + + if (1 == kbdev->gpu_props.num_cores) { + /* trivial case: only one core, nothing to do */ + *affinity = core_availability_mask; + } else { + if ((core_req & (BASE_JD_REQ_COHERENT_GROUP | + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) { + if (js == 0 || num_core_groups == 1) { + /* js[0] and single-core-group systems just get + * the first core group */ + *affinity = + kbdev->gpu_props.props.coherency_info.group[0].core_mask + & core_availability_mask; + } else { + /* js[1], js[2] use core groups 0, 1 for + * dual-core-group systems */ + u32 core_group_idx = ((u32) js) - 1; + + KBASE_DEBUG_ASSERT(core_group_idx < + num_core_groups); + *affinity = + kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask + & core_availability_mask; + + /* If the job is specifically targeting core + * group 1 and the core availability policy is + * keeping that core group off, then fail */ + if (*affinity == 0 && core_group_idx == 1 && + kbdev->pm.backend.cg1_disabled + == true) + katom->event_code = + BASE_JD_EVENT_PM_EVENT; + } + } else { + /* All cores are available when no core split is + * required */ + *affinity = core_availability_mask; + } + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + /* + * If no cores are currently available in the desired core group(s) + * (core availability policy is transitioning) then fail. + */ + if (*affinity == 0) + return false; + + /* Enable core 0 if tiler required */ + if (core_req & BASE_JD_REQ_T) + *affinity = *affinity | 1; + + return true; +} + +static inline bool kbase_js_affinity_is_violating( + struct kbase_device *kbdev, + u64 *affinities) +{ + /* This implementation checks whether the two slots involved in Generic + * thread creation have intersecting affinity. This is due to micro- + * architectural issues where a job in slot A targeting cores used by + * slot B could prevent the job in slot B from making progress until the + * job in slot A has completed. 
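+ * For example, slot affinities 0x3 (cores 0-1) and 0x6 (cores 1-2) + * intersect on core 1, so running them together would violate the + * restriction.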
+ */ + u64 affinity_set_left; + u64 affinity_set_right; + u64 intersection; + + KBASE_DEBUG_ASSERT(affinities != NULL); + + affinity_set_left = affinities[1]; + + affinity_set_right = affinities[2]; + + /* A violation occurs when any bit in the left_set is also in the + * right_set */ + intersection = affinity_set_left & affinity_set_right; + + return (bool) (intersection != (u64) 0u); +} + +bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, + u64 affinity) +{ + struct kbasep_js_device_data *js_devdata; + u64 new_affinities[BASE_JM_MAX_NR_SLOTS]; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities, + sizeof(js_devdata->runpool_irq.slot_affinities)); + + new_affinities[js] |= affinity; + + return kbase_js_affinity_is_violating(kbdev, new_affinities); +} + +void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, + u64 affinity) +{ + struct kbasep_js_device_data *js_devdata; + u64 cores; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity) + == false); + + cores = affinity; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + s8 cnt; + + cnt = + ++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); + + if (cnt == 1) + js_devdata->runpool_irq.slot_affinities[js] |= bit; + + cores &= ~bit; + } +} + +void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, + u64 affinity) +{ + struct kbasep_js_device_data *js_devdata; + u64 cores; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS); + js_devdata = &kbdev->js_data; + + cores = affinity; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + s8 cnt; + + KBASE_DEBUG_ASSERT( + js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0); + + cnt = + --(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]); + + if (0 == cnt) + js_devdata->runpool_irq.slot_affinities[js] &= ~bit; + + cores &= ~bit; + } +} + +#if KBASE_TRACE_ENABLE +void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + int slot_nr; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + for (slot_nr = 0; slot_nr < 3; ++slot_nr) + KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL, + NULL, 0u, slot_nr, + (u32) js_devdata->runpool_irq.slot_affinities[slot_nr]); +} +#endif /* KBASE_TRACE_ENABLE */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h new file mode 100644 index 00000000000..541e554983e --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.h @@ -0,0 +1,140 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_js_affinity.h + * Affinity Manager internal APIs. + */ + +#ifndef _KBASE_JS_AFFINITY_H_ +#define _KBASE_JS_AFFINITY_H_ + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS +/* Import the external affinity mask variables */ +extern u64 mali_js0_affinity_mask; +extern u64 mali_js1_affinity_mask; +extern u64 mali_js2_affinity_mask; +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ + + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js_affinity Affinity Manager internal APIs. + * @{ + * + */ + +/** + * @brief Decide whether it is possible to submit a job to a particular job slot + * in the current status + * + * Will check if submitting to the given job slot is allowed in the current + * status. For example using job slot 2 while in soft-stoppable state and only + * having 1 coregroup is not allowed by the policy. This function should be + * called prior to submitting a job to a slot to make sure policy rules are not + * violated. + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_device_data::runpool_irq::lock + * + * @param kbdev The kbase device structure of the device + * @param js Job slot number to check for allowance + */ +bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev, + int js); + +/** + * @brief Compute affinity for a given job. + * + * Currently assumes an all-on/all-off power management policy. + * Also assumes there is at least one core with tiler available. + * + * Returns true if a valid affinity was chosen, false if + * no cores were available. + * + * @param[out] affinity Affinity bitmap computed + * @param kbdev The kbase device structure of the device + * @param katom Job chain of which affinity is going to be found + * @param js Slot the job chain is being submitted + */ +bool kbase_js_choose_affinity(u64 * const affinity, + struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + int js); + +/** + * @brief Determine whether a proposed \a affinity on job slot \a js would + * cause a violation of affinity restrictions. + * + * The following locks must be held by the caller: + * - kbasep_js_device_data::runpool_irq::lock + */ +bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js, + u64 affinity); + +/** + * @brief Affinity tracking: retain cores used by a slot + * + * The following locks must be held by the caller: + * - kbasep_js_device_data::runpool_irq::lock + */ +void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js, + u64 affinity); + +/** + * @brief Affinity tracking: release cores used by a slot + * + * Cores \b must be released as soon as a job is dequeued from a slot's 'submit + * slots', and before another job is submitted to those slots. Otherwise, the + * refcount could exceed the maximum number submittable to a slot, + * BASE_JM_SUBMIT_SLOTS. 
+ * + * The following locks must be held by the caller: + * - kbasep_js_device_data::runpool_irq::lock + */ +void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js, + u64 affinity); + +/** + * @brief Output to the Trace log the current tracked affinities on all slots + */ +#if KBASE_TRACE_ENABLE +void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev); +#else /* KBASE_TRACE_ENABLE */ +static inline void +kbase_js_debug_log_current_affinities(struct kbase_device *kbdev) +{ +} +#endif /* KBASE_TRACE_ENABLE */ + + /** @} *//* end group kbase_js_affinity */ + /** @} *//* end group base_kbase_api */ + /** @} *//* end group base_api */ + + +#endif /* _KBASE_JS_AFFINITY_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c new file mode 100644 index 00000000000..04bfa519039 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_backend.c @@ -0,0 +1,316 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register-based HW access backend specific job scheduler APIs + */ + +#include <mali_kbase.h> +#include <mali_kbase_hwaccess_jm.h> +#include <backend/gpu/mali_kbase_jm_internal.h> +#include <backend/gpu/mali_kbase_js_internal.h> + +/* + * Define for when dumping is enabled. + * This should not be based on the instrumentation level as whether dumping is + * enabled for a particular level is down to the integrator. However this is + * being used for now as otherwise the cinstr headers would be needed. + */ +#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL) + +/* + * The caller must hold the runpool_mutex for this + */ +static inline bool timer_callback_should_run(struct kbase_device *kbdev) +{ + s8 nr_running_ctxs; + + lockdep_assert_held(&kbdev->js_data.runpool_mutex); + + /* nr_contexts_runnable is updated with the runpool_mutex. However, the + * locking in the caller gives us a barrier that ensures + * nr_contexts_runnable is up-to-date for reading */ + nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); + +#ifdef CONFIG_MALI_DEBUG + if (kbdev->js_data.softstop_always) { + /* Debug support for allowing soft-stop on a single context */ + return true; + } +#endif /* CONFIG_MALI_DEBUG */ + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9435)) { + /* Timeouts would have to be 4x longer (due to micro- + * architectural design) to support OpenCL conformance tests, so + * only run the timer when there's: + * - 2 or more CL contexts + * - 1 or more GLES contexts + * + * NOTE: A context that has both Compute and Non-Compute jobs is + * treated as an OpenCL context (hence, we don't check + * KBASEP_JS_CTX_ATTR_NON_COMPUTE). 
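+ * For example: a single pure-CL context does not run the timer, but + * two CL contexts, or at least one GLES context, will.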
+ */ + { + s8 nr_compute_ctxs = + kbasep_js_ctx_attr_count_on_runpool(kbdev, + KBASEP_JS_CTX_ATTR_COMPUTE); + s8 nr_noncompute_ctxs = nr_running_ctxs - + nr_compute_ctxs; + + return (bool) (nr_compute_ctxs >= 2 || + nr_noncompute_ctxs > 0); + } + } else { + /* Run the timer callback whenever you have at least 1 context + */ + return (bool) (nr_running_ctxs > 0); + } +} + +static enum hrtimer_restart timer_callback(struct hrtimer *timer) +{ + unsigned long flags; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + struct kbase_backend_data *backend; + int s; + bool reset_needed = false; + + KBASE_DEBUG_ASSERT(timer != NULL); + + backend = container_of(timer, struct kbase_backend_data, + scheduling_timer); + kbdev = container_of(backend, struct kbase_device, hwaccess.backend); + js_devdata = &kbdev->js_data; + + /* Loop through the slots */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + for (s = 0; s < kbdev->gpu_props.num_job_slots; s++) { + struct kbase_jd_atom *atom = NULL; + + if (kbase_backend_nr_atoms_on_slot(kbdev, s) > 0) { + atom = kbase_gpu_inspect(kbdev, s, 0); + KBASE_DEBUG_ASSERT(atom != NULL); + } + + if (atom != NULL) { + /* The current version of the model doesn't support + * Soft-Stop */ + if (!kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_5736)) { + u32 ticks = atom->sched_info.cfs.ticks++; + +#if !CINSTR_DUMPING_ENABLED + u32 soft_stop_ticks, hard_stop_ticks, + gpu_reset_ticks; + if (atom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + soft_stop_ticks = + js_devdata->soft_stop_ticks_cl; + hard_stop_ticks = + js_devdata->hard_stop_ticks_cl; + gpu_reset_ticks = + js_devdata->gpu_reset_ticks_cl; + } else { + soft_stop_ticks = + js_devdata->soft_stop_ticks; + hard_stop_ticks = + js_devdata->hard_stop_ticks_ss; + gpu_reset_ticks = + js_devdata->gpu_reset_ticks_ss; + } + + /* Job is Soft-Stoppable */ + if (ticks == soft_stop_ticks) { + int disjoint_threshold = + KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD; + u32 softstop_flags = 0u; + /* Job has been scheduled for at least + * js_devdata->soft_stop_ticks ticks. + * Soft stop the slot so we can run + * other jobs. + */ + dev_dbg(kbdev->dev, "Soft-stop"); +#if !KBASE_DISABLE_SCHEDULING_SOFT_STOPS + /* nr_user_contexts_running is updated + * with the runpool_mutex, but we can't + * take that here. + * + * However, if it's about to be + * increased then the new context can't + * run any jobs until they take the + * runpool_irq lock, so it's OK to + * observe the older value. + * + * Similarly, if it's about to be + * decreased, the last job from another + * context has already finished, so it's + * not too bad that we observe the older + * value and register a disjoint event + * when we try soft-stopping */ + if (js_devdata->nr_user_contexts_running + >= disjoint_threshold) + softstop_flags |= + JS_COMMAND_SW_CAUSES_DISJOINT; + + kbase_job_slot_softstop_swflags(kbdev, + s, atom, softstop_flags); +#endif + } else if (ticks == hard_stop_ticks) { + /* Job has been scheduled for at least + * js_devdata->hard_stop_ticks_ss ticks. + * It should have been soft-stopped by + * now. Hard stop the slot. 
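+ * A hard stop terminates the job immediately, rather than + * waiting for it to reach a point where it can be + * stopped safely.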
+ */ +#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS + int ms = + js_devdata->scheduling_period_ns + / 1000000u; + dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", + (unsigned long)ticks, + (unsigned long)ms); + kbase_job_slot_hardstop(atom->kctx, s, + atom); +#endif + } else if (ticks == gpu_reset_ticks) { + /* Job has been scheduled for at least + * js_devdata->gpu_reset_ticks_ss ticks. + * It should have left the GPU by now. + * Signal that the GPU needs to be + * reset. + */ + reset_needed = true; + } +#else /* !CINSTR_DUMPING_ENABLED */ + /* NOTE: During CINSTR_DUMPING_ENABLED, we use + * the alternate timeouts, which makes the hard- + * stop and GPU reset timeout much longer. We + * also ensure that we don't soft-stop at all. + */ + if (ticks == js_devdata->soft_stop_ticks) { + /* Job has been scheduled for at least + * js_devdata->soft_stop_ticks. We do + * not soft-stop during + * CINSTR_DUMPING_ENABLED, however. + */ + dev_dbg(kbdev->dev, "Soft-stop"); + } else if (ticks == + js_devdata->hard_stop_ticks_dumping) { + /* Job has been scheduled for at least + * js_devdata->hard_stop_ticks_dumping + * ticks. Hard stop the slot. + */ +#if !KBASE_DISABLE_SCHEDULING_HARD_STOPS + int ms = + js_devdata->scheduling_period_ns + / 1000000u; + dev_warn(kbdev->dev, "JS: Job Hard-Stopped (took more than %lu ticks at %lu ms/tick)", + (unsigned long)ticks, + (unsigned long)ms); + kbase_job_slot_hardstop(atom->kctx, s, + atom); +#endif + } else if (ticks == + js_devdata->gpu_reset_ticks_dumping) { + /* Job has been scheduled for at least + * js_devdata->gpu_reset_ticks_dumping + * ticks. It should have left the GPU by + * now. Signal that the GPU needs to be + * reset. + */ + reset_needed = true; + } +#endif /* !CINSTR_DUMPING_ENABLED */ + } + } + } +#if KBASE_GPU_RESET_EN + if (reset_needed) { + dev_err(kbdev->dev, "JS: Job has been on the GPU for too long (JS_RESET_TICKS_SS/DUMPING timeout hit). Issuing GPU soft-reset to resolve."); + + if (kbase_prepare_to_reset_gpu_locked(kbdev)) + kbase_reset_gpu_locked(kbdev); + } +#endif /* KBASE_GPU_RESET_EN */ + /* the timer is re-issued if there are contexts in the run-pool */ + + if (backend->timer_running) + hrtimer_start(&backend->scheduling_timer, + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return HRTIMER_NORESTART; +} + +void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + unsigned long flags; + + lockdep_assert_held(&js_devdata->runpool_mutex); + + if (!timer_callback_should_run(kbdev)) { + /* Take spinlock to force synchronisation with timer */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + backend->timer_running = false; + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + /* From now on, the return value of timer_callback_should_run() + * will also cause the timer not to requeue itself. 
Its return value + * cannot change, because it depends on variables updated with + * the runpool_mutex held, which the caller of this must also + * hold */ + hrtimer_cancel(&backend->scheduling_timer); + } + + if (timer_callback_should_run(kbdev) && !backend->timer_running) { + /* Take spinlock to force synchronisation with timer */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + backend->timer_running = true; + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + hrtimer_start(&backend->scheduling_timer, + HR_TIMER_DELAY_NSEC(js_devdata->scheduling_period_ns), + HRTIMER_MODE_REL); + + KBASE_TRACE_ADD(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, + 0u); + } +} + +int kbase_backend_timer_init(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + backend->scheduling_timer.function = timer_callback; + + backend->timer_running = false; + + return 0; +} + +void kbase_backend_timer_term(struct kbase_device *kbdev) +{ + struct kbase_backend_data *backend = &kbdev->hwaccess.backend; + + hrtimer_cancel(&backend->scheduling_timer); +} + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h new file mode 100644 index 00000000000..3c101e4320d --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_internal.h @@ -0,0 +1,44 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Register-based HW access backend specific job scheduler APIs + */ + +#ifndef _KBASE_JS_BACKEND_H_ +#define _KBASE_JS_BACKEND_H_ + +/** + * kbase_backend_timer_init() - Initialise the JS scheduling timer + * @kbdev: Device pointer + * + * This function should be called at driver initialisation + * + * Return: 0 on success + */ +int kbase_backend_timer_init(struct kbase_device *kbdev); + +/** + * kbase_backend_timer_term() - Terminate the JS scheduling timer + * @kbdev: Device pointer + * + * This function should be called at driver termination + */ +void kbase_backend_timer_term(struct kbase_device *kbdev); + +#endif /* _KBASE_JS_BACKEND_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c new file mode 100644 index 00000000000..e06dc58621f --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.c @@ -0,0 +1,295 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#include <linux/bitops.h> + +#include <mali_kbase.h> +#include <mali_kbase_mem.h> +#include <mali_kbase_mmu_hw.h> +#include <backend/gpu/mali_kbase_mmu_hw_direct.h> +#include <backend/gpu/mali_kbase_device_internal.h> + +static inline u64 lock_region(struct kbase_device *kbdev, u64 pfn, + u32 num_pages) +{ + u64 region; + + /* can't lock a zero sized range */ + KBASE_DEBUG_ASSERT(num_pages); + + region = pfn << PAGE_SHIFT; + /* + * fls returns (given the ASSERT above): + * 1 .. 32 + * + * 10 + fls(num_pages) + * results in the range (11 .. 42) + */ + + /* gracefully handle num_pages being zero, which can happen in release + * builds where the ASSERT above compiles out */ + if (0 == num_pages) { + region |= 11; + } else { + u8 region_width; + + region_width = 10 + fls(num_pages); + if (num_pages != (1ul << (region_width - 11))) { + /* not pow2, so must go up to the next pow2 */ + region_width += 1; + } + KBASE_DEBUG_ASSERT(region_width <= KBASE_LOCK_REGION_MAX_SIZE); + KBASE_DEBUG_ASSERT(region_width >= KBASE_LOCK_REGION_MIN_SIZE); + region |= region_width; + } + + return region; +} + +static int wait_ready(struct kbase_device *kbdev, + unsigned int as_nr, struct kbase_context *kctx) +{ + unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS; + + /* Wait for the MMU status to indicate there is no active command. */ + while (--max_loops && kbase_reg_read(kbdev, + MMU_AS_REG(as_nr, AS_STATUS), + kctx) & AS_STATUS_AS_ACTIVE) { + ; + } + + if (max_loops == 0) { + dev_err(kbdev->dev, "AS_ACTIVE bit stuck\n"); + return -1; + } + + return 0; +} + +static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd, + struct kbase_context *kctx) +{ + int status; + + /* write AS_COMMAND when MMU is ready to accept another command */ + status = wait_ready(kbdev, as_nr, kctx); + if (status == 0) + kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd, + kctx); + + return status; +} + +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) +{ + const int num_as = 16; + const int busfault_shift = MMU_PAGE_FAULT_FLAGS; + const int pf_shift = 0; + const unsigned long as_bit_mask = (1UL << num_as) - 1; + unsigned long flags; + u32 new_mask; + u32 tmp; + + /* bus faults */ + u32 bf_bits = (irq_stat >> busfault_shift) & as_bit_mask; + /* page faults (note: Ignore ASes with both pf and bf) */ + u32 pf_bits = ((irq_stat >> pf_shift) & as_bit_mask) & ~bf_bits; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + + /* remember current mask */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); + /* mask interrupts for now */ + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); + + while (bf_bits | pf_bits) { + struct kbase_as *as; + int as_no; + struct kbase_context *kctx; + + /* + * the while logic ensures we have a bit set, no need to check + * for not-found here + */ + as_no = ffs(bf_bits | pf_bits) - 1; + as = &kbdev->as[as_no]; + + /* + * Refcount the kctx ASAP - it shouldn't disappear anyway, since + * Bus/Page faults _should_ only occur whilst jobs are running, + * and a job causing the Bus/Page fault shouldn't complete until + * the MMU is updated + */ + kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_no); + + /* find faulting address */ + as->fault_addr = kbase_reg_read(kbdev, + MMU_AS_REG(as_no, + AS_FAULTADDRESS_HI), + kctx); + as->fault_addr <<= 32; + as->fault_addr |= kbase_reg_read(kbdev, + MMU_AS_REG(as_no, + AS_FAULTADDRESS_LO), + kctx); + + /* record the fault status */ + as->fault_status = kbase_reg_read(kbdev, + 
MMU_AS_REG(as_no, + AS_FAULTSTATUS), + kctx); + + /* find the fault type */ + as->fault_type = (bf_bits & (1 << as_no)) ? + KBASE_MMU_FAULT_TYPE_BUS : + KBASE_MMU_FAULT_TYPE_PAGE; + + + if (kbase_as_has_bus_fault(as)) { + /* Mark bus fault as handled. + * Note that a bus fault is processed first in the case + * where both a bus fault and a page fault occur. + */ + bf_bits &= ~(1UL << as_no); + + /* remove the queued BF (and PF) from the mask */ + new_mask &= ~(MMU_BUS_ERROR(as_no) | + MMU_PAGE_FAULT(as_no)); + } else { + /* Mark page fault as handled */ + pf_bits &= ~(1UL << as_no); + + /* remove the queued PF from the mask */ + new_mask &= ~MMU_PAGE_FAULT(as_no); + } + + /* Process the interrupt for this address space */ + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + kbase_mmu_interrupt_process(kbdev, kctx, as); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, + flags); + } + + /* reenable interrupts */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), NULL); + new_mask |= tmp; + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask, NULL); + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} + +void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx) +{ + struct kbase_mmu_setup *current_setup = &as->current_setup; + + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO), + current_setup->transtab & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI), + (current_setup->transtab >> 32) & 0xFFFFFFFFUL, kctx); + + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO), + current_setup->memattr & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI), + (current_setup->memattr >> 32) & 0xFFFFFFFFUL, kctx); + + write_cmd(kbdev, as->number, AS_COMMAND_UPDATE, kctx); +} + +int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, u64 vpfn, u32 nr, u32 op, + unsigned int handling_irq) +{ + int ret; + + if (op == AS_COMMAND_UNLOCK) { + /* Unlock doesn't require a lock first */ + ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + } else { + u64 lock_addr = lock_region(kbdev, vpfn, nr); + + /* Lock the region that needs to be updated */ + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO), + lock_addr & 0xFFFFFFFFUL, kctx); + kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI), + (lock_addr >> 32) & 0xFFFFFFFFUL, kctx); + write_cmd(kbdev, as->number, AS_COMMAND_LOCK, kctx); + + /* Run the MMU operation */ + write_cmd(kbdev, as->number, op, kctx); + + /* Wait for the flush to complete */ + ret = wait_ready(kbdev, as->number, kctx); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_9630)) { + /* Issue an UNLOCK command to ensure that valid page + tables are re-read by the GPU after an update. + The FLUSH command should perform all the actions + necessary; however, bus logs show that if multiple + page faults occur within an 8-page region, the MMU + does not always re-read the updated page table + entries for later faults, or reads them only + partially, and subsequently raises the page fault + IRQ for the same addresses. The UNLOCK ensures that + the MMU cache is flushed, so updates can be re-read. + As the region is now unlocked we need to issue 2 + UNLOCK commands in order to flush the MMU/uTLB, + see PRLAM-8812. 
+ */ + write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK, kctx); + } + } + + return ret; +} + +void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, enum kbase_mmu_fault_type type) +{ + u32 pf_bf_mask; + + /* Clear the page fault IRQ (and the bus fault IRQ as well, in case one + * occurred) */ + pf_bf_mask = MMU_PAGE_FAULT(as->number); + if (type == KBASE_MMU_FAULT_TYPE_BUS) + pf_bf_mask |= MMU_BUS_ERROR(as->number); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask, kctx); +} + +void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, enum kbase_mmu_fault_type type) +{ + unsigned long flags; + u32 irq_mask; + + /* Enable the page fault IRQ (and the bus fault IRQ as well, in case + * one occurred) */ + spin_lock_irqsave(&kbdev->mmu_mask_change, flags); + + irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK), kctx) | + MMU_PAGE_FAULT(as->number); + + if (type == KBASE_MMU_FAULT_TYPE_BUS) + irq_mask |= MMU_BUS_ERROR(as->number); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask, kctx); + + spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h new file mode 100644 index 00000000000..a8b3b76f47e --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_mmu_hw_direct.h @@ -0,0 +1,59 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file + * Interface file for the direct implementation of MMU hardware access + */ + +/** + * @page mali_kbase_mmu_hw_direct_page Direct MMU hardware interface + * + * @section mali_kbase_mmu_hw_direct_intro_sec Introduction + * This module provides the interface(s) that are required by the direct + * register access implementation of the MMU hardware interface + * @ref mali_kbase_mmu_hw_page . + */ + +#ifndef _MALI_KBASE_MMU_HW_DIRECT_H_ +#define _MALI_KBASE_MMU_HW_DIRECT_H_ + +#include <mali_kbase_defs.h> + +/** + * @addtogroup mali_kbase_mmu_hw + * @{ + */ + +/** + * @addtogroup mali_kbase_mmu_hw_direct Direct register access to MMU + * @{ + */ + +/** @brief Process an MMU interrupt. + * + * Process the MMU interrupt that was reported by the @ref kbase_device. + * + * @param[in] kbdev kbase device on which the interrupt was reported + * @param[in] irq_stat Value of the MMU_IRQ_STATUS register + */ +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + +/** @} *//* end group mali_kbase_mmu_hw_direct */ +/** @} *//* end group mali_kbase_mmu_hw */ + +#endif /* _MALI_KBASE_MMU_HW_DIRECT_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c new file mode 100644 index 00000000000..d18ae86f8dd --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.c @@ -0,0 +1,63 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * "Always on" power management policy + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> + +static u64 always_on_get_core_mask(struct kbase_device *kbdev) +{ + return kbdev->shader_present_bitmap; +} + +static bool always_on_get_core_active(struct kbase_device *kbdev) +{ + return true; +} + +static void always_on_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void always_on_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/* + * The struct kbase_pm_policy structure for the always_on power policy. + * + * This is the static structure that defines the always_on power policy's + * callbacks and name. + */ +const struct kbase_pm_policy kbase_pm_always_on_policy_ops = { + "always_on", /* name */ + always_on_init, /* init */ + always_on_term, /* term */ + always_on_get_core_mask, /* get_core_mask */ + always_on_get_core_active, /* get_core_active */ + 0u, /* flags */ + KBASE_PM_POLICY_ID_ALWAYS_ON, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_always_on_policy_ops); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h new file mode 100644 index 00000000000..f9d244b01bc --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_always_on.h @@ -0,0 +1,77 @@ + +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * "Always on" power management policy + */ + +#ifndef MALI_KBASE_PM_ALWAYS_ON_H +#define MALI_KBASE_PM_ALWAYS_ON_H + +/** + * DOC: + * The "Always on" power management policy has the following + * characteristics: + * + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * All Shader Cores are powered up, regardless of whether or not they will + * be needed later. + * + * - When KBase indicates that a set of Shader Cores are needed to submit the + * currently queued Job Chains: + * All Shader Cores are kept powered, regardless of whether or not they will + * be needed + * + * - When KBase indicates that the GPU need not be powered: + * The Shader Cores are kept powered, regardless of whether or not they will + * be needed. The GPU itself is also kept powered, even though it is not + * needed. + * + * This policy is automatically overridden during system suspend: the desired + * core state is ignored, and the cores are forced off regardless of what the + * policy requests. After resuming from suspend, new changes to the desired + * core state made by the policy are honored. 
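+ * The benefit is that there is never any power-transition latency; the + * cost is that no power is saved while the driver is loaded.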
+ * + * Note: + * + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * struct kbasep_pm_policy_always_on - Private struct for policy instance data + * @dummy: unused dummy variable + * + * This contains data that is private to the particular power policy that is + * active. + */ +struct kbasep_pm_policy_always_on { + int dummy; +}; + +extern const struct kbase_pm_policy kbase_pm_always_on_policy_ops; + +#endif /* MALI_KBASE_PM_ALWAYS_ON_H */ + diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c new file mode 100644 index 00000000000..bbf39b0ce03 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_backend.c @@ -0,0 +1,375 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/** + * @file mali_kbase_pm_backend.c + * GPU backend implementation of base kernel power management APIs + */ + +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#include <mali_kbase_config_defaults.h> +#ifdef CONFIG_MALI_PLATFORM_DEVICETREE +#include <linux/pm_runtime.h> +#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ + +#include <mali_kbase_pm.h> +#include <backend/gpu/mali_kbase_jm_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +void kbase_pm_register_access_enable(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + +#ifdef CONFIG_MALI_PLATFORM_DEVICETREE + pm_runtime_enable(kbdev->dev); +#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) + callbacks->power_on_callback(kbdev); + + kbdev->pm.backend.gpu_powered = true; +} + +void kbase_pm_register_access_disable(struct kbase_device *kbdev) +{ + struct kbase_pm_callback_conf *callbacks; + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + + if (callbacks) + callbacks->power_off_callback(kbdev); + + kbdev->pm.backend.gpu_powered = false; +#ifdef CONFIG_MALI_PLATFORM_DEVICETREE + pm_runtime_disable(kbdev->dev); +#endif +} + +int kbase_hwaccess_pm_init(struct kbase_device *kbdev) +{ + int ret = 0; + struct kbase_pm_callback_conf *callbacks; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + mutex_init(&kbdev->pm.lock); + + kbdev->pm.backend.gpu_powered = false; + kbdev->pm.suspending = false; +#ifdef CONFIG_MALI_DEBUG + kbdev->pm.backend.driver_ready_for_irqs = false; +#endif /* CONFIG_MALI_DEBUG */ + kbdev->pm.backend.gpu_in_desired_state = true; + init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait); + + callbacks = (struct kbase_pm_callback_conf *)POWER_MANAGEMENT_CALLBACKS; + if (callbacks) { + kbdev->pm.backend.callback_power_on = + callbacks->power_on_callback; + kbdev->pm.backend.callback_power_off = + callbacks->power_off_callback; + 
kbdev->pm.backend.callback_power_suspend = + callbacks->power_suspend_callback; + kbdev->pm.backend.callback_power_resume = + callbacks->power_resume_callback; + kbdev->pm.callback_power_runtime_init = + callbacks->power_runtime_init_callback; + kbdev->pm.callback_power_runtime_term = + callbacks->power_runtime_term_callback; + kbdev->pm.backend.callback_power_runtime_on = + callbacks->power_runtime_on_callback; + kbdev->pm.backend.callback_power_runtime_off = + callbacks->power_runtime_off_callback; + } else { + kbdev->pm.backend.callback_power_on = NULL; + kbdev->pm.backend.callback_power_off = NULL; + kbdev->pm.backend.callback_power_suspend = NULL; + kbdev->pm.backend.callback_power_resume = NULL; + kbdev->pm.callback_power_runtime_init = NULL; + kbdev->pm.callback_power_runtime_term = NULL; + kbdev->pm.backend.callback_power_runtime_on = NULL; + kbdev->pm.backend.callback_power_runtime_off = NULL; + } + + /* Initialise the metrics subsystem */ + ret = kbasep_pm_metrics_init(kbdev); + if (ret) + return ret; + + init_waitqueue_head(&kbdev->pm.backend.l2_powered_wait); + kbdev->pm.backend.l2_powered = 0; + + init_waitqueue_head(&kbdev->pm.backend.reset_done_wait); + kbdev->pm.backend.reset_done = false; + + init_waitqueue_head(&kbdev->pm.zero_active_count_wait); + kbdev->pm.active_count = 0; + + spin_lock_init(&kbdev->pm.power_change_lock); + spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock); + spin_lock_init(&kbdev->pm.backend.gpu_powered_lock); + + if (kbase_pm_ca_init(kbdev) != 0) + goto workq_fail; + + if (kbase_pm_policy_init(kbdev) != 0) + goto pm_policy_fail; + + return 0; + +pm_policy_fail: + kbase_pm_ca_term(kbdev); +workq_fail: + kbasep_pm_metrics_term(kbdev); + return -EINVAL; +} + +void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume) +{ + lockdep_assert_held(&kbdev->pm.lock); + + /* Turn clocks and interrupts on - no-op if we haven't done a previous + * kbase_pm_clock_off() */ + kbase_pm_clock_on(kbdev, is_resume); + + /* Update core status as required by the policy */ + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START); + kbase_pm_update_cores_state(kbdev); + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END); + + /* NOTE: We don't wait to reach the desired state, since running atoms + * will wait for that state to be reached anyway */ +} + +bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend) +{ + unsigned long flags; + bool cores_are_available; + + lockdep_assert_held(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + /* Force all cores off */ + kbdev->pm.backend.desired_shader_state = 0; + + /* Force all cores to be unavailable, in the situation where + * transitions are in progress for some cores but not others, + * and kbase_pm_check_transitions_nolock can not immediately + * power off the cores */ + kbdev->shader_available_bitmap = 0; + kbdev->tiler_available_bitmap = 0; + kbdev->l2_available_bitmap = 0; + + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END); + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + /* NOTE: We won't wait to reach the core's desired state, even if we're + * powering off the GPU itself too. 
It's safe to cut the power whilst
+	 * they're transitioning to off, because the cores should be idle and
+	 * all cache flushes should already have occurred */
+
+	/* Consume any change-state events */
+	kbase_timeline_pm_check_handle_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	/* Disable interrupts and turn the clock off */
+	return kbase_pm_clock_off(kbdev, is_suspend);
+}
+
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+		unsigned int flags)
+{
+	struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
+	unsigned long irq_flags;
+	int ret;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&js_devdata->runpool_mutex);
+	mutex_lock(&kbdev->pm.lock);
+
+	/* A suspend won't happen during startup/insmod */
+	KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev));
+
+	/* Power up the GPU, but don't enable IRQs as we are not ready to
+	 * receive them. */
+	ret = kbase_pm_init_hw(kbdev, flags);
+	if (ret) {
+		mutex_unlock(&kbdev->pm.lock);
+		mutex_unlock(&js_devdata->runpool_mutex);
+		return ret;
+	}
+
+	kbasep_pm_read_present_cores(kbdev);
+
+	kbdev->pm.debug_core_mask = kbdev->shader_present_bitmap;
+
+	/* Pretend the GPU is active to prevent a power policy turning the GPU
+	 * cores off */
+	kbdev->pm.active_count = 1;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+	/* Ensure cycle counter is off */
+	kbdev->pm.backend.gpu_cycle_counter_requests = 0;
+	spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+	/* We are ready to receive IRQs now as the power policy is set up, so
+	 * enable them now. */
+#ifdef CONFIG_MALI_DEBUG
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+	kbdev->pm.backend.driver_ready_for_irqs = true;
+	spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, irq_flags);
+#endif
+	kbase_pm_enable_interrupts(kbdev);
+
+	/* Turn on the GPU and any cores needed by the policy */
+	kbase_pm_do_poweron(kbdev, false);
+	mutex_unlock(&kbdev->pm.lock);
+	mutex_unlock(&js_devdata->runpool_mutex);
+
+	/* Idle the GPU and/or cores, if the policy wants it to */
+	kbase_pm_context_idle(kbdev);
+
+	return 0;
+}
+
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	mutex_lock(&kbdev->pm.lock);
+	kbase_pm_cancel_deferred_poweroff(kbdev);
+	if (!kbase_pm_do_poweroff(kbdev, false)) {
+		/* Page/bus faults are pending, must drop pm.lock to process.
+ * Interrupts are disabled so no more faults should be + * generated at this point */ + mutex_unlock(&kbdev->pm.lock); + kbase_flush_mmu_wqs(kbdev); + mutex_lock(&kbdev->pm.lock); + WARN_ON(!kbase_pm_do_poweroff(kbdev, false)); + } + mutex_unlock(&kbdev->pm.lock); +} + +KBASE_EXPORT_TEST_API(kbase_hwaccess_pm_halt); + +void kbase_hwaccess_pm_term(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); + KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests == 0); + + /* Free any resources the policy allocated */ + kbase_pm_policy_term(kbdev); + kbase_pm_ca_term(kbdev); + + /* Shut down the metrics subsystem */ + kbasep_pm_metrics_term(kbdev); +} + +void kbase_pm_power_changed(struct kbase_device *kbdev) +{ + bool cores_are_available; + unsigned long flags; + + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START); + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, + SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END); + + if (cores_are_available) { + /* Log timelining information that a change in state has + * completed */ + kbase_timeline_pm_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + kbase_gpu_slot_update(kbdev); + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + } +} + +void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask) +{ + kbdev->pm.debug_core_mask = new_core_mask; + + kbase_pm_update_cores_state_nolock(kbdev); +} + +void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev) +{ + kbase_pm_update_active(kbdev); +} + +void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev) +{ + kbase_pm_update_active(kbdev); +} + +void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + /* Force power off the GPU and all cores (regardless of policy), only + * after the PM active count reaches zero (otherwise, we risk turning it + * off prematurely) */ + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + kbase_pm_cancel_deferred_poweroff(kbdev); + if (!kbase_pm_do_poweroff(kbdev, true)) { + /* Page/bus faults are pending, must drop pm.lock to process. + * Interrupts are disabled so no more faults should be + * generated at this point */ + mutex_unlock(&kbdev->pm.lock); + kbase_flush_mmu_wqs(kbdev); + mutex_lock(&kbdev->pm.lock); + WARN_ON(!kbase_pm_do_poweroff(kbdev, false)); + } + + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); +} + +void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + kbdev->pm.suspending = false; + kbase_pm_do_poweron(kbdev, true); + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c new file mode 100644 index 00000000000..521958aa174 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.c @@ -0,0 +1,179 @@ +/* + * + * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. 
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_ca.c
+ * Base kernel core availability APIs
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_ca_policy *const policy_list[] = {
+	&kbase_pm_ca_fixed_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_ca_random_policy_ops
+#endif
+};
+
+/** The number of policies available in the system.
+ * This is derived from the number of policies listed in policy_list.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+int kbase_pm_ca_init(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	kbdev->pm.backend.ca_current_policy = policy_list[0];
+
+	kbdev->pm.backend.ca_current_policy->init(kbdev);
+
+	return 0;
+}
+
+void kbase_pm_ca_term(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_current_policy->term(kbdev);
+}
+
+int kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **list)
+{
+	if (!list)
+		return POLICY_COUNT;
+
+	*list = policy_list;
+
+	return POLICY_COUNT;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_list_policies);
+
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	return kbdev->pm.backend.ca_current_policy;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_get_policy);
+
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+				const struct kbase_pm_ca_policy *new_policy)
+{
+	const struct kbase_pm_ca_policy *old_policy;
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(new_policy != NULL);
+
+	KBASE_TRACE_ADD(kbdev, PM_CA_SET_POLICY, NULL, NULL, 0u,
+								new_policy->id);
+
+	/* During a policy change we pretend the GPU is active */
+	/* A suspend won't happen here, because we're in a syscall from a
+	 * userspace thread */
+	kbase_pm_context_active(kbdev);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	/* Remove the policy to prevent IRQ handlers from working on it */
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	old_policy = kbdev->pm.backend.ca_current_policy;
+	kbdev->pm.backend.ca_current_policy = NULL;
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	if (old_policy->term)
+		old_policy->term(kbdev);
+
+	if (new_policy->init)
+		new_policy->init(kbdev);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+	kbdev->pm.backend.ca_current_policy = new_policy;
+
+	/* If any core power state changes were previously attempted, but
+	 * couldn't be made because the policy was changing (current_policy was
+	 * NULL), then re-try them here.
*/ + kbase_pm_update_cores_state_nolock(kbdev); + + kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, + kbdev->shader_ready_bitmap, + kbdev->shader_transitioning_bitmap); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + mutex_unlock(&kbdev->pm.lock); + + /* Now the policy change is finished, we release our fake context active + * reference */ + kbase_pm_context_idle(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_set_policy); + +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.power_change_lock); + + /* All cores must be enabled when instrumentation is in use */ + if (kbdev->pm.backend.instr_enabled) + return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask; + + if (kbdev->pm.backend.ca_current_policy == NULL) + return kbdev->shader_present_bitmap & kbdev->pm.debug_core_mask; + + return kbdev->pm.backend.ca_current_policy->get_core_mask(kbdev) & + kbdev->pm.debug_core_mask; +} + +KBASE_EXPORT_TEST_API(kbase_pm_ca_get_core_mask); + +void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, + u64 cores_transitioning) +{ + lockdep_assert_held(&kbdev->pm.power_change_lock); + + if (kbdev->pm.backend.ca_current_policy != NULL) + kbdev->pm.backend.ca_current_policy->update_core_status(kbdev, + cores_ready, + cores_transitioning); +} + +void kbase_pm_ca_instr_enable(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbdev->pm.backend.instr_enabled = true; + + kbase_pm_update_cores_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +void kbase_pm_ca_instr_disable(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbdev->pm.backend.instr_enabled = false; + + kbase_pm_update_cores_state_nolock(kbdev); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h new file mode 100644 index 00000000000..ee9e751f2d7 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca.h @@ -0,0 +1,92 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +/* + * Base kernel core availability APIs + */ + +#ifndef _KBASE_PM_CA_H_ +#define _KBASE_PM_CA_H_ + +/** + * kbase_pm_ca_init - Initialize core availability framework + * + * Must be called before calling any other core availability function + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Return: 0 if the core availability framework was successfully initialized, + * -errno otherwise + */ +int kbase_pm_ca_init(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_term - Terminate core availability framework + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + */ +void kbase_pm_ca_term(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_get_core_mask - Get currently available shaders core mask + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Returns a mask of the currently available shader cores. + * Calls into the core availability policy + * + * Return: The bit mask of available cores + */ +u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_update_core_status - Update core status + * + * @kbdev: The kbase device structure for the device (must be + * a valid pointer) + * @cores_ready: The bit mask of cores ready for job submission + * @cores_transitioning: The bit mask of cores that are transitioning power + * state + * + * Update core availability policy with current core power status + * + * Calls into the core availability policy + */ +void kbase_pm_ca_update_core_status(struct kbase_device *kbdev, u64 cores_ready, + u64 cores_transitioning); + +/** + * kbase_pm_ca_instr_enable - Enable override for instrumentation + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This overrides the output of the core availability policy, ensuring that all + * cores are available + */ +void kbase_pm_ca_instr_enable(struct kbase_device *kbdev); + +/** + * kbase_pm_ca_instr_disable - Disable override for instrumentation + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This disables any previously enabled override, and resumes normal policy + * functionality + */ +void kbase_pm_ca_instr_disable(struct kbase_device *kbdev); + +#endif /* _KBASE_PM_CA_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c new file mode 100644 index 00000000000..f890a64e8bb --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.c @@ -0,0 +1,65 @@ +/* + * + * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_ca_fixed.c
+ * A power policy implementing fixed core availability
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static void fixed_init(struct kbase_device *kbdev)
+{
+	kbdev->pm.backend.ca_in_transition = false;
+}
+
+static void fixed_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static u64 fixed_get_core_mask(struct kbase_device *kbdev)
+{
+	return kbdev->shader_present_bitmap;
+}
+
+static void fixed_update_core_status(struct kbase_device *kbdev,
+					u64 cores_ready,
+					u64 cores_transitioning)
+{
+	CSTD_UNUSED(kbdev);
+	CSTD_UNUSED(cores_ready);
+	CSTD_UNUSED(cores_transitioning);
+}
+
+/** The @ref struct kbase_pm_ca_policy structure for the fixed core
+ * availability policy.
+ *
+ * This is the static structure that defines the fixed core availability
+ * policy's callbacks and name.
+ */
+const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops = {
+	"fixed",			/* name */
+	fixed_init,			/* init */
+	fixed_term,			/* term */
+	fixed_get_core_mask,		/* get_core_mask */
+	fixed_update_core_status,	/* update_core_status */
+	0u,				/* flags */
+	KBASE_PM_CA_POLICY_ID_FIXED,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_ca_fixed_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
new file mode 100644
index 00000000000..a763155cb70
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_ca_fixed.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * A power policy implementing fixed core availability
+ */
+
+#ifndef MALI_KBASE_PM_CA_FIXED_H
+#define MALI_KBASE_PM_CA_FIXED_H
+
+/**
+ * struct kbasep_pm_ca_policy_fixed - Private structure for policy instance data
+ *
+ * @dummy: Dummy member - no state is needed
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_ca_policy_fixed {
+	int dummy;
+};
+
+extern const struct kbase_pm_ca_policy kbase_pm_ca_fixed_policy_ops;
+
+#endif /* MALI_KBASE_PM_CA_FIXED_H */
+
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
new file mode 100644
index 00000000000..3decd0312b6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.c
@@ -0,0 +1,69 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_coarse_demand.c
+ * "Coarse Demand" power management policy
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+static u64 coarse_demand_get_core_mask(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return 0;
+
+	return kbdev->shader_present_bitmap;
+}
+
+static bool coarse_demand_get_core_active(struct kbase_device *kbdev)
+{
+	if (kbdev->pm.active_count == 0)
+		return false;
+
+	return true;
+}
+
+static void coarse_demand_init(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+static void coarse_demand_term(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+/** The @ref struct kbase_pm_policy structure for the coarse demand power
+ * policy.
+ *
+ * This is the static structure that defines the coarse demand power policy's
+ * callbacks and name.
+ */
+const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops = {
+	"coarse_demand",			/* name */
+	coarse_demand_init,			/* init */
+	coarse_demand_term,			/* term */
+	coarse_demand_get_core_mask,		/* get_core_mask */
+	coarse_demand_get_core_active,		/* get_core_active */
+	0u,					/* flags */
+	KBASE_PM_POLICY_ID_COARSE_DEMAND,	/* id */
+};
+
+KBASE_EXPORT_TEST_API(kbase_pm_coarse_demand_policy_ops);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
new file mode 100644
index 00000000000..dd1e8f710b3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_coarse_demand.h
@@ -0,0 +1,63 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_coarse_demand.h
+ * "Coarse Demand" power management policy
+ */
+
+#ifndef MALI_KBASE_PM_COARSE_DEMAND_H
+#define MALI_KBASE_PM_COARSE_DEMAND_H
+
+/**
+ * The "Coarse" demand power management policy has the following
+ * characteristics:
+ * - When KBase indicates that the GPU will be powered up, but we don't yet
+ *   know which Job Chains are to be run:
+ *  - All Shader Cores are powered up, regardless of whether or not they will
+ *    be needed later.
+ * - When KBase indicates that a set of Shader Cores are needed to submit the
+ *   currently queued Job Chains:
+ *  - All Shader Cores are kept powered, regardless of whether or not they will
+ *    be needed
+ * - When KBase indicates that the GPU need not be powered:
+ *  - The Shader Cores are powered off, and the GPU itself is powered off too.
+ *
+ * @note:
+ * - KBase indicates the GPU will be powered up when it has a User Process that
+ *   has just started to submit Job Chains.
+ * - KBase indicates the GPU need not be powered when all the Job Chains from
+ *   User Processes have finished, and it is waiting for a User Process to
+ *   submit some more Job Chains.
+ */
+
+/**
+ * Private structure for policy instance data.
+ *
+ * This contains data that is private to the particular power policy that is
+ * active.
+ */
+struct kbasep_pm_policy_coarse_demand {
+	/** No state needed - just have a dummy variable here */
+	int dummy;
+};
+
+extern const struct kbase_pm_policy kbase_pm_coarse_demand_policy_ops;
+
+#endif /* MALI_KBASE_PM_COARSE_DEMAND_H */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
new file mode 100644
index 00000000000..aad6c49b0c3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_defs.h
@@ -0,0 +1,555 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_hwaccess_defs.h
+ * Backend-specific Power Manager definitions
+ */
+
+#ifndef _KBASE_PM_HWACCESS_DEFS_H_
+#define _KBASE_PM_HWACCESS_DEFS_H_
+
+#include "mali_kbase_pm_ca_fixed.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_ca_random.h"
+#endif
+
+#include "mali_kbase_pm_always_on.h"
+#include "mali_kbase_pm_coarse_demand.h"
+#include "mali_kbase_pm_demand.h"
+#if !MALI_CUSTOMER_RELEASE
+#include "mali_kbase_pm_demand_always_powered.h"
+#include "mali_kbase_pm_fast_start.h"
+#endif
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+struct kbase_jd_atom;
+
+/** The types of core in a GPU.
+ *
+ * These enumerated values are used in calls to:
+ * - @ref kbase_pm_get_present_cores
+ * - @ref kbase_pm_get_active_cores
+ * - @ref kbase_pm_get_trans_cores
+ * - @ref kbase_pm_get_ready_cores.
+ *
+ * They specify which type of core should be acted on. These values are set in
+ * a manner that allows the @ref core_type_to_reg function to be simpler and
+ * more efficient.
+ */
+enum kbase_pm_core_type {
+	KBASE_PM_CORE_L2 = L2_PRESENT_LO,	/**< The L2 cache */
+	KBASE_PM_CORE_SHADER = SHADER_PRESENT_LO,	/**< Shader cores */
+	KBASE_PM_CORE_TILER = TILER_PRESENT_LO	/**< Tiler cores */
+};
+
+/**
+ * struct kbasep_pm_metrics_data - Metrics data collected for use by the power management framework.
+ *
+ * @vsync_hit: indicates if a framebuffer update occurred since the last vsync.
+ *             A framebuffer driver is expected to provide this information by
+ *             checking at each vsync if the framebuffer was updated and calling
+ *             kbase_pm_vsync_callback() if there was a change of status.
+ * @utilisation: percentage indicating GPU load (0-100).
+ *             The utilisation is the fraction of time the GPU was powered up
+ *             and busy. This is based on the time_busy and time_idle metrics.
+ * @util_gl_share: percentage of GPU load related to OpenGL jobs (0-100).
+ *             This is based on the busy_gl and time_busy metrics.
+ * @util_cl_share: percentage of GPU load related to OpenCL jobs (0-100).
+ *             This is based on the busy_cl and time_busy metrics.
+ * @time_period_start: time at which busy/idle measurements started
+ * @time_busy: number of ns the GPU was busy executing jobs since the
+ *             @time_period_start timestamp.
+ * @time_idle: number of ns the GPU was not executing jobs since the
+ *             @time_period_start timestamp.
+ * @prev_busy: busy time in ns of previous time period.
+ *             Updated when metrics are reset.
+ * @prev_idle: idle time in ns of previous time period.
+ *             Updated when metrics are reset.
+ * @gpu_active: true when the GPU is executing jobs, false when it is not.
+ *             Updated when the job scheduler informs us a job is submitted
+ *             to or removed from a GPU slot.
+ * @busy_cl: number of ns the GPU was busy executing CL jobs. Note that
+ *             if two CL jobs were active for 400ns, this value would be
+ *             updated with 800.
+ * @busy_gl: number of ns the GPU was busy executing GL jobs. Note that
+ *             if two GL jobs were active for 400ns, this value would be
+ *             updated with 800.
+ * @active_cl_ctx: number of CL jobs active on the GPU. This is a portion of
+ *             the @nr_in_slots value.
+ * @active_gl_ctx: number of GL jobs active on the GPU. This is a portion of
+ *             the @nr_in_slots value.
+ * @nr_in_slots: Total number of jobs currently submitted to the GPU across
+ *             all job slots. Maximum value would be 2*BASE_JM_MAX_NR_SLOTS
+ *             (one in flight and one in the JSn_HEAD_NEXT register for each
+ *             job slot).
+ * @lock: spinlock protecting the kbasep_pm_metrics_data structure
+ * @timer: timer to regularly make DVFS decisions based on the power
+ *             management metrics.
+ * @timer_active: boolean indicating @timer is running
+ * @platform_data: pointer to data controlled by platform specific code
+ * @kbdev: pointer to kbase device for which metrics are collected
+ *
+ */
+struct kbasep_pm_metrics_data {
+	int vsync_hit;
+	int utilisation;
+	int util_gl_share;
+	int util_cl_share[2]; /* 2 is the maximum number of core groups */
+	ktime_t time_period_start;
+	u32 time_busy;
+	u32 time_idle;
+	u32 prev_busy;
+	u32 prev_idle;
+	bool gpu_active;
+	u32 busy_cl[2];
+	u32 busy_gl;
+	u32 active_cl_ctx[2];
+	u32 active_gl_ctx;
+	u8 nr_in_slots;
+	spinlock_t lock;
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+	struct hrtimer timer;
+	bool timer_active;
+#endif
+
+	void *platform_data;
+	struct kbase_device *kbdev;
+};
+
+/** Actions for DVFS.
+ *
+ * kbase_pm_get_dvfs_action will return one of these enumerated values to
+ * describe the action that the DVFS system should take.
+ */
+enum kbase_pm_dvfs_action {
+	KBASE_PM_DVFS_NOP,		/**< No change in clock frequency is
+					 * requested */
+	KBASE_PM_DVFS_CLOCK_UP,		/**< The clock frequency should be
+					 * increased if possible */
+	KBASE_PM_DVFS_CLOCK_DOWN	/**< The clock frequency should be
+					 * decreased if possible */
+};
+
+union kbase_pm_policy_data {
+	struct kbasep_pm_policy_always_on always_on;
+	struct kbasep_pm_policy_coarse_demand coarse_demand;
+	struct kbasep_pm_policy_demand demand;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_policy_demand_always_powered demand_always_powered;
+	struct kbasep_pm_policy_fast_start fast_start;
+#endif
+};
+
+union kbase_pm_ca_policy_data {
+	struct kbasep_pm_ca_policy_fixed fixed;
+#if !MALI_CUSTOMER_RELEASE
+	struct kbasep_pm_ca_policy_random random;
+#endif
+};
+
+/**
+ * Data stored per device for power management.
+ *
+ * This structure contains data for the power management framework. There is
+ * one instance of this structure per device in the system.
+ */
+struct kbase_pm_backend_data {
+	/**
+	 * The policy that is currently actively controlling core availability.
+	 *
+	 * @note: During an IRQ, this can be NULL when the policy is being
	 * changed with kbase_pm_ca_set_policy(). The change is protected under
+	 * kbase_device::pm::power_change_lock. Direct access to this from IRQ
+	 * context must therefore check for NULL. If NULL, then
+	 * kbase_pm_ca_set_policy() will re-issue the policy functions that
+	 * would've been done under IRQ.
+	 */
+	const struct kbase_pm_ca_policy *ca_current_policy;
+
+	/**
+	 * The policy that is currently actively controlling the power state.
+	 *
+	 * @note: During an IRQ, this can be NULL when the policy is being
+	 * changed with kbase_pm_set_policy(). The change is protected under
+	 * kbase_device::pm::power_change_lock. Direct access to this from IRQ
+	 * context must therefore check for NULL. If NULL, then
+	 * kbase_pm_set_policy() will re-issue the policy functions that
+	 * would've been done under IRQ.
+	 */
+	const struct kbase_pm_policy *pm_current_policy;
+
+	/** Private data for current CA policy */
+	union kbase_pm_ca_policy_data ca_policy_data;
+
+	/** Private data for current PM policy */
+	union kbase_pm_policy_data pm_policy_data;
+
+	/**
+	 * Flag indicating when the core availability policy is transitioning
+	 * cores. The core availability policy must set this when a change in
+	 * core availability is occurring.
+	 *
+	 * power_change_lock must be held when accessing this. */
+	bool ca_in_transition;
+
+	/** Waiting for reset and a queue to wait for changes */
+	bool reset_done;
+	wait_queue_head_t reset_done_wait;
+
+	/** Wait queue for whether the l2 cache has been powered as requested */
+	wait_queue_head_t l2_powered_wait;
+	/** State indicating whether all the l2 caches are powered.
+	 * Non-zero indicates they're *all* powered
+	 * Zero indicates that some (or all) are not powered */
+	int l2_powered;
+
+	/** The reference count of active gpu cycle counter users */
+	int gpu_cycle_counter_requests;
+	/** Lock to protect gpu_cycle_counter_requests */
+	spinlock_t gpu_cycle_counter_requests_lock;
+
+	/**
+	 * A bit mask identifying the shader cores that the power policy would
+	 * like to be on. The current state of the cores may be different, but
+	 * there should be transitions in progress that will eventually achieve
+	 * this state (assuming that the policy doesn't change its mind in the
+	 * mean time).
+	 */
+	u64 desired_shader_state;
+	/**
+	 * A bit mask indicating which shader cores are currently in a power-on
+	 * transition
+	 */
+	u64 powering_on_shader_state;
+	/**
+	 * A bit mask identifying the tiler cores that the power policy would
+	 * like to be on.
+	 * @see kbase_pm_device_data:desired_shader_state
+	 */
+	u64 desired_tiler_state;
+	/**
+	 * A bit mask indicating which tiler cores are currently in a power-on
+	 * transition
+	 */
+	u64 powering_on_tiler_state;
+
+	/**
+	 * A bit mask indicating which l2-caches are currently in a power-on
+	 * transition
+	 */
+	u64 powering_on_l2_state;
+
+	/**
+	 * This flag is set if the GPU is powered as requested by the
+	 * desired_xxx_state variables
+	 */
+	bool gpu_in_desired_state;
+	/* Wait queue set when gpu_in_desired_state != 0 */
+	wait_queue_head_t gpu_in_desired_state_wait;
+
+	/**
+	 * Set to true when the GPU is powered and register accesses are
+	 * possible, false otherwise
+	 */
+	bool gpu_powered;
+
+	/** Set to true when instrumentation is enabled, false otherwise */
+	bool instr_enabled;
+
+	bool cg1_disabled;
+
+#ifdef CONFIG_MALI_DEBUG
+	/**
+	 * Debug state indicating whether sufficient initialization of the
+	 * driver has occurred to handle IRQs
+	 */
+	bool driver_ready_for_irqs;
+#endif /* CONFIG_MALI_DEBUG */
+
+	/**
+	 * Spinlock that must be held when:
+	 * - writing gpu_powered
+	 * - accessing driver_ready_for_irqs (in CONFIG_MALI_DEBUG builds)
+	 */
+	spinlock_t gpu_powered_lock;
+
+	/** Structure to hold metrics for the GPU */
+
+	struct kbasep_pm_metrics_data metrics;
+
+	/**
+	 * Set to the number of poweroff timer ticks until the GPU is powered
+	 * off
+	 */
+	int gpu_poweroff_pending;
+
+	/**
+	 * Set to the number of poweroff timer ticks until shaders are powered
+	 * off
+	 */
+	int shader_poweroff_pending_time;
+
+	/** Timer for powering off GPU */
+	struct hrtimer gpu_poweroff_timer;
+
+	struct workqueue_struct *gpu_poweroff_wq;
+
+	struct work_struct gpu_poweroff_work;
+
+	/** Bit mask of shaders to be powered off on next timer callback */
+	u64 shader_poweroff_pending;
+
+	/**
+	 * Set to true if the poweroff timer is currently running,
+	 * false otherwise
+	 */
+	bool poweroff_timer_needed;
+
+	/**
+	 * Callback when the GPU needs to be turned on. See
+	 * @ref kbase_pm_callback_conf
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 1 if GPU state was lost, 0 otherwise
+	 */
+	int (*callback_power_on)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback when the GPU may be turned off. See
+	 * @ref kbase_pm_callback_conf
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_off)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback when a suspend occurs and the GPU needs to be turned off.
+	 * See @ref kbase_pm_callback_conf
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_suspend)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback when a resume occurs and the GPU needs to be turned on.
+	 * See @ref kbase_pm_callback_conf
+	 *
+	 * @param kbdev The kbase device
+	 */
+	void (*callback_power_resume)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback when the GPU needs to be turned on. See
+	 * @ref kbase_pm_callback_conf
+	 *
+	 * @param kbdev The kbase device
+	 *
+	 * @return 1 if GPU state was lost, 0 otherwise
+	 */
+	int (*callback_power_runtime_on)(struct kbase_device *kbdev);
+
+	/**
+	 * Callback when the GPU may be turned off.
See + * @ref kbase_pm_callback_conf + * + * @param kbdev The kbase device + */ + void (*callback_power_runtime_off)(struct kbase_device *kbdev); + +}; + + +/** List of policy IDs */ +enum kbase_pm_policy_id { + KBASE_PM_POLICY_ID_DEMAND = 1, + KBASE_PM_POLICY_ID_ALWAYS_ON, + KBASE_PM_POLICY_ID_COARSE_DEMAND, +#if !MALI_CUSTOMER_RELEASE + KBASE_PM_POLICY_ID_DEMAND_ALWAYS_POWERED, + KBASE_PM_POLICY_ID_FAST_START +#endif +}; + +typedef u32 kbase_pm_policy_flags; + +/** + * Power policy structure. + * + * Each power policy exposes a (static) instance of this structure which + * contains function pointers to the policy's methods. + */ +struct kbase_pm_policy { + /** The name of this policy */ + char *name; + + /** + * Function called when the policy is selected + * + * This should initialize the kbdev->pm.pm_policy_data structure. It + * should not attempt to make any changes to hardware state. + * + * It is undefined what state the cores are in when the function is + * called. + * + * @param kbdev The kbase device structure for the device (must be a + * valid pointer) + */ + void (*init)(struct kbase_device *kbdev); + + /** + * Function called when the policy is unselected. + * + * @param kbdev The kbase device structure for the device (must be a + * valid pointer) + */ + void (*term)(struct kbase_device *kbdev); + + /** + * Function called to get the current shader core mask + * + * The returned mask should meet or exceed (kbdev->shader_needed_bitmap + * | kbdev->shader_inuse_bitmap). + * + * @param kbdev The kbase device structure for the device (must be a + * valid pointer) + * + * @return The mask of shader cores to be powered + */ + u64 (*get_core_mask)(struct kbase_device *kbdev); + + /** + * Function called to get the current overall GPU power state + * + * This function should consider the state of kbdev->pm.active_count. If + * this count is greater than 0 then there is at least one active + * context on the device and the GPU should be powered. If it is equal + * to 0 then there are no active contexts and the GPU could be powered + * off if desired. + * + * @param kbdev The kbase device structure for the device (must be a + * valid pointer) + * + * @return true if the GPU should be powered, false otherwise + */ + bool (*get_core_active)(struct kbase_device *kbdev); + + /** Field indicating flags for this policy */ + kbase_pm_policy_flags flags; + + /** + * Field indicating an ID for this policy. This is not necessarily the + * same as its index in the list returned by kbase_pm_list_policies(). + * It is used purely for debugging. + */ + enum kbase_pm_policy_id id; +}; + + +enum kbase_pm_ca_policy_id { + KBASE_PM_CA_POLICY_ID_FIXED = 1, + KBASE_PM_CA_POLICY_ID_RANDOM +}; + +typedef u32 kbase_pm_ca_policy_flags; + +/** + * Core availability policy structure. + * + * Each core availability policy exposes a (static) instance of this structure + * which contains function pointers to the policy's methods. + */ +struct kbase_pm_ca_policy { + /** The name of this policy */ + char *name; + + /** + * Function called when the policy is selected + * + * This should initialize the kbdev->pm.ca_policy_data structure. It + * should not attempt to make any changes to hardware state. + * + * It is undefined what state the cores are in when the function is + * called. + * + * @param kbdev The kbase device structure for the device (must be a + * valid pointer) + */ + void (*init)(struct kbase_device *kbdev); + + /** + * Function called when the policy is unselected. 
+	 *
+	 * @param kbdev The kbase device structure for the device (must be a
+	 * valid pointer)
+	 */
+	void (*term)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to get the current shader core availability mask
+	 *
+	 * When a change in core availability is occurring, the policy must set
+	 * kbdev->pm.ca_in_transition to true. This is to indicate that
+	 * reporting changes in power state cannot be optimized out, even if
+	 * kbdev->pm.desired_shader_state remains unchanged. This must be done
+	 * by any functions internal to the Core Availability Policy that change
+	 * the return value of kbase_pm_ca_policy::get_core_mask.
+	 *
+	 * @param kbdev The kbase device structure for the device (must be a
+	 * valid pointer)
+	 *
+	 * @return The current core availability mask
+	 */
+	u64 (*get_core_mask)(struct kbase_device *kbdev);
+
+	/**
+	 * Function called to update the current core status
+	 *
+	 * If none of the cores in core group 0 are ready or transitioning, then
+	 * the policy must ensure that the next call to get_core_mask does not
+	 * return 0 for all cores in core group 0. It is an error to disable
+	 * core group 0 through the core availability policy.
+	 *
+	 * When a change in core availability has finished, the policy must set
+	 * kbdev->pm.ca_in_transition to false. This is to indicate that
+	 * changes in power state can once again be optimized out when
+	 * kbdev->pm.desired_shader_state is unchanged.
+	 *
+	 * @param kbdev                The kbase device structure for the device
+	 *                             (must be a valid pointer)
+	 * @param cores_ready          The mask of cores currently powered and
+	 *                             ready to run jobs
+	 * @param cores_transitioning  The mask of cores currently transitioning
+	 *                             power state
+	 */
+	void (*update_core_status)(struct kbase_device *kbdev, u64 cores_ready,
+						u64 cores_transitioning);
+
+	/** Field indicating flags for this policy */
+	kbase_pm_ca_policy_flags flags;
+
+	/**
+	 * Field indicating an ID for this policy. This is not necessarily the
+	 * same as its index in the list returned by kbase_pm_list_policies().
+	 * It is used purely for debugging.
+	 */
+	enum kbase_pm_ca_policy_id id;
+};
+
+#endif /* _KBASE_PM_HWACCESS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
new file mode 100644
index 00000000000..4e5b79c9c44
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.c
@@ -0,0 +1,72 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ * + */ + + + + + +/** + * @file mali_kbase_pm_demand.c + * A simple demand based power management policy + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> + +static u64 demand_get_core_mask(struct kbase_device *kbdev) +{ + u64 desired = kbdev->shader_needed_bitmap | kbdev->shader_inuse_bitmap; + + if (0 == kbdev->pm.active_count) + return 0; + + return desired; +} + +static bool demand_get_core_active(struct kbase_device *kbdev) +{ + if (0 == kbdev->pm.active_count) + return false; + + return true; +} + +static void demand_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void demand_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +/** + * The @ref struct kbase_pm_policy structure for the demand power policy. + * + * This is the static structure that defines the demand power policy's callback + * and name. + */ +const struct kbase_pm_policy kbase_pm_demand_policy_ops = { + "demand", /* name */ + demand_init, /* init */ + demand_term, /* term */ + demand_get_core_mask, /* get_core_mask */ + demand_get_core_active, /* get_core_active */ + 0u, /* flags */ + KBASE_PM_POLICY_ID_DEMAND, /* id */ +}; + +KBASE_EXPORT_TEST_API(kbase_pm_demand_policy_ops); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h new file mode 100644 index 00000000000..8b7a17ab984 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_demand.h @@ -0,0 +1,60 @@ +/* + * + * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_pm_demand.h + * A simple demand based power management policy + */ + +#ifndef MALI_KBASE_PM_DEMAND_H +#define MALI_KBASE_PM_DEMAND_H + +/** + * The demand power management policy has the following characteristics: + * - When KBase indicates that the GPU will be powered up, but we don't yet + * know which Job Chains are to be run: + * - The Shader Cores are not powered up + * - When KBase indicates that a set of Shader Cores are needed to submit the + * currently queued Job Chains: + * - Only those Shader Cores are powered up + * - When KBase indicates that the GPU need not be powered: + * - The Shader Cores are powered off, and the GPU itself is powered off too. + * + * @note: + * - KBase indicates the GPU will be powered up when it has a User Process that + * has just started to submit Job Chains. + * - KBase indicates the GPU need not be powered when all the Job Chains from + * User Processes have finished, and it is waiting for a User Process to + * submit some more Job Chains. + */ + +/** + * Private structure for policy instance data. + * + * This contains data that is private to the particular power policy that is + * active. 
+ */ +struct kbasep_pm_policy_demand { + /** No state needed - just have a dummy variable here */ + int dummy; +}; + +extern const struct kbase_pm_policy kbase_pm_demand_policy_ops; + +#endif /* MALI_KBASE_PM_DEMAND_H */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c new file mode 100644 index 00000000000..1d72fd60143 --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_driver.c @@ -0,0 +1,1264 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_pm_driver.c + * Base kernel Power Management hardware control + */ + +#include <mali_kbase.h> +#include <mali_kbase_config_defaults.h> +#include <mali_midg_regmap.h> +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#include <mali_kbase_gator.h> +#endif +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include <mali_kbase_tlstream.h> +#endif +#include <mali_kbase_pm.h> +#include <mali_kbase_cache_policy.h> +#include <mali_kbase_config_defaults.h> +#include <mali_kbase_smc.h> +#include <backend/gpu/mali_kbase_cache_policy_backend.h> +#include <backend/gpu/mali_kbase_device_internal.h> +#include <backend/gpu/mali_kbase_irq_internal.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +#if MALI_MOCK_TEST +#define MOCKABLE(function) function##_original +#else +#define MOCKABLE(function) function +#endif /* MALI_MOCK_TEST */ + +/** + * Actions that can be performed on a core. + * + * This enumeration is private to the file. Its values are set to allow + * @ref core_type_to_reg function, which decodes this enumeration, to be simpler + * and more efficient. + */ +enum kbasep_pm_action { + ACTION_PRESENT = 0, + ACTION_READY = (SHADER_READY_LO - SHADER_PRESENT_LO), + ACTION_PWRON = (SHADER_PWRON_LO - SHADER_PRESENT_LO), + ACTION_PWROFF = (SHADER_PWROFF_LO - SHADER_PRESENT_LO), + ACTION_PWRTRANS = (SHADER_PWRTRANS_LO - SHADER_PRESENT_LO), + ACTION_PWRACTIVE = (SHADER_PWRACTIVE_LO - SHADER_PRESENT_LO) +}; + +static u64 kbase_pm_get_state( + struct kbase_device *kbdev, + enum kbase_pm_core_type core_type, + enum kbasep_pm_action action); + +/** + * Decode a core type and action to a register. + * + * Given a core type (defined by @ref kbase_pm_core_type) and an action (defined + * by @ref kbasep_pm_action) this function will return the register offset that + * will perform the action on the core type. The register returned is the \c _LO + * register and an offset must be applied to use the \c _HI register. + * + * @param core_type The type of core + * @param action The type of action + * + * @return The register offset of the \c _LO register that performs an action of + * type \c action on a core of type \c core_type. + */ +static u32 core_type_to_reg(enum kbase_pm_core_type core_type, + enum kbasep_pm_action action) +{ + return (u32)core_type + (u32)action; +} + + +/** Invokes an action on a core set + * + * This function performs the action given by \c action on a set of cores of a + * type given by \c core_type. 
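+ * As a worked sketch of the register arithmetic: a call such as
+ *
+ *   kbase_pm_invoke(kbdev, KBASE_PM_CORE_SHADER, 3, ACTION_PWRON);
+ *
+ * requests power-on of shader cores 0 and 1, and resolves to a write of
+ * 0x3 to SHADER_PWRON_LO, because core_type_to_reg() simply adds the two
+ * enum values (KBASE_PM_CORE_SHADER == SHADER_PRESENT_LO and ACTION_PWRON
+ * == SHADER_PWRON_LO - SHADER_PRESENT_LO).
+ *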
+ * It is a static function used by
+ * @ref kbase_pm_transition_core_type
+ *
+ * @param kbdev     The kbase device structure of the device
+ * @param core_type The type of core that the action should be performed on
+ * @param cores     A bit mask of cores to perform the action on
+ * @param action    The action to perform on the cores
+ */
+static void kbase_pm_invoke(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					u64 cores,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo = cores & 0xFFFFFFFF;
+	u32 hi = (cores >> 32) & 0xFFFFFFFF;
+
+	lockdep_assert_held(&kbdev->pm.power_change_lock);
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+	if (cores) {
+		if (action == ACTION_PWRON)
+			kbase_trace_mali_pm_power_on(core_type, cores);
+		else if (action == ACTION_PWROFF)
+			kbase_trace_mali_pm_power_off(core_type, cores);
+	}
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+	if (cores) {
+		u64 state = kbase_pm_get_state(kbdev, core_type, ACTION_READY);
+
+		if (action == ACTION_PWRON)
+			state |= cores;
+		else if (action == ACTION_PWROFF)
+			state &= ~cores;
+		kbase_tlstream_aux_pm_state(core_type, state);
+	}
+#endif
+	/* Tracing */
+	if (cores) {
+		if (action == ACTION_PWRON)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON, NULL, NULL, 0u,
+									lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWRON_L2, NULL, NULL,
+								0u, lo);
+				break;
+			default:
+				break;
+			}
+		else if (action == ACTION_PWROFF)
+			switch (core_type) {
+			case KBASE_PM_CORE_SHADER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF, NULL, NULL,
+								0u, lo);
+				break;
+			case KBASE_PM_CORE_TILER:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_TILER, NULL,
+								NULL, 0u, lo);
+				break;
+			case KBASE_PM_CORE_L2:
+				KBASE_TRACE_ADD(kbdev, PM_PWROFF_L2, NULL, NULL,
+								0u, lo);
+				break;
+			default:
+				break;
+			}
+	}
+
+	if (lo != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg), lo, NULL);
+
+	if (hi != 0)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(reg + 4), hi, NULL);
+}
+
+/**
+ * Get information about a core set
+ *
+ * This function gets information (chosen by \c action) about a set of cores of
+ * a type given by \c core_type. It is a static function used by @ref
+ * kbase_pm_get_present_cores, @ref kbase_pm_get_active_cores, @ref
+ * kbase_pm_get_trans_cores and @ref kbase_pm_get_ready_cores.
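+ *
+ * As a usage sketch (all names are defined in this file), reading which
+ * shader cores are currently powered and ready:
+ *
+ *   u64 ready = kbase_pm_get_state(kbdev, KBASE_PM_CORE_SHADER,
+ *							ACTION_READY);
+ *
+ * This reads SHADER_READY_LO and SHADER_READY_HI (the \c _LO register plus
+ * an offset of 4) and combines them into a single 64-bit mask.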
+ *
+ * @param kbdev The kbase device structure of the device
+ * @param core_type The type of core that should be queried
+ * @param action The property of the cores to query
+ *
+ * @return A bit mask specifying the state of the cores
+ */
+static u64 kbase_pm_get_state(struct kbase_device *kbdev,
+					enum kbase_pm_core_type core_type,
+					enum kbasep_pm_action action)
+{
+	u32 reg;
+	u32 lo, hi;
+
+	reg = core_type_to_reg(core_type, action);
+
+	KBASE_DEBUG_ASSERT(reg);
+
+	lo = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg), NULL);
+	hi = kbase_reg_read(kbdev, GPU_CONTROL_REG(reg + 4), NULL);
+
+	return (((u64) hi) << 32) | ((u64) lo);
+}
+
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev)
+{
+	kbdev->shader_present_bitmap =
+		kbase_pm_get_state(kbdev, KBASE_PM_CORE_SHADER, ACTION_PRESENT);
+	kbdev->tiler_present_bitmap =
+		kbase_pm_get_state(kbdev, KBASE_PM_CORE_TILER, ACTION_PRESENT);
+	kbdev->l2_present_bitmap =
+		kbase_pm_get_state(kbdev, KBASE_PM_CORE_L2, ACTION_PRESENT);
+	kbdev->shader_inuse_bitmap = 0;
+	kbdev->shader_needed_bitmap = 0;
+	kbdev->shader_available_bitmap = 0;
+	kbdev->tiler_available_bitmap = 0;
+	kbdev->l2_users_count = 0;
+	kbdev->l2_available_bitmap = 0;
+	kbdev->tiler_needed_cnt = 0;
+	kbdev->tiler_inuse_cnt = 0;
+
+	memset(kbdev->shader_needed_cnt, 0, sizeof(kbdev->shader_needed_cnt));
+}
+
+KBASE_EXPORT_TEST_API(kbasep_pm_read_present_cores);
+
+/**
+ * Get the cores that are present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	switch (type) {
+	case KBASE_PM_CORE_L2:
+		return kbdev->l2_present_bitmap;
+	case KBASE_PM_CORE_SHADER:
+		return kbdev->shader_present_bitmap;
+	case KBASE_PM_CORE_TILER:
+		return kbdev->tiler_present_bitmap;
+	}
+	KBASE_DEBUG_ASSERT(0);
+	return 0;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_present_cores);
+
+/**
+ * Get the cores that are "active" (busy processing work)
+ */
+u64 kbase_pm_get_active_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRACTIVE);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_active_cores);
+
+/**
+ * Get the cores that are transitioning between power states
+ */
+u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	return kbase_pm_get_state(kbdev, type, ACTION_PWRTRANS);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_trans_cores);
+
+/**
+ * Get the cores that are powered on
+ */
+u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type)
+{
+	u64 result;
+
+	result = kbase_pm_get_state(kbdev, type, ACTION_READY);
+
+	switch (type) {
+	case KBASE_PM_CORE_SHADER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_TILER:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_TILER, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	case KBASE_PM_CORE_L2:
+		KBASE_TRACE_ADD(kbdev, PM_CORES_POWERED_L2, NULL, NULL, 0u,
+								(u32) result);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_get_ready_cores);
+
+/**
+ * Perform power transitions for a particular core type.
+ *
+ * This function will perform any available power transitions to make the
+ * actual hardware state closer to the desired state. If a core is currently
+ * transitioning then changes to the power state of that core cannot be made
+ * until the transition has finished.
Cores which are not present in the + * hardware are ignored if they are specified in the desired_state bitmask, + * however the return value will always be 0 in this case. + * + * @param kbdev The kbase device + * @param type The core type to perform transitions for + * @param desired_state A bit mask of the desired state of the cores + * @param in_use A bit mask of the cores that are currently running + * jobs. These cores have to be kept powered up because + * there are jobs running (or about to run) on them. + * @param[out] available Receives a bit mask of the cores that the job + * scheduler can use to submit jobs to. May be NULL if + * this is not needed. + * @param[in,out] powering_on Bit mask to update with cores that are + * transitioning to a power-on state. + * + * @return true if the desired state has been reached, false otherwise + */ +static bool kbase_pm_transition_core_type(struct kbase_device *kbdev, + enum kbase_pm_core_type type, + u64 desired_state, + u64 in_use, + u64 * const available, + u64 *powering_on) +{ + u64 present; + u64 ready; + u64 trans; + u64 powerup; + u64 powerdown; + u64 powering_on_trans; + u64 desired_state_in_use; + + lockdep_assert_held(&kbdev->pm.power_change_lock); + + /* Get current state */ + present = kbase_pm_get_present_cores(kbdev, type); + trans = kbase_pm_get_trans_cores(kbdev, type); + ready = kbase_pm_get_ready_cores(kbdev, type); + /* mask off ready from trans in case transitions finished between the + * register reads */ + trans &= ~ready; + + powering_on_trans = trans & *powering_on; + *powering_on = powering_on_trans; + + if (available != NULL) + *available = (ready | powering_on_trans) & desired_state; + + /* Update desired state to include the in-use cores. These have to be + * kept powered up because there are jobs running or about to run on + * these cores + */ + desired_state_in_use = desired_state | in_use; + + /* Update state of whether l2 caches are powered */ + if (type == KBASE_PM_CORE_L2) { + if ((ready == present) && (desired_state_in_use == ready) && + (trans == 0)) { + /* All are ready, none will be turned off, and none are + * transitioning */ + kbdev->pm.backend.l2_powered = 1; + if (kbdev->l2_users_count > 0) { + /* Notify any registered l2 cache users + * (optimized out when no users waiting) */ + wake_up(&kbdev->pm.backend.l2_powered_wait); + } + } else + kbdev->pm.backend.l2_powered = 0; + } + + if (desired_state_in_use == ready && (trans == 0)) + return true; + + /* Restrict the cores to those that are actually present */ + powerup = desired_state_in_use & present; + powerdown = (~desired_state_in_use) & present; + + /* Restrict to cores that are not already in the desired state */ + powerup &= ~ready; + powerdown &= ready; + + /* Don't transition any cores that are already transitioning, except for + * Mali cores that support the following case: + * + * If the SHADER_PWRON or TILER_PWRON registers are written to turn on + * a core that is currently transitioning to power off, then this is + * remembered and the shader core is automatically powered up again once + * the original transition completes. Once the automatic power on is + * complete any job scheduled on the shader core should start. 
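	 *
	 * A worked sketch with small numbers: if cores 0-3 are present, core 0
	 * is ready, core 1 is transitioning to power off, and the desired
	 * state is 0xF, then at this point powerup == 0xE and powerdown == 0.
	 * On hardware without PWRON_DURING_PWROFF_TRANS the masking below
	 * reduces powerup to 0xC, so core 1 is simply retried on a later
	 * invocation once its power-off transition completes.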
+ */ + powerdown &= ~trans; + + if (kbase_hw_has_feature(kbdev, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS)) + if (KBASE_PM_CORE_SHADER == type || KBASE_PM_CORE_TILER == type) + trans = powering_on_trans; /* for exception cases, only + * mask off cores in power on + * transitions */ + + powerup &= ~trans; + + /* Perform transitions if any */ + kbase_pm_invoke(kbdev, type, powerup, ACTION_PWRON); + kbase_pm_invoke(kbdev, type, powerdown, ACTION_PWROFF); + + /* Recalculate cores transitioning on, and re-evaluate our state */ + powering_on_trans |= powerup; + *powering_on = powering_on_trans; + if (available != NULL) + *available = (ready | powering_on_trans) & desired_state; + + return false; +} + +KBASE_EXPORT_TEST_API(kbase_pm_transition_core_type); + +/** + * Determine which caches should be on for a particular core state. + * + * This function takes a bit mask of the present caches and the cores (or + * caches) that are attached to the caches that will be powered. It then + * computes which caches should be turned on to allow the cores requested to be + * powered up. + * + * @param present The bit mask of present caches + * @param cores_powered A bit mask of cores (or L2 caches) that are desired to + * be powered + * + * @return A bit mask of the caches that should be turned on + */ +static u64 get_desired_cache_status(u64 present, u64 cores_powered) +{ + u64 desired = 0; + + while (present) { + /* Find out which is the highest set bit */ + u64 bit = fls64(present) - 1; + u64 bit_mask = 1ull << bit; + /* Create a mask which has all bits from 'bit' upwards set */ + + u64 mask = ~(bit_mask - 1); + + /* If there are any cores powered at this bit or above (that + * haven't previously been processed) then we need this core on + */ + if (cores_powered & mask) + desired |= bit_mask; + + /* Remove bits from cores_powered and present */ + cores_powered &= ~mask; + present &= ~bit_mask; + } + + return desired; +} + +KBASE_EXPORT_TEST_API(get_desired_cache_status); + +bool +MOCKABLE(kbase_pm_check_transitions_nolock) (struct kbase_device *kbdev) +{ + bool cores_are_available = false; + bool in_desired_state = true; + u64 desired_l2_state; + u64 cores_powered; + u64 tiler_available_bitmap; + u64 shader_available_bitmap; + u64 shader_ready_bitmap; + u64 shader_transitioning_bitmap; + u64 l2_available_bitmap; + u64 prev_l2_available_bitmap; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.power_change_lock); + + spin_lock(&kbdev->pm.backend.gpu_powered_lock); + if (kbdev->pm.backend.gpu_powered == false) { + spin_unlock(&kbdev->pm.backend.gpu_powered_lock); + if (kbdev->pm.backend.desired_shader_state == 0 && + kbdev->pm.backend.desired_tiler_state == 0) + return true; + return false; + } + + /* Trace that a change-state is being requested, and that it took + * (effectively) no time to start it. This is useful for counting how + * many state changes occurred, in a way that's backwards-compatible + * with processing the trace data */ + kbase_timeline_pm_send_event(kbdev, + KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE); + kbase_timeline_pm_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE); + + /* If any cores are already powered then, we must keep the caches on */ + cores_powered = kbase_pm_get_ready_cores(kbdev, KBASE_PM_CORE_SHADER); + + cores_powered |= kbdev->pm.backend.desired_shader_state; + + /* If there are l2 cache users registered, keep all l2s powered even if + * all other cores are off. 
*/ + if (kbdev->l2_users_count > 0) + cores_powered |= kbdev->l2_present_bitmap; + + desired_l2_state = get_desired_cache_status(kbdev->l2_present_bitmap, + cores_powered); + + /* If any l2 cache is on, then enable l2 #0, for use by job manager */ + if (0 != desired_l2_state) { + desired_l2_state |= 1; + /* Also enable tiler if l2 cache is powered */ + kbdev->pm.backend.desired_tiler_state = + kbdev->tiler_present_bitmap; + } else { + kbdev->pm.backend.desired_tiler_state = 0; + } + + prev_l2_available_bitmap = kbdev->l2_available_bitmap; + in_desired_state &= kbase_pm_transition_core_type(kbdev, + KBASE_PM_CORE_L2, desired_l2_state, 0, + &l2_available_bitmap, + &kbdev->pm.backend.powering_on_l2_state); + + if (kbdev->l2_available_bitmap != l2_available_bitmap) + KBASE_TIMELINE_POWER_L2(kbdev, l2_available_bitmap); + + kbdev->l2_available_bitmap = l2_available_bitmap; + + if (in_desired_state) { + in_desired_state &= kbase_pm_transition_core_type(kbdev, + KBASE_PM_CORE_TILER, + kbdev->pm.backend.desired_tiler_state, + 0, &tiler_available_bitmap, + &kbdev->pm.backend.powering_on_tiler_state); + in_desired_state &= kbase_pm_transition_core_type(kbdev, + KBASE_PM_CORE_SHADER, + kbdev->pm.backend.desired_shader_state, + kbdev->shader_inuse_bitmap, + &shader_available_bitmap, + &kbdev->pm.backend.powering_on_shader_state); + + if (kbdev->shader_available_bitmap != shader_available_bitmap) { + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + NULL, 0u, + (u32) shader_available_bitmap); + KBASE_TIMELINE_POWER_SHADER(kbdev, + shader_available_bitmap); + } + + kbdev->shader_available_bitmap = shader_available_bitmap; + + if (kbdev->tiler_available_bitmap != tiler_available_bitmap) { + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, + (u32) tiler_available_bitmap); + KBASE_TIMELINE_POWER_TILER(kbdev, + tiler_available_bitmap); + } + + kbdev->tiler_available_bitmap = tiler_available_bitmap; + + } else if ((l2_available_bitmap & kbdev->tiler_present_bitmap) != + kbdev->tiler_present_bitmap) { + tiler_available_bitmap = 0; + + if (kbdev->tiler_available_bitmap != tiler_available_bitmap) + KBASE_TIMELINE_POWER_TILER(kbdev, + tiler_available_bitmap); + + kbdev->tiler_available_bitmap = tiler_available_bitmap; + } + + /* State updated for slow-path waiters */ + kbdev->pm.backend.gpu_in_desired_state = in_desired_state; + + shader_ready_bitmap = kbase_pm_get_ready_cores(kbdev, + KBASE_PM_CORE_SHADER); + shader_transitioning_bitmap = kbase_pm_get_trans_cores(kbdev, + KBASE_PM_CORE_SHADER); + + /* Determine whether the cores are now available (even if the set of + * available cores is empty). 
Note that they can be available even if
+	 * we've not finished transitioning to the desired state */
+	if ((kbdev->shader_available_bitmap &
+					kbdev->pm.backend.desired_shader_state)
+				== kbdev->pm.backend.desired_shader_state &&
+		(kbdev->tiler_available_bitmap &
+					kbdev->pm.backend.desired_tiler_state)
+				== kbdev->pm.backend.desired_tiler_state) {
+		cores_are_available = true;
+
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE, NULL, NULL, 0u,
+				(u32)(kbdev->shader_available_bitmap &
+				kbdev->pm.backend.desired_shader_state));
+		KBASE_TRACE_ADD(kbdev, PM_CORES_AVAILABLE_TILER, NULL, NULL, 0u,
+				(u32)(kbdev->tiler_available_bitmap &
+				kbdev->pm.backend.desired_tiler_state));
+
+		/* Log timelining information about handling events that power
+		 * up cores, to match up with either immediate submission
+		 * (because the cores were already available) or submission
+		 * from the PM IRQ */
+		if (!in_desired_state)
+			kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+	}
+
+	if (in_desired_state) {
+		KBASE_DEBUG_ASSERT(cores_are_available);
+
+#if defined(CONFIG_MALI_GATOR_SUPPORT)
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_L2,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_L2));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_SHADER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_SHADER));
+		kbase_trace_mali_pm_status(KBASE_PM_CORE_TILER,
+						kbase_pm_get_ready_cores(kbdev,
+							KBASE_PM_CORE_TILER));
+#endif
+#if defined(CONFIG_MALI_MIPE_ENABLED)
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_L2,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_L2));
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_SHADER,
+				kbase_pm_get_ready_cores(
+					kbdev, KBASE_PM_CORE_SHADER));
+		kbase_tlstream_aux_pm_state(
+				KBASE_PM_CORE_TILER,
+				kbase_pm_get_ready_cores(
+					kbdev,
+					KBASE_PM_CORE_TILER));
+#endif
+
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED, NULL, NULL,
+				kbdev->pm.backend.gpu_in_desired_state,
+				(u32)kbdev->pm.backend.desired_shader_state);
+		KBASE_TRACE_ADD(kbdev, PM_DESIRED_REACHED_TILER, NULL, NULL, 0u,
+				(u32)kbdev->pm.backend.desired_tiler_state);
+
+		/* Log timelining information for synchronous waiters */
+		kbase_timeline_pm_send_event(kbdev,
+				KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED);
+		/* Wake slow-path waiters. Job scheduler does not use this. */
+		KBASE_TRACE_ADD(kbdev, PM_WAKE_WAITERS, NULL, NULL, 0u, 0);
+		wake_up(&kbdev->pm.backend.gpu_in_desired_state_wait);
+	}
+
+	spin_unlock(&kbdev->pm.backend.gpu_powered_lock);
+
+	/* kbase_pm_ca_update_core_status can cause one-level recursion into
+	 * this function, so it must only be called once all changes to kbdev
+	 * have been committed, and after the gpu_powered_lock has been
+	 * dropped.
*/ + if (kbdev->shader_ready_bitmap != shader_ready_bitmap || + kbdev->shader_transitioning_bitmap != shader_transitioning_bitmap) { + kbdev->shader_ready_bitmap = shader_ready_bitmap; + kbdev->shader_transitioning_bitmap = + shader_transitioning_bitmap; + + kbase_pm_ca_update_core_status(kbdev, shader_ready_bitmap, + shader_transitioning_bitmap); + } + + /* The core availability policy is not allowed to keep core group 0 + * turned off (unless it was changing the l2 power state) */ + if (!((shader_ready_bitmap | shader_transitioning_bitmap) & + kbdev->gpu_props.props.coherency_info.group[0].core_mask) && + (prev_l2_available_bitmap == desired_l2_state) && + !(kbase_pm_ca_get_core_mask(kbdev) & + kbdev->gpu_props.props.coherency_info.group[0].core_mask)) + BUG(); + + /* The core availability policy is allowed to keep core group 1 off, + * but all jobs specifically targeting CG1 must fail */ + if (!((shader_ready_bitmap | shader_transitioning_bitmap) & + kbdev->gpu_props.props.coherency_info.group[1].core_mask) && + !(kbase_pm_ca_get_core_mask(kbdev) & + kbdev->gpu_props.props.coherency_info.group[1].core_mask)) + kbdev->pm.backend.cg1_disabled = true; + else + kbdev->pm.backend.cg1_disabled = false; + + return cores_are_available; +} +KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_nolock); + +void kbase_pm_check_transitions_sync(struct kbase_device *kbdev) +{ + unsigned long flags; + bool cores_are_available; + /* Force the transition to be checked and reported - the cores may be + * 'available' (for job submission) but not fully powered up. */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + /* Wait for cores */ + wait_event(kbdev->pm.backend.gpu_in_desired_state_wait, + kbdev->pm.backend.gpu_in_desired_state); + + /* Log timelining information that a change in state has completed */ + kbase_timeline_pm_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); +} +KBASE_EXPORT_TEST_API(kbase_pm_check_transitions_sync); + +void kbase_pm_enable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Clear all interrupts, + * and unmask them all. + */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, + NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, + NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, + NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF, NULL); +} + +KBASE_EXPORT_TEST_API(kbase_pm_enable_interrupts); + +void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Clear all interrupts, + * and unmask them all. 
+ */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, + NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), GPU_IRQ_REG_ALL, + NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, + NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF, NULL); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), mask, NULL); +} + +void kbase_pm_disable_interrupts(struct kbase_device *kbdev) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + /* + * Mask all interrupts, + * and clear them all. + */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), GPU_IRQ_REG_ALL, + NULL); + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF, + NULL); + + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0, NULL); + kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF, NULL); +} + +KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts); + +/* + * pmu layout: + * 0x0000: PMU TAG (RO) (0xCAFECAFE) + * 0x0004: PMU VERSION ID (RO) (0x00000000) + * 0x0008: CLOCK ENABLE (RW) (31:1 SBZ, 0 CLOCK STATE) + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume) +{ + bool reset_required = is_resume; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + unsigned long flags; + int i; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&kbdev->pm.lock); + + if (kbdev->pm.backend.gpu_powered) { + /* Already turned on */ + if (kbdev->poweroff_pending) + kbase_pm_enable_interrupts(kbdev); + kbdev->poweroff_pending = false; + KBASE_DEBUG_ASSERT(!is_resume); + return; + } + + kbdev->poweroff_pending = false; + + KBASE_TRACE_ADD(kbdev, PM_GPU_ON, NULL, NULL, 0u, 0u); + + if (is_resume && kbdev->pm.backend.callback_power_resume) { + kbdev->pm.backend.callback_power_resume(kbdev); + } else if (kbdev->pm.backend.callback_power_on) { + kbdev->pm.backend.callback_power_on(kbdev); + /* If your platform properly keeps the GPU state you may use the + * return value of the callback_power_on function to + * conditionally reset the GPU on power up. Currently we are + * conservative and always reset the GPU. 
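+		 * An integration that tracks state retention might instead
+		 * replace the call above with (illustrative sketch only,
+		 * assuming the platform callback returns non-zero exactly
+		 * when GPU state was lost):
+		 *
+		 *   reset_required =
+		 *	kbdev->pm.backend.callback_power_on(kbdev);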
*/ + reset_required = true; + } + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (reset_required) { + /* GPU state was lost, reset GPU to ensure it is in a + * consistent state */ + kbase_pm_init_hw(kbdev, PM_ENABLE_IRQS); + } + + /* Reprogram the GPU's MMU */ + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbase_as *as = &kbdev->as[i]; + + mutex_lock(&as->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + if (js_devdata->runpool_irq.per_as_data[i].kctx) + kbase_mmu_update( + js_devdata->runpool_irq.per_as_data[i].kctx); + else + kbase_mmu_disable_as(kbdev, i); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&as->transaction_mutex); + } + + /* Lastly, enable the interrupts */ + kbase_pm_enable_interrupts(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_on); + +bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* ASSERT that the cores should now be unavailable. No lock needed. */ + KBASE_DEBUG_ASSERT(kbdev->shader_available_bitmap == 0u); + + kbdev->poweroff_pending = true; + + if (!kbdev->pm.backend.gpu_powered) { + /* Already turned off */ + if (is_suspend && kbdev->pm.backend.callback_power_suspend) + kbdev->pm.backend.callback_power_suspend(kbdev); + return true; + } + + KBASE_TRACE_ADD(kbdev, PM_GPU_OFF, NULL, NULL, 0u, 0u); + + /* Disable interrupts. This also clears any outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Ensure that any IRQ handlers have finished */ + kbase_synchronize_irqs(kbdev); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (atomic_read(&kbdev->faults_pending)) { + /* Page/bus faults are still being processed. The GPU can not + * be powered off until they have completed */ + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + flags); + return false; + } + + /* The GPU power may be turned off from this point */ + kbdev->pm.backend.gpu_powered = false; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, flags); + + if (is_suspend && kbdev->pm.backend.callback_power_suspend) + kbdev->pm.backend.callback_power_suspend(kbdev); + else if (kbdev->pm.backend.callback_power_off) + kbdev->pm.backend.callback_power_off(kbdev); + return true; +} + +KBASE_EXPORT_TEST_API(kbase_pm_clock_off); + +struct kbasep_reset_timeout_data { + struct hrtimer timer; + bool timed_out; + struct kbase_device *kbdev; +}; + +void kbase_pm_reset_done(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + kbdev->pm.backend.reset_done = true; + wake_up(&kbdev->pm.backend.reset_done_wait); +} + +/** + * Wait for the RESET_COMPLETED IRQ to occur, then reset the waiting state. 
+ */
+static void kbase_pm_wait_for_reset(struct kbase_device *kbdev)
+{
+	lockdep_assert_held(&kbdev->pm.lock);
+
+	wait_event(kbdev->pm.backend.reset_done_wait,
+						(kbdev->pm.backend.reset_done));
+	kbdev->pm.backend.reset_done = false;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_reset_done);
+
+static enum hrtimer_restart kbasep_reset_timeout(struct hrtimer *timer)
+{
+	struct kbasep_reset_timeout_data *rtdata =
+		container_of(timer, struct kbasep_reset_timeout_data, timer);
+
+	rtdata->timed_out = 1;
+
+	/* Set the wait queue to wake up kbase_pm_init_hw even though the reset
+	 * hasn't completed */
+	kbase_pm_reset_done(rtdata->kbdev);
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
+{
+	kbdev->hw_quirks_sc = 0;
+
+	/* Needed due to MIDBASE-1494: LS_PAUSEBUFFER_DISABLE (see PRLAM-8443),
+	 * and due to MIDGLES-3539 (see PRLAM-11035) */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8443) ||
+			kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_11035))
+		kbdev->hw_quirks_sc |= SC_LS_PAUSEBUFFER_DISABLE;
+
+	/* Needed due to MIDBASE-2054: SDC_DISABLE_OQ_DISCARD. See PRLAM-10327.
+	 */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10327))
+		kbdev->hw_quirks_sc |= SC_SDC_DISABLE_OQ_DISCARD;
+
+	/* Enable alternative hardware counter selection if configured. */
+	if (DEFAULT_ALTERNATIVE_HWC)
+		kbdev->hw_quirks_sc |= SC_ALT_COUNTERS;
+
+	/* Needed due to MIDBASE-2795. ENABLE_TEXGRD_FLAGS. See PRLAM-10797. */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10797))
+		kbdev->hw_quirks_sc |= SC_ENABLE_TEXGRD_FLAGS;
+
+	kbdev->hw_quirks_tiler = kbase_reg_read(kbdev,
+			GPU_CONTROL_REG(TILER_CONFIG), NULL);
+
+	/* Set tiler clock gate override if required */
+	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_T76X_3953))
+		kbdev->hw_quirks_tiler |= TC_CLOCK_GATE_OVERRIDE;
+
+	/* Limit the GPU bus bandwidth if the platform needs this.
*/ + kbdev->hw_quirks_mmu = kbase_reg_read(kbdev, + GPU_CONTROL_REG(L2_MMU_CONFIG), NULL); + + /* Limit read ID width for AXI */ + kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS); + kbdev->hw_quirks_mmu |= (DEFAULT_ARID_LIMIT & 0x3) << + L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT; + + /* Limit write ID width for AXI */ + kbdev->hw_quirks_mmu &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES); + kbdev->hw_quirks_mmu |= (DEFAULT_AWID_LIMIT & 0x3) << + L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT; +} + +static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev) +{ + if (kbdev->hw_quirks_sc) + kbase_reg_write(kbdev, GPU_CONTROL_REG(SHADER_CONFIG), + kbdev->hw_quirks_sc, NULL); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(TILER_CONFIG), + kbdev->hw_quirks_tiler, NULL); + + kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG), + kbdev->hw_quirks_mmu, NULL); +} + + +int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags) +{ + unsigned long irq_flags; + struct kbasep_reset_timeout_data rtdata; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + lockdep_assert_held(&kbdev->pm.lock); + + /* Ensure the clock is on before attempting to access the hardware */ + if (!kbdev->pm.backend.gpu_powered) { + if (kbdev->pm.backend.callback_power_on) + kbdev->pm.backend.callback_power_on(kbdev); + + spin_lock_irqsave(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + kbdev->pm.backend.gpu_powered = true; + spin_unlock_irqrestore(&kbdev->pm.backend.gpu_powered_lock, + irq_flags); + } + + /* Ensure interrupts are off to begin with, this also clears any + * outstanding interrupts */ + kbase_pm_disable_interrupts(kbdev); + /* Prepare for the soft-reset */ + kbdev->pm.backend.reset_done = false; + + /* The cores should be made unavailable due to the reset */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, irq_flags); + if (kbdev->shader_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE, NULL, + NULL, 0u, (u32)0u); + if (kbdev->tiler_available_bitmap != 0u) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, + NULL, NULL, 0u, (u32)0u); + kbdev->shader_available_bitmap = 0u; + kbdev->tiler_available_bitmap = 0u; + kbdev->l2_available_bitmap = 0u; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, irq_flags); + + /* Soft reset the GPU */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_SOFT_RESET, NULL, NULL, 0u, 0); + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), + GPU_COMMAND_SOFT_RESET, NULL); + + /* Unmask the reset complete interrupt only */ + kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), RESET_COMPLETED, + NULL); + + /* Initialize a structure for tracking the status of the reset */ + rtdata.kbdev = kbdev; + rtdata.timed_out = 0; + + /* Create a timer to use as a timeout on the reset */ + hrtimer_init_on_stack(&rtdata.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rtdata.timer.function = kbasep_reset_timeout; + + hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT), + HRTIMER_MODE_REL); + + /* Wait for the RESET_COMPLETED interrupt to be raised */ + kbase_pm_wait_for_reset(kbdev); + + if (rtdata.timed_out == 0) { + /* GPU has been reset */ + hrtimer_cancel(&rtdata.timer); + destroy_hrtimer_on_stack(&rtdata.timer); + goto out; + } + + /* No interrupt has been received - check if the RAWSTAT register says + * the reset has completed */ + if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT), NULL) & + RESET_COMPLETED) { + /* The interrupt is set in the RAWSTAT; this suggests that the + * interrupts are not getting to the CPU */ + dev_warn(kbdev->dev, "Reset 
interrupt didn't reach CPU. Check interrupt assignments.\n");
+		/* If interrupts aren't working we can't continue. */
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		goto out;
+	}
+
+	/* The GPU doesn't seem to be responding to the reset so try a hard
+	 * reset */
+	dev_err(kbdev->dev, "Failed to soft-reset GPU (timed out after %d ms), now attempting a hard reset\n",
+								RESET_TIMEOUT);
+	KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0);
+	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+						GPU_COMMAND_HARD_RESET, NULL);
+
+	/* Restart the timer to wait for the hard reset to complete */
+	rtdata.timed_out = 0;
+
+	hrtimer_start(&rtdata.timer, HR_TIMER_DELAY_MSEC(RESET_TIMEOUT),
+							HRTIMER_MODE_REL);
+
+	/* Wait for the RESET_COMPLETED interrupt to be raised */
+	kbase_pm_wait_for_reset(kbdev);
+
+	if (rtdata.timed_out == 0) {
+		/* GPU has been reset */
+		hrtimer_cancel(&rtdata.timer);
+		destroy_hrtimer_on_stack(&rtdata.timer);
+		goto out;
+	}
+
+	destroy_hrtimer_on_stack(&rtdata.timer);
+
+	dev_err(kbdev->dev, "Failed to hard-reset the GPU (timed out after %d ms)\n",
+								RESET_TIMEOUT);
+
+	/* The GPU still hasn't reset, give up */
+	return -EINVAL;
+
+out:
+
+	if (flags & PM_HW_ISSUES_DETECT)
+		kbase_pm_hw_issues_detect(kbdev);
+
+	kbase_pm_hw_issues_apply(kbdev);
+
+
+	/* If the cycle counter was in use, re-enable it; enable_irqs will
+	 * only be false when called from kbase_pm_powerup */
+	if (kbdev->pm.backend.gpu_cycle_counter_requests &&
+						(flags & PM_ENABLE_IRQS)) {
+		/* enable interrupts as the L2 may have to be powered on */
+		kbase_pm_enable_interrupts(kbdev);
+		kbase_pm_request_l2_caches(kbdev);
+
+		/* Re-enable the counters if we need to */
+		spin_lock_irqsave(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+		if (kbdev->pm.backend.gpu_cycle_counter_requests)
+			kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+					GPU_COMMAND_CYCLE_COUNT_START, NULL);
+		spin_unlock_irqrestore(
+			&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+								irq_flags);
+
+		kbase_pm_release_l2_caches(kbdev);
+		kbase_pm_disable_interrupts(kbdev);
+	}
+
+	if (flags & PM_ENABLE_IRQS)
+		kbase_pm_enable_interrupts(kbdev);
+
+	return 0;
+}
+
+/**
+ * Increase the count of cycle counter users and turn the cycle counters on if
+ * they were previously off
+ *
+ * This function is designed to be called by
+ * @ref kbase_pm_request_gpu_cycle_counter or
+ * @ref kbase_pm_request_gpu_cycle_counter_l2_is_on only
+ *
+ * When this function is called, the l2 cache must be on and the l2 cache users
+ * count must have been incremented by a call to
+ * @ref kbase_pm_request_l2_caches or @ref kbase_pm_request_l2_caches_l2_is_on
+ *
+ * @param kbdev The kbase device structure of the device
+ *
+ */
+static void
+kbase_pm_request_gpu_cycle_counter_do_request(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	++kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (1 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+				GPU_COMMAND_CYCLE_COUNT_START, NULL);
+
+	spin_unlock_irqrestore(
+		&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+}
+
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter);
+
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_powered);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests <
+								INT_MAX);
+
+	kbase_pm_request_l2_caches_l2_is_on(kbdev);
+
+	kbase_pm_request_gpu_cycle_counter_do_request(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_request_gpu_cycle_counter_l2_is_on);
+
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev)
+{
+	unsigned long flags;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_cycle_counter_requests > 0);
+
+	--kbdev->pm.backend.gpu_cycle_counter_requests;
+
+	if (0 == kbdev->pm.backend.gpu_cycle_counter_requests)
+		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
+				GPU_COMMAND_CYCLE_COUNT_STOP, NULL);
+
+	spin_unlock_irqrestore(
+		&kbdev->pm.backend.gpu_cycle_counter_requests_lock,
+									flags);
+
+	kbase_pm_release_l2_caches(kbdev);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_release_gpu_cycle_counter);
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
new file mode 100644
index 00000000000..a4bd11a1c0f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_internal.h
@@ -0,0 +1,557 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_internal.h
+ * Power management API definitions used internally by GPU backend
+ */
+
+#ifndef _KBASE_BACKEND_PM_INTERNAL_H_
+#define _KBASE_BACKEND_PM_INTERNAL_H_
+
+#include <mali_kbase_hwaccess_pm.h>
+
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_pm_policy.h"
+
+
+/**
+ * The GPU is idle.
+ *
+ * The OS may choose to turn off idle devices
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_dev_idle(struct kbase_device *kbdev);
+
+/**
+ * The GPU is active.
+ *
+ * The OS should avoid opportunistically turning off the GPU while it is active
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_dev_activate(struct kbase_device *kbdev);
+
+/**
+ * Get details of the cores that are present in the device.
+ *
+ * This function can be called by the active power policy to return a bitmask
+ * of the cores (of a specified type) present in the GPU device.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param type The type of core (see the @ref enum kbase_pm_core_type
+ * enumeration)
+ *
+ * @return The bit mask of cores present
+ */
+u64 kbase_pm_get_present_cores(struct kbase_device *kbdev,
+						enum kbase_pm_core_type type);
+
+/**
+ * Get details of the cores that are currently active in the device.
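+ *
+ * For example (illustrative), a policy could sample the busy shader cores
+ * with:
+ *   u64 busy = kbase_pm_get_active_cores(kbdev, KBASE_PM_CORE_SHADER);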
+ * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) that are actively processing work (i.e. + * turned on *and* busy). + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * @param type The type of core (see the @ref enum kbase_pm_core_type + * enumeration) + * + * @return The bit mask of active cores + */ +u64 kbase_pm_get_active_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * Get details of the cores that are currently transitioning between power + * states. + * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) that are currently transitioning between + * power states. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * @param type The type of core (see the @ref enum kbase_pm_core_type + * enumeration) + * + * @return The bit mask of transitioning cores + */ +u64 kbase_pm_get_trans_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * Get details of the cores that are currently powered and ready for jobs. + * + * This function can be called by the active power policy to return a bitmask of + * the cores (of a specified type) that are powered and ready for jobs (they may + * or may not be currently executing jobs). + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * @param type The type of core (see the @ref enum kbase_pm_core_type + * enumeration) + * + * @return The bit mask of ready cores + */ +u64 kbase_pm_get_ready_cores(struct kbase_device *kbdev, + enum kbase_pm_core_type type); + +/** + * Turn the clock for the device on, and enable device interrupts. + * + * This function can be used by a power policy to turn the clock for the GPU on. + * It should be modified during integration to perform the necessary actions to + * ensure that the GPU is fully powered and clocked. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * @param is_resume true if clock on due to resume after suspend, + * false otherwise + */ +void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume); + +/** + * Disable device interrupts, and turn the clock for the device off. + * + * This function can be used by a power policy to turn the clock for the GPU + * off. It should be modified during integration to perform the necessary + * actions to turn the clock off (if this is possible in the integration). + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * @param is_suspend true if clock off due to suspend, false otherwise + * + * @return true if clock was turned off + * false if clock can not be turned off due to pending page/bus fault + * workers. Caller must flush MMU workqueues and retry + */ +bool kbase_pm_clock_off(struct kbase_device *kbdev, bool is_suspend); + +/** + * Enable interrupts on the device. + * + * Interrupts are also enabled after a call to kbase_pm_clock_on(). + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +void kbase_pm_enable_interrupts(struct kbase_device *kbdev); + +/** + * Enable interrupts on the device, using the provided mask to set MMU_IRQ_MASK. + * + * Interrupts are also enabled after a call to kbase_pm_clock_on(). 
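+ *
+ * For example (illustrative), a caller that wants every interrupt source
+ * except the MMU ones unmasked could use:
+ *   kbase_pm_enable_interrupts_mmu_mask(kbdev, 0);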
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param mask The mask to use for MMU_IRQ_MASK
+ */
+void kbase_pm_enable_interrupts_mmu_mask(struct kbase_device *kbdev, u32 mask);
+
+/**
+ * Disable interrupts on the device.
+ *
+ * This prevents delivery of Power Management interrupts to the CPU so that
+ * kbase_pm_check_transitions_nolock() will not be called from the IRQ handler
+ * until @ref kbase_pm_enable_interrupts or kbase_pm_clock_on() is called.
+ *
+ * Interrupts are also disabled after a call to kbase_pm_clock_off().
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_disable_interrupts(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_init_hw - Initialize the hardware.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags specifying the type of PM init
+ *
+ * This function checks the GPU ID register to ensure that the GPU is supported
+ * by the driver and performs a reset on the device so that it is in a known
+ * state before the device is used.
+ *
+ * Return: 0 if the device is supported and successfully reset.
+ */
+int kbase_pm_init_hw(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * The GPU has been reset successfully.
+ *
+ * This function must be called by the GPU interrupt handler when the
+ * RESET_COMPLETED bit is set. It signals to the power management
+ * initialization code that the GPU has been successfully reset.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_reset_done(struct kbase_device *kbdev);
+
+
+/**
+ * Check if there are any power transitions to make, and if so start them.
+ *
+ * This function will check the desired_xx_state members of
+ * struct kbase_pm_device_data and the actual status of the hardware to see if
+ * any power transitions can be made at this time to make the hardware state
+ * closer to the state desired by the power policy.
+ *
+ * The return value can be used to check whether all the desired cores are
+ * available, and so whether it's worth submitting a job (e.g. from a Power
+ * Management IRQ).
+ *
+ * Note that this still returns true when desired_xx_state has no
+ * cores. That is: of the no cores desired, none were <em>un</em>available. In
+ * this case, the caller may still need to try submitting jobs. This is because
+ * the Core Availability Policy might have taken us to an intermediate state
+ * where no cores are powered, before powering on more cores (e.g. for core
+ * rotation)
+ *
+ * The caller must hold kbase_device::pm::power_change_lock
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @return true when all desired cores are available. That is,
+ * it's worthwhile for the caller to submit a job.
+ * @return false otherwise
+ */
+bool kbase_pm_check_transitions_nolock(struct kbase_device *kbdev);
+
+/**
+ * Synchronous and locking variant of kbase_pm_check_transitions_nolock()
+ *
+ * On returning, the desired state at the time of the call will have been met.
+ *
+ * @note There is nothing to stop the core being switched off by calls to
+ * kbase_pm_release_cores() or kbase_pm_unrequest_cores(). Therefore, the
+ * caller must have already made a call to
+ * kbase_pm_request_cores()/kbase_pm_request_cores_sync() previously.
+ *
+ * The usual use-case for this is to ensure cores are 'READY' after performing
+ * a GPU Reset.
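+ *
+ * A sketch of that use-case (illustrative only): hold a request via
+ * kbase_pm_request_cores_sync(), reset the GPU with kbase_pm_init_hw(),
+ * then call kbase_pm_check_transitions_sync() to block until the requested
+ * cores are READY.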
+ *
+ * Unlike kbase_pm_check_transitions_nolock(), the caller must not hold
+ * kbase_device::pm::power_change_lock, because this function will take that
+ * lock itself.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_check_transitions_sync(struct kbase_device *kbdev);
+
+/**
+ * Variant of kbase_pm_update_cores_state() where the caller must hold
+ * kbase_device::pm::power_change_lock
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev);
+
+/**
+ * Update the desired state of shader cores from the Power Policy, and begin
+ * any power transitions.
+ *
+ * This function will update the desired_xx_state members of
+ * struct kbase_pm_device_data by calling into the current Power Policy. It
+ * will then begin power transitions to make the hardware achieve the desired
+ * shader core state.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_update_cores_state(struct kbase_device *kbdev);
+
+/**
+ * Cancel any pending requests to power off the GPU and/or shader cores.
+ *
+ * This should be called by any functions which directly power off the GPU.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev);
+
+/**
+ * Read the bitmasks of present cores.
+ *
+ * This information is cached to avoid having to perform register reads
+ * whenever the information is required.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbasep_pm_read_present_cores(struct kbase_device *kbdev);
+
+/**
+ * Initialize the metrics gathering framework.
+ *
+ * This must be called before other metric gathering APIs are called.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return 0 on success, error code on error
+ */
+int kbasep_pm_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the metrics gathering framework.
+ *
+ * This must be called when metric gathering is no longer required. It is an
+ * error to call any metrics gathering function (other than
+ * kbasep_pm_metrics_init) after calling this function.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbasep_pm_metrics_term(struct kbase_device *kbdev);
+
+/**
+ * Function to be called by the frame buffer driver to update the vsync metric.
+ *
+ * This function should be called by the frame buffer driver to update whether
+ * the system is hitting the vsync target or not. buffer_updated should be true
+ * if the vsync corresponded with a new frame being displayed, otherwise it
+ * should be false. This function does not need to be called every vsync, but
+ * only when the value of buffer_updated differs from a previous call.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param buffer_updated True if the buffer has been updated on this VSync,
+ * false otherwise
+ */
+void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated);
+
+/**
+ * Configure the frame buffer device to set the vsync callback.
+ *
+ * This function should do whatever is necessary for this integration to ensure
+ * that kbase_pm_report_vsync is called appropriately.
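+ *
+ * An integration might, for example, hook its display driver's vsync
+ * interrupt and forward it (illustrative sketch; my_vsync_isr is
+ * hypothetical):
+ *
+ *   static void my_vsync_isr(struct kbase_device *kbdev, bool new_frame)
+ *   {
+ *           kbase_pm_report_vsync(kbdev, new_frame ? 1 : 0);
+ *   }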
+ *
+ * This function will need porting as part of the integration for a device.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_register_vsync_callback(struct kbase_device *kbdev);
+
+/**
+ * Free any resources that kbase_pm_register_vsync_callback allocated.
+ *
+ * This function should perform any cleanup required from the call to
+ * kbase_pm_register_vsync_callback. No callbacks should occur after this
+ * function has returned.
+ *
+ * This function will need porting as part of the integration for a device.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev);
+
+/**
+ * Determine whether the DVFS system should change the clock speed of the GPU.
+ *
+ * This function should be called regularly by the DVFS system to check whether
+ * the clock speed of the GPU needs updating. It will return one of three
+ * enumerated values of kbase_pm_dvfs_action:
+ *
+ * @param kbdev The kbase device structure for the device
+ * (must be a valid pointer)
+ * @retval KBASE_PM_DVFS_NOP The clock does not need changing
+ * @retval KBASE_PM_DVFS_CLOCK_UP The clock frequency should be increased if
+ * possible.
+ * @retval KBASE_PM_DVFS_CLOCK_DOWN The clock frequency should be decreased if
+ * possible.
+ */
+enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev);
+
+/**
+ * Mark that the GPU cycle counter is needed. If the caller is the first
+ * caller, then the GPU cycle counters will be enabled along with the l2 cache.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * @ref kbase_pm_context_active must have been called).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * This is a version of the above function
+ * (@ref kbase_pm_request_gpu_cycle_counter) suitable for being called when the
+ * l2 cache is known to be on and assured to be on until the subsequent call of
+ * kbase_pm_release_gpu_cycle_counter, such as when a job is submitted. It does
+ * not sleep and can be called from atomic context.
+ *
+ * The GPU must be powered when calling this function (i.e.
+ * @ref kbase_pm_context_active must have been called) and the l2 cache must be
+ * powered on.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_request_gpu_cycle_counter_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * Mark that the GPU cycle counter is no longer in use. If the caller is the
+ * last caller, then the GPU cycle counters will be disabled. A request must
+ * have been made before a call to this.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_release_gpu_cycle_counter(struct kbase_device *kbdev);
+
+/**
+ * Enables access to the GPU registers before power management has powered up
+ * the GPU with kbase_pm_powerup().
+ *
+ * Access to registers should be done using kbase_os_reg_read/write() at this
+ * stage, not kbase_reg_read/write().
+ *
+ * This causes the power management callbacks provided in the driver
+ * configuration to be called to turn on power and/or clocks to the GPU. See
+ * @ref kbase_pm_callback_conf.
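+ *
+ * Probe-time use might look like (illustrative sketch; assumes GPU_ID is
+ * the GPU identity register offset from the register map):
+ *
+ *   kbase_pm_register_access_enable(kbdev);
+ *   gpu_id = kbase_os_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
+ *   kbase_pm_register_access_disable(kbdev);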
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_register_access_enable(struct kbase_device *kbdev);
+
+/**
+ * Disables access to the GPU registers enabled earlier by a call to
+ * kbase_pm_register_access_enable().
+ *
+ * This causes the power management callbacks provided in the driver
+ * configuration to be called to turn off power and/or clocks to the GPU. See
+ * @ref kbase_pm_callback_conf
+ *
+ * This should only be used before power management is powered up with
+ * kbase_pm_powerup()
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_pm_register_access_disable(struct kbase_device *kbdev);
+
+/* NOTE: kbase_pm_is_suspending is in mali_kbase.h, because it is an inline
+ * function */
+
+/**
+ * Check if the power management metrics collection is active.
+ *
+ * Note that this returns whether the power management metrics collection was
+ * active at the time of the call; it is possible that the metrics collection
+ * enable state has changed since.
+ *
+ * The caller must handle the consequence that the state may have changed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @return true if metrics collection was active, else false.
+ */
+bool kbase_pm_metrics_is_active(struct kbase_device *kbdev);
+
+/**
+ * Power on the GPU, and any cores that are requested.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param is_resume true if power on due to resume after suspend,
+ * false otherwise
+ */
+void kbase_pm_do_poweron(struct kbase_device *kbdev, bool is_resume);
+
+/**
+ * Power off the GPU, and any cores that have been requested.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param is_suspend true if power off due to suspend,
+ * false otherwise
+ * @return true if power was turned off
+ * false if power can not be turned off due to pending page/bus
+ * fault workers. Caller must flush MMU workqueues and retry
+ */
+bool kbase_pm_do_poweroff(struct kbase_device *kbdev, bool is_suspend);
+
+#ifdef CONFIG_PM_DEVFREQ
+void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev,
+		unsigned long *total, unsigned long *busy);
+void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev);
+#endif
+
+#ifdef CONFIG_MALI_MIDGARD_DVFS
+
+/**
+ * Function provided by platform-specific code when DVFS is enabled to allow
+ * the power management metrics system to report utilisation.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param utilisation The current calculated utilisation by the metrics
+ * system.
+ * @param util_gl_share The current calculated gl share of utilisation.
+ * @param util_cl_share The current calculated cl share of utilisation per core
+ * group.
+ * @return 0 on failure and non-zero on success.
+ */
+
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
+	u32 util_gl_share, u32 util_cl_share[2]);
+#endif
+
+void kbase_pm_power_changed(struct kbase_device *kbdev);
+
+/**
+ * Inform the metrics system that an atom is about to be run.
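+ *
+ * Each call is expected to be balanced by a matching call to
+ * kbase_pm_metrics_release_atom() once the atom leaves its job slot,
+ * e.g. (illustrative):
+ *
+ *   kbase_pm_metrics_run_atom(kbdev, katom);
+ *   ...atom executes...
+ *   kbase_pm_metrics_release_atom(kbdev, katom);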
+ * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * @param katom The atom that is about to be run + */ +void kbase_pm_metrics_run_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * Inform the metrics system that an atom has been run and is being released. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + * @param katom The atom that is about to be released + */ +void kbase_pm_metrics_release_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + + +#endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c new file mode 100644 index 00000000000..b209c505bbd --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c @@ -0,0 +1,532 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_pm_metrics.c + * Metrics for power management + */ + +#include <mali_kbase.h> +#include <mali_kbase_pm.h> +#include <backend/gpu/mali_kbase_pm_internal.h> + +/* When VSync is being hit aim for utilisation between 70-90% */ +#define KBASE_PM_VSYNC_MIN_UTILISATION 70 +#define KBASE_PM_VSYNC_MAX_UTILISATION 90 +/* Otherwise aim for 10-40% */ +#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10 +#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40 + +/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns + * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly + * under 11s. Exceeding this will cause overflow */ +#define KBASE_PM_TIME_SHIFT 8 + +/* Maximum time between sampling of utilization data, without resetting the + * counters. 
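+ *
+ * Note that this value is compared directly against time_busy + time_idle
+ * (see kbase_pm_get_dvfs_utilisation()), which are stored in units of
+ * (1 << KBASE_PM_TIME_SHIFT) ns rather than plain nanoseconds.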
*/ +#define MALI_UTILIZATION_MAX_PERIOD 100000 /* ns = 100ms */ + +#ifdef CONFIG_MALI_MIDGARD_DVFS +static enum hrtimer_restart dvfs_callback(struct hrtimer *timer) +{ + unsigned long flags; + enum kbase_pm_dvfs_action action; + struct kbasep_pm_metrics_data *metrics; + + KBASE_DEBUG_ASSERT(timer != NULL); + + metrics = container_of(timer, struct kbasep_pm_metrics_data, timer); + action = kbase_pm_get_dvfs_action(metrics->kbdev); + + spin_lock_irqsave(&metrics->lock, flags); + + if (metrics->timer_active) + hrtimer_start(timer, + HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); + + spin_unlock_irqrestore(&metrics->lock, flags); + + return HRTIMER_NORESTART; +} +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +int kbasep_pm_metrics_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + kbdev->pm.backend.metrics.kbdev = kbdev; + kbdev->pm.backend.metrics.vsync_hit = 0; + kbdev->pm.backend.metrics.utilisation = 0; + kbdev->pm.backend.metrics.util_cl_share[0] = 0; + kbdev->pm.backend.metrics.util_cl_share[1] = 0; + kbdev->pm.backend.metrics.util_gl_share = 0; + + kbdev->pm.backend.metrics.time_period_start = ktime_get(); + kbdev->pm.backend.metrics.time_busy = 0; + kbdev->pm.backend.metrics.time_idle = 0; + kbdev->pm.backend.metrics.prev_busy = 0; + kbdev->pm.backend.metrics.prev_idle = 0; + kbdev->pm.backend.metrics.gpu_active = false; + kbdev->pm.backend.metrics.active_cl_ctx[0] = 0; + kbdev->pm.backend.metrics.active_cl_ctx[1] = 0; + kbdev->pm.backend.metrics.active_gl_ctx = 0; + kbdev->pm.backend.metrics.busy_cl[0] = 0; + kbdev->pm.backend.metrics.busy_cl[1] = 0; + kbdev->pm.backend.metrics.busy_gl = 0; + kbdev->pm.backend.metrics.nr_in_slots = 0; + + spin_lock_init(&kbdev->pm.backend.metrics.lock); + +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbdev->pm.backend.metrics.timer_active = true; + hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + kbdev->pm.backend.metrics.timer.function = dvfs_callback; + + hrtimer_start(&kbdev->pm.backend.metrics.timer, + HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period), + HRTIMER_MODE_REL); +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + + kbase_pm_register_vsync_callback(kbdev); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init); + +void kbasep_pm_metrics_term(struct kbase_device *kbdev) +{ +#ifdef CONFIG_MALI_MIDGARD_DVFS + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.timer_active = false; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + hrtimer_cancel(&kbdev->pm.backend.metrics.timer); +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + + kbase_pm_unregister_vsync_callback(kbdev); +} + +KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term); + +/** + * kbasep_pm_record_gpu_active - update metrics tracking GPU active time + * + * This function updates the time the GPU was busy executing jobs in + * general and specifically for CL and GL jobs. Call this function when + * a job is submitted or removed from the GPU (job issue) slots. + * + * The busy time recorded is the time passed since the last time the + * busy/idle metrics were updated (e.g. by this function, + * kbasep_pm_record_gpu_idle or others). + * + * Note that the time we record towards CL and GL jobs accounts for + * the total number of CL and GL jobs active at that time. If 20ms + * has passed and 3 GL jobs were active, we account 3*20 ms towards + * the GL busy time. 
The number of CL/GL jobs active is tracked by + * kbase_pm_metrics_run_atom() / kbase_pm_metrics_release_atom(). + * + * The kbdev->pm.backend.metrics.lock needs to be held when calling + * this function. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +static void kbasep_pm_record_gpu_active(struct kbase_device *kbdev) +{ + ktime_t now = ktime_get(); + ktime_t diff; + u32 ns_time; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + /* Record active time */ + diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); + ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + kbdev->pm.backend.metrics.time_busy += ns_time; + kbdev->pm.backend.metrics.busy_gl += ns_time * + kbdev->pm.backend.metrics.active_gl_ctx; + kbdev->pm.backend.metrics.busy_cl[0] += ns_time * + kbdev->pm.backend.metrics.active_cl_ctx[0]; + kbdev->pm.backend.metrics.busy_cl[1] += ns_time * + kbdev->pm.backend.metrics.active_cl_ctx[1]; + /* Reset time period */ + kbdev->pm.backend.metrics.time_period_start = now; +} + +/** + * kbasep_pm_record_gpu_idle - update metrics tracking GPU idle time + * + * This function updates the time the GPU was idle (not executing any + * jobs) based on the time passed when kbasep_pm_record_gpu_active() + * was called last to record the last job on the GPU finishing. + * + * Call this function when no jobs are in the job slots of the GPU and + * a job is about to be submitted to a job slot. + * + * The kbdev->pm.backend.metrics.lock needs to be held when calling + * this function. + * + * @param kbdev The kbase device structure for the device (must be a valid + * pointer) + */ +static void kbasep_pm_record_gpu_idle(struct kbase_device *kbdev) +{ + ktime_t now = ktime_get(); + ktime_t diff; + u32 ns_time; + + lockdep_assert_held(&kbdev->pm.backend.metrics.lock); + + /* Record idle time */ + diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); + ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + kbdev->pm.backend.metrics.time_idle += ns_time; + /* Reset time period */ + kbdev->pm.backend.metrics.time_period_start = now; +} + +void kbase_pm_report_vsync(struct kbase_device *kbdev, int buffer_updated) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbdev->pm.backend.metrics.vsync_hit = buffer_updated; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_report_vsync); + +#if defined(CONFIG_PM_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) +/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this + * function + */ +static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, + ktime_t now) +{ + ktime_t diff; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start); + + if (kbdev->pm.backend.metrics.gpu_active) { + u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT); + + kbdev->pm.backend.metrics.time_busy += ns_time; + kbdev->pm.backend.metrics.busy_cl[0] += ns_time * + kbdev->pm.backend.metrics.active_cl_ctx[0]; + kbdev->pm.backend.metrics.busy_cl[1] += ns_time * + kbdev->pm.backend.metrics.active_cl_ctx[1]; + kbdev->pm.backend.metrics.busy_gl += ns_time * + kbdev->pm.backend.metrics.active_gl_ctx; + } else { + kbdev->pm.backend.metrics.time_idle += (u32) (ktime_to_ns(diff) + >> KBASE_PM_TIME_SHIFT); + } +} + +/* Caller needs to hold kbdev->pm.backend.metrics.lock before 
calling this + * function. + */ +static void kbase_pm_reset_dvfs_utilisation_unlocked(struct kbase_device *kbdev, + ktime_t now) +{ + /* Store previous value */ + kbdev->pm.backend.metrics.prev_idle = + kbdev->pm.backend.metrics.time_idle; + kbdev->pm.backend.metrics.prev_busy = + kbdev->pm.backend.metrics.time_busy; + + /* Reset current values */ + kbdev->pm.backend.metrics.time_period_start = now; + kbdev->pm.backend.metrics.time_idle = 0; + kbdev->pm.backend.metrics.time_busy = 0; + kbdev->pm.backend.metrics.busy_cl[0] = 0; + kbdev->pm.backend.metrics.busy_cl[1] = 0; + kbdev->pm.backend.metrics.busy_gl = 0; +} + +void kbase_pm_reset_dvfs_utilisation(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, ktime_get()); + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} + +void kbase_pm_get_dvfs_utilisation(struct kbase_device *kbdev, + unsigned long *total_out, unsigned long *busy_out) +{ + ktime_t now = ktime_get(); + unsigned long flags, busy, total; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + kbase_pm_get_dvfs_utilisation_calc(kbdev, now); + + busy = kbdev->pm.backend.metrics.time_busy; + total = busy + kbdev->pm.backend.metrics.time_idle; + + /* Reset stats if older than MALI_UTILIZATION_MAX_PERIOD (default + * 100ms) */ + if (total >= MALI_UTILIZATION_MAX_PERIOD) { + kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); + } else if (total < (MALI_UTILIZATION_MAX_PERIOD / 2)) { + total += kbdev->pm.backend.metrics.prev_idle + + kbdev->pm.backend.metrics.prev_busy; + busy += kbdev->pm.backend.metrics.prev_busy; + } + + *total_out = total; + *busy_out = busy; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); +} +#endif + +#ifdef CONFIG_MALI_MIDGARD_DVFS + +/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this + * function + */ +int kbase_pm_get_dvfs_utilisation_old(struct kbase_device *kbdev, + int *util_gl_share, + int util_cl_share[2]) +{ + int utilisation; + int busy; + ktime_t now = ktime_get(); + + kbase_pm_get_dvfs_utilisation_calc(kbdev, now); + + if (kbdev->pm.backend.metrics.time_idle + + kbdev->pm.backend.metrics.time_busy == 0) { + /* No data - so we return NOP */ + utilisation = -1; + if (util_gl_share) + *util_gl_share = -1; + if (util_cl_share) { + util_cl_share[0] = -1; + util_cl_share[1] = -1; + } + goto out; + } + + utilisation = (100 * kbdev->pm.backend.metrics.time_busy) / + (kbdev->pm.backend.metrics.time_idle + + kbdev->pm.backend.metrics.time_busy); + + busy = kbdev->pm.backend.metrics.busy_gl + + kbdev->pm.backend.metrics.busy_cl[0] + + kbdev->pm.backend.metrics.busy_cl[1]; + + if (busy != 0) { + if (util_gl_share) + *util_gl_share = + (100 * kbdev->pm.backend.metrics.busy_gl) / + busy; + if (util_cl_share) { + util_cl_share[0] = + (100 * kbdev->pm.backend.metrics.busy_cl[0]) / + busy; + util_cl_share[1] = + (100 * kbdev->pm.backend.metrics.busy_cl[1]) / + busy; + } + } else { + if (util_gl_share) + *util_gl_share = -1; + if (util_cl_share) { + util_cl_share[0] = -1; + util_cl_share[1] = -1; + } + } + +out: + kbase_pm_reset_dvfs_utilisation_unlocked(kbdev, now); + + return utilisation; +} + +enum kbase_pm_dvfs_action kbase_pm_get_dvfs_action(struct kbase_device *kbdev) +{ + unsigned long flags; + int utilisation, util_gl_share; + int util_cl_share[2]; + enum kbase_pm_dvfs_action action; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, 
flags); + + utilisation = kbase_pm_get_dvfs_utilisation_old(kbdev, &util_gl_share, + util_cl_share); + + if (utilisation < 0 || util_gl_share < 0 || util_cl_share[0] < 0 || + util_cl_share[1] < 0) { + action = KBASE_PM_DVFS_NOP; + utilisation = 0; + util_gl_share = 0; + util_cl_share[0] = 0; + util_cl_share[1] = 0; + goto out; + } + + if (kbdev->pm.backend.metrics.vsync_hit) { + /* VSync is being met */ + if (utilisation < KBASE_PM_VSYNC_MIN_UTILISATION) + action = KBASE_PM_DVFS_CLOCK_DOWN; + else if (utilisation > KBASE_PM_VSYNC_MAX_UTILISATION) + action = KBASE_PM_DVFS_CLOCK_UP; + else + action = KBASE_PM_DVFS_NOP; + } else { + /* VSync is being missed */ + if (utilisation < KBASE_PM_NO_VSYNC_MIN_UTILISATION) + action = KBASE_PM_DVFS_CLOCK_DOWN; + else if (utilisation > KBASE_PM_NO_VSYNC_MAX_UTILISATION) + action = KBASE_PM_DVFS_CLOCK_UP; + else + action = KBASE_PM_DVFS_NOP; + } + + kbdev->pm.backend.metrics.utilisation = utilisation; + kbdev->pm.backend.metrics.util_cl_share[0] = util_cl_share[0]; + kbdev->pm.backend.metrics.util_cl_share[1] = util_cl_share[1]; + kbdev->pm.backend.metrics.util_gl_share = util_gl_share; +out: +#ifdef CONFIG_MALI_MIDGARD_DVFS + kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, + util_cl_share); +#endif /*CONFIG_MALI_MIDGARD_DVFS */ + + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + return action; +} +KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_action); + +bool kbase_pm_metrics_is_active(struct kbase_device *kbdev) +{ + bool isactive; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + isactive = kbdev->pm.backend.metrics.timer_active; + spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags); + + return isactive; +} +KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active); + +#endif /* CONFIG_MALI_MIDGARD_DVFS */ + +/* called when job is submitted to a GPU slot */ +void kbase_pm_metrics_run_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags); + + /* We may have been idle before */ + if (kbdev->pm.backend.metrics.nr_in_slots == 0) { + WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[0] != 0); + WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[1] != 0); + WARN_ON(kbdev->pm.backend.metrics.active_gl_ctx != 0); + + /* Record idle time */ + kbasep_pm_record_gpu_idle(kbdev); + + /* We are now active */ + WARN_ON(kbdev->pm.backend.metrics.gpu_active); + kbdev->pm.backend.metrics.gpu_active = true; + + } else { + /* Record active time */ + kbasep_pm_record_gpu_active(kbdev); + } + + /* Track number of jobs in GPU slots */ + WARN_ON(kbdev->pm.backend.metrics.nr_in_slots == U8_MAX); + kbdev->pm.backend.metrics.nr_in_slots++; + + /* Track if it was a CL or GL one that was submitted to a GPU slot */ + if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + int device_nr = (katom->core_req & + BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) + ? 
katom->device_nr : 0;
+		KBASE_DEBUG_ASSERT(device_nr < 2);
+		kbdev->pm.backend.metrics.active_cl_ctx[device_nr]++;
+	} else {
+		kbdev->pm.backend.metrics.active_gl_ctx++;
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
+
+/* called when job is removed from a GPU slot */
+void kbase_pm_metrics_release_atom(struct kbase_device *kbdev,
+					struct kbase_jd_atom *katom)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+
+	/* Track how long CL and/or GL jobs have been busy for */
+	kbasep_pm_record_gpu_active(kbdev);
+
+	/* Track number of jobs in GPU slots */
+	WARN_ON(kbdev->pm.backend.metrics.nr_in_slots == 0);
+	kbdev->pm.backend.metrics.nr_in_slots--;
+
+	/* We may become idle */
+	if (kbdev->pm.backend.metrics.nr_in_slots == 0) {
+		KBASE_DEBUG_ASSERT(kbdev->pm.backend.metrics.gpu_active);
+		kbdev->pm.backend.metrics.gpu_active = false;
+	}
+
+	/* Track which of the active GPU jobs are CL and which are GL */
+	if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
+		int device_nr = (katom->core_req &
+			BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
+				? katom->device_nr : 0;
+		KBASE_DEBUG_ASSERT(device_nr < 2);
+
+		WARN_ON(kbdev->pm.backend.metrics.active_cl_ctx[device_nr]
+				== 0);
+		kbdev->pm.backend.metrics.active_cl_ctx[device_nr]--;
+	} else {
+		WARN_ON(kbdev->pm.backend.metrics.active_gl_ctx == 0);
+		kbdev->pm.backend.metrics.active_gl_ctx--;
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c
new file mode 100644
index 00000000000..4ee35bbc427
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics_dummy.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm_metrics_dummy.c
+ * Dummy Metrics for power management.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_pm.h>
+
+void kbase_pm_register_vsync_callback(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	/* no VSync metrics will be available */
+	kbdev->pm.backend.metrics.platform_data = NULL;
+}
+
+void kbase_pm_unregister_vsync_callback(struct kbase_device *kbdev)
+{
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+}
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
new file mode 100644
index 00000000000..ba5c23928b8
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.c
@@ -0,0 +1,895 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_pm_policy.c
+ * Power policy API implementations
+ */
+
+#include <mali_kbase.h>
+#include <mali_midg_regmap.h>
+#include <mali_kbase_gator.h>
+#include <mali_kbase_pm.h>
+#include <mali_kbase_config_defaults.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static const struct kbase_pm_policy *const policy_list[] = {
+#ifdef CONFIG_MALI_NO_MALI
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#else /* CONFIG_MALI_NO_MALI */
+	&kbase_pm_demand_policy_ops,
+	&kbase_pm_always_on_policy_ops,
+	&kbase_pm_coarse_demand_policy_ops,
+#if !MALI_CUSTOMER_RELEASE
+	&kbase_pm_demand_always_powered_policy_ops,
+	&kbase_pm_fast_start_policy_ops,
+#endif
+#endif /* CONFIG_MALI_NO_MALI */
+};
+
+/** The number of policies available in the system.
+ * This is derived from the number of entries in the policy_list array.
+ */
+#define POLICY_COUNT (sizeof(policy_list)/sizeof(*policy_list))
+
+
+/* Function IDs for looking up Timeline Trace codes in
+ * kbase_pm_change_state_trace_code */
+enum kbase_pm_func_id {
+	KBASE_PM_FUNC_ID_REQUEST_CORES_START,
+	KBASE_PM_FUNC_ID_REQUEST_CORES_END,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_START,
+	KBASE_PM_FUNC_ID_RELEASE_CORES_END,
+	/* Note: kbase_pm_unrequest_cores() is on the slow path, and we neither
+	 * expect to hit it nor tend to hit it very much anyway. We can detect
+	 * whether we need more instrumentation by a difference between
+	 * PM_CHECKTRANS events and PM_SEND/HANDLE_EVENT.
*/ + + /* Must be the last */ + KBASE_PM_FUNC_ID_COUNT +}; + + +/* State changes during request/unrequest/release-ing cores */ +enum { + KBASE_PM_CHANGE_STATE_SHADER = (1u << 0), + KBASE_PM_CHANGE_STATE_TILER = (1u << 1), + + /* These two must be last */ + KBASE_PM_CHANGE_STATE_MASK = (KBASE_PM_CHANGE_STATE_TILER | + KBASE_PM_CHANGE_STATE_SHADER), + KBASE_PM_CHANGE_STATE_COUNT = KBASE_PM_CHANGE_STATE_MASK + 1 +}; +typedef u32 kbase_pm_change_state; + + +#ifdef CONFIG_MALI_TRACE_TIMELINE +/* Timeline Trace code lookups for each function */ +static u32 kbase_pm_change_state_trace_code[KBASE_PM_FUNC_ID_COUNT] + [KBASE_PM_CHANGE_STATE_COUNT] = { + /* kbase_pm_request_cores */ + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][0] = 0, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, + [KBASE_PM_FUNC_ID_REQUEST_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, + + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][0] = 0, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, + [KBASE_PM_FUNC_ID_REQUEST_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, + + /* kbase_pm_release_cores */ + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][0] = 0, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, + [KBASE_PM_FUNC_ID_RELEASE_CORES_START][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, + + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][0] = 0, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, + [KBASE_PM_FUNC_ID_RELEASE_CORES_END][KBASE_PM_CHANGE_STATE_SHADER | + KBASE_PM_CHANGE_STATE_TILER] = + SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END +}; + +static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, + enum kbase_pm_func_id func_id, + kbase_pm_change_state state) +{ + int trace_code; + + KBASE_DEBUG_ASSERT(func_id >= 0 && func_id < KBASE_PM_FUNC_ID_COUNT); + KBASE_DEBUG_ASSERT(state != 0 && (state & KBASE_PM_CHANGE_STATE_MASK) == + state); + + trace_code = kbase_pm_change_state_trace_code[func_id][state]; + KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code); +} + +#else /* CONFIG_MALI_TRACE_TIMELINE */ +static inline void kbase_timeline_pm_cores_func(struct kbase_device *kbdev, + enum kbase_pm_func_id func_id, kbase_pm_change_state state) +{ +} + +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + +static enum hrtimer_restart +kbasep_pm_do_gpu_poweroff_callback(struct hrtimer *timer) +{ + struct kbase_device *kbdev; + + kbdev = container_of(timer, struct kbase_device, + pm.backend.gpu_poweroff_timer); + + /* It is safe for this call to do nothing if the work item is already + * queued. 
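Deferring the power-off to a workqueue is necessary because this hrtimer
	 * callback runs in atomic context, whereas the worker has to take the
	 * pm.lock mutex and may therefore sleep.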
The worker function will read the most up-to-date state of
+	 * kbdev->pm.backend.gpu_poweroff_pending under lock.
+	 *
+	 * If a state change occurs while the worker function is processing,
+	 * this call will succeed as a work item can be requeued once it has
+	 * started processing.
+	 */
+	if (kbdev->pm.backend.gpu_poweroff_pending)
+		queue_work(kbdev->pm.backend.gpu_poweroff_wq,
+					&kbdev->pm.backend.gpu_poweroff_work);
+
+	if (kbdev->pm.backend.shader_poweroff_pending) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+		if (kbdev->pm.backend.shader_poweroff_pending) {
+			kbdev->pm.backend.shader_poweroff_pending_time--;
+
+			KBASE_DEBUG_ASSERT(
+				kbdev->pm.backend.shader_poweroff_pending_time
+									>= 0);
+
+			if (!kbdev->pm.backend.shader_poweroff_pending_time) {
+				u64 prev_shader_state =
+					kbdev->pm.backend.desired_shader_state;
+
+				kbdev->pm.backend.desired_shader_state &=
+					~kbdev->pm.backend.shader_poweroff_pending;
+
+				kbdev->pm.backend.shader_poweroff_pending = 0;
+
+				if (prev_shader_state !=
+					kbdev->pm.backend.desired_shader_state
+					|| kbdev->pm.backend.ca_in_transition) {
+					bool cores_are_available;
+
+					KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START);
+					cores_are_available =
+						kbase_pm_check_transitions_nolock(
+								kbdev);
+					KBASE_TIMELINE_PM_CHECKTRANS(kbdev, SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END);
+
+					/* Don't need 'cores_are_available',
+					 * because we don't return anything */
+					CSTD_UNUSED(cores_are_available);
+				}
+			}
+		}
+
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+	}
+
+	if (kbdev->pm.backend.poweroff_timer_needed) {
+		hrtimer_add_expires(timer, kbdev->pm.gpu_poweroff_time);
+
+		return HRTIMER_RESTART;
+	}
+
+	return HRTIMER_NORESTART;
+}
+
+static void kbasep_pm_do_gpu_poweroff_wq(struct work_struct *data)
+{
+	unsigned long flags;
+	struct kbase_device *kbdev;
+	bool do_poweroff = false;
+
+	kbdev = container_of(data, struct kbase_device,
+			pm.backend.gpu_poweroff_work);
+
+	mutex_lock(&kbdev->pm.lock);
+
+	if (kbdev->pm.backend.gpu_poweroff_pending == 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	kbdev->pm.backend.gpu_poweroff_pending--;
+
+	if (kbdev->pm.backend.gpu_poweroff_pending > 0) {
+		mutex_unlock(&kbdev->pm.lock);
+		return;
+	}
+
+	KBASE_DEBUG_ASSERT(kbdev->pm.backend.gpu_poweroff_pending == 0);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	/* Only power off the GPU if a request is still pending */
+	if (!kbdev->pm.backend.pm_current_policy->get_core_active(kbdev))
+		do_poweroff = true;
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	if (do_poweroff) {
+		kbdev->pm.backend.poweroff_timer_needed = false;
+		hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer);
+		/* Power off the GPU */
+		if (!kbase_pm_do_poweroff(kbdev, false)) {
+			/* GPU cannot be powered off at present */
+			kbdev->pm.backend.poweroff_timer_needed = true;
+			hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer,
+					kbdev->pm.gpu_poweroff_time,
+					HRTIMER_MODE_REL);
+		}
+	}
+
+	mutex_unlock(&kbdev->pm.lock);
+}
+
+int kbase_pm_policy_init(struct kbase_device *kbdev)
+{
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("kbase_pm_do_poweroff",
+			WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (!wq)
+		return -ENOMEM;
+
+	kbdev->pm.backend.gpu_poweroff_wq = wq;
+	INIT_WORK(&kbdev->pm.backend.gpu_poweroff_work,
+			kbasep_pm_do_gpu_poweroff_wq);
+	hrtimer_init(&kbdev->pm.backend.gpu_poweroff_timer,
+			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	
kbdev->pm.backend.gpu_poweroff_timer.function = + kbasep_pm_do_gpu_poweroff_callback; + kbdev->pm.backend.pm_current_policy = policy_list[0]; + kbdev->pm.backend.pm_current_policy->init(kbdev); + kbdev->pm.gpu_poweroff_time = + HR_TIMER_DELAY_NSEC(DEFAULT_PM_GPU_POWEROFF_TICK_NS); + kbdev->pm.poweroff_shader_ticks = DEFAULT_PM_POWEROFF_TICK_SHADER; + kbdev->pm.poweroff_gpu_ticks = DEFAULT_PM_POWEROFF_TICK_GPU; + + return 0; +} + +void kbase_pm_policy_term(struct kbase_device *kbdev) +{ + kbdev->pm.backend.pm_current_policy->term(kbdev); + destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wq); +} + +void kbase_pm_cancel_deferred_poweroff(struct kbase_device *kbdev) +{ + unsigned long flags; + + lockdep_assert_held(&kbdev->pm.lock); + + kbdev->pm.backend.poweroff_timer_needed = false; + hrtimer_cancel(&kbdev->pm.backend.gpu_poweroff_timer); + + /* If wq is already running but is held off by pm.lock, make sure it has + * no effect */ + kbdev->pm.backend.gpu_poweroff_pending = 0; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + kbdev->pm.backend.shader_poweroff_pending = 0; + kbdev->pm.backend.shader_poweroff_pending_time = 0; + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +void kbase_pm_update_active(struct kbase_device *kbdev) +{ + unsigned long flags; + bool active; + + lockdep_assert_held(&kbdev->pm.lock); + + /* pm_current_policy will never be NULL while pm.lock is held */ + KBASE_DEBUG_ASSERT(kbdev->pm.backend.pm_current_policy); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + active = kbdev->pm.backend.pm_current_policy->get_core_active(kbdev); + + if (active) { + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + if (kbdev->pm.backend.gpu_poweroff_pending) { + /* Cancel any pending power off request */ + kbdev->pm.backend.gpu_poweroff_pending = 0; + + /* If a request was pending then the GPU was still + * powered, so no need to continue */ + if (!kbdev->poweroff_pending) + return; + } + + if (!kbdev->pm.backend.poweroff_timer_needed && + !kbdev->pm.backend.gpu_powered) { + kbdev->pm.backend.poweroff_timer_needed = true; + hrtimer_start(&kbdev->pm.backend.gpu_poweroff_timer, + kbdev->pm.gpu_poweroff_time, + HRTIMER_MODE_REL); + } + + /* Power on the GPU and any cores requested by the policy */ + kbase_pm_do_poweron(kbdev, false); + } else { + /* It is an error for the power policy to power off the GPU + * when there are contexts active */ + KBASE_DEBUG_ASSERT(kbdev->pm.active_count == 0); + + if (kbdev->pm.backend.shader_poweroff_pending) { + kbdev->pm.backend.shader_poweroff_pending = 0; + kbdev->pm.backend.shader_poweroff_pending_time = 0; + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + + /* Request power off */ + if (kbdev->pm.backend.gpu_powered) { + kbdev->pm.backend.gpu_poweroff_pending = + kbdev->pm.poweroff_gpu_ticks; + if (!kbdev->pm.backend.poweroff_timer_needed) { + /* Start timer if not running (eg if power + * policy has been changed from always_on to + * something else). 
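The power-off is tick-based: each timer expiry queues the worker, which
	 * decrements gpu_poweroff_pending and performs the actual power-off
	 * only once that count reaches zero.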
This will ensure the GPU is + * actually powered off */ + kbdev->pm.backend.poweroff_timer_needed = true; + hrtimer_start( + &kbdev->pm.backend.gpu_poweroff_timer, + kbdev->pm.gpu_poweroff_time, + HRTIMER_MODE_REL); + } + } + } +} + +void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev) +{ + u64 desired_bitmap; + bool cores_are_available; + + lockdep_assert_held(&kbdev->pm.power_change_lock); + + if (kbdev->pm.backend.pm_current_policy == NULL) + return; + + desired_bitmap = + kbdev->pm.backend.pm_current_policy->get_core_mask(kbdev); + desired_bitmap &= kbase_pm_ca_get_core_mask(kbdev); + + /* Enable core 0 if tiler required, regardless of core availability */ + if (kbdev->tiler_needed_cnt > 0 || kbdev->tiler_inuse_cnt > 0) + desired_bitmap |= 1; + + if (kbdev->pm.backend.desired_shader_state != desired_bitmap) + KBASE_TRACE_ADD(kbdev, PM_CORES_CHANGE_DESIRED, NULL, NULL, 0u, + (u32)desired_bitmap); + + /* Are any cores being powered on? */ + if (~kbdev->pm.backend.desired_shader_state & desired_bitmap || + kbdev->pm.backend.ca_in_transition) { + /* Check if we are powering off any cores before updating shader + * state */ + if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) { + /* Start timer to power off cores */ + kbdev->pm.backend.shader_poweroff_pending |= + (kbdev->pm.backend.desired_shader_state & + ~desired_bitmap); + kbdev->pm.backend.shader_poweroff_pending_time = + kbdev->pm.poweroff_shader_ticks; + } + + kbdev->pm.backend.desired_shader_state = desired_bitmap; + + /* If any cores are being powered on, transition immediately */ + cores_are_available = kbase_pm_check_transitions_nolock(kbdev); + } else if (kbdev->pm.backend.desired_shader_state & ~desired_bitmap) { + /* Start timer to power off cores */ + kbdev->pm.backend.shader_poweroff_pending |= + (kbdev->pm.backend.desired_shader_state & + ~desired_bitmap); + kbdev->pm.backend.shader_poweroff_pending_time = + kbdev->pm.poweroff_shader_ticks; + } else if (kbdev->pm.active_count == 0 && desired_bitmap != 0 && + kbdev->pm.backend.poweroff_timer_needed) { + /* If power policy is keeping cores on despite there being no + * active contexts then disable poweroff timer as it isn't + * required. 
+ * Only reset poweroff_timer_needed if we're not in the middle + * of the power off callback */ + kbdev->pm.backend.poweroff_timer_needed = false; + hrtimer_try_to_cancel(&kbdev->pm.backend.gpu_poweroff_timer); + } + + /* Ensure timer does not power off wanted cores and make sure to power + * off unwanted cores */ + if (kbdev->pm.backend.shader_poweroff_pending != 0) { + kbdev->pm.backend.shader_poweroff_pending &= + ~(kbdev->pm.backend.desired_shader_state & + desired_bitmap); + if (kbdev->pm.backend.shader_poweroff_pending == 0) + kbdev->pm.backend.shader_poweroff_pending_time = 0; + } + + /* Don't need 'cores_are_available', because we don't return anything */ + CSTD_UNUSED(cores_are_available); +} + +void kbase_pm_update_cores_state(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + kbase_pm_update_cores_state_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +int kbase_pm_list_policies(const struct kbase_pm_policy * const **list) +{ + if (!list) + return POLICY_COUNT; + + *list = policy_list; + + return POLICY_COUNT; +} + +KBASE_EXPORT_TEST_API(kbase_pm_list_policies); + +const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return kbdev->pm.backend.pm_current_policy; +} + +KBASE_EXPORT_TEST_API(kbase_pm_get_policy); + +void kbase_pm_set_policy(struct kbase_device *kbdev, + const struct kbase_pm_policy *new_policy) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + const struct kbase_pm_policy *old_policy; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(new_policy != NULL); + + KBASE_TRACE_ADD(kbdev, PM_SET_POLICY, NULL, NULL, 0u, new_policy->id); + + /* During a policy change we pretend the GPU is active */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread */ + kbase_pm_context_active(kbdev); + + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + + /* Remove the policy to prevent IRQ handlers from working on it */ + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + old_policy = kbdev->pm.backend.pm_current_policy; + kbdev->pm.backend.pm_current_policy = NULL; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_TERM, NULL, NULL, 0u, + old_policy->id); + if (old_policy->term) + old_policy->term(kbdev); + + KBASE_TRACE_ADD(kbdev, PM_CURRENT_POLICY_INIT, NULL, NULL, 0u, + new_policy->id); + if (new_policy->init) + new_policy->init(kbdev); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + kbdev->pm.backend.pm_current_policy = new_policy; + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + + /* If any core power state changes were previously attempted, but + * couldn't be made because the policy was changing (current_policy was + * NULL), then re-try them here. 
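Both the active state and the desired core state are recomputed below,
	 * since either may have changed while pm_current_policy was NULL.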
*/ + kbase_pm_update_active(kbdev); + kbase_pm_update_cores_state(kbdev); + + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + + /* Now the policy change is finished, we release our fake context active + * reference */ + kbase_pm_context_idle(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_set_policy); + +/** Check whether a state change has finished, and trace it as completed */ +static void +kbase_pm_trace_check_and_finish_state_change(struct kbase_device *kbdev) +{ + if ((kbdev->shader_available_bitmap & + kbdev->pm.backend.desired_shader_state) + == kbdev->pm.backend.desired_shader_state && + (kbdev->tiler_available_bitmap & + kbdev->pm.backend.desired_tiler_state) + == kbdev->pm.backend.desired_tiler_state) + kbase_timeline_pm_check_handle_event(kbdev, + KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED); +} + +void kbase_pm_request_cores(struct kbase_device *kbdev, + bool tiler_required, u64 shader_cores) +{ + unsigned long flags; + u64 cores; + + kbase_pm_change_state change_gpu_state = 0u; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + cores = shader_cores; + while (cores) { + int bitnum = fls64(cores) - 1; + u64 bit = 1ULL << bitnum; + + /* It should be almost impossible for this to overflow. It would + * require 2^32 atoms to request a particular core, which would + * require 2^24 contexts to submit. This would require an amount + * of memory that is impossible on a 32-bit system and extremely + * unlikely on a 64-bit system. */ + int cnt = ++kbdev->shader_needed_cnt[bitnum]; + + if (1 == cnt) { + kbdev->shader_needed_bitmap |= bit; + change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; + } + + cores &= ~bit; + } + + if (tiler_required) { + int cnt = ++kbdev->tiler_needed_cnt; + + if (1 == cnt) + change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; + + KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt != 0); + } + + if (change_gpu_state) { + KBASE_TRACE_ADD(kbdev, PM_REQUEST_CHANGE_SHADER_NEEDED, NULL, + NULL, 0u, (u32) kbdev->shader_needed_bitmap); + + kbase_timeline_pm_cores_func(kbdev, + KBASE_PM_FUNC_ID_REQUEST_CORES_START, + change_gpu_state); + kbase_pm_update_cores_state_nolock(kbdev); + kbase_timeline_pm_cores_func(kbdev, + KBASE_PM_FUNC_ID_REQUEST_CORES_END, + change_gpu_state); + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_cores); + +void kbase_pm_unrequest_cores(struct kbase_device *kbdev, + bool tiler_required, u64 shader_cores) +{ + unsigned long flags; + + kbase_pm_change_state change_gpu_state = 0u; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + while (shader_cores) { + int bitnum = fls64(shader_cores) - 1; + u64 bit = 1ULL << bitnum; + int cnt; + + KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0); + + cnt = --kbdev->shader_needed_cnt[bitnum]; + + if (0 == cnt) { + kbdev->shader_needed_bitmap &= ~bit; + + change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER; + } + + shader_cores &= ~bit; + } + + if (tiler_required) { + int cnt; + + KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0); + + cnt = --kbdev->tiler_needed_cnt; + + if (0 == cnt) + change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; + } + + if (change_gpu_state) { + KBASE_TRACE_ADD(kbdev, PM_UNREQUEST_CHANGE_SHADER_NEEDED, NULL, + NULL, 0u, (u32) kbdev->shader_needed_bitmap); + + kbase_pm_update_cores_state_nolock(kbdev); + + /* Trace that any state change effectively completes immediately + * - no-one will wait on the state 
change */
+		kbase_pm_trace_check_and_finish_state_change(kbdev);
+	}
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_unrequest_cores);
+
+enum kbase_pm_cores_ready
+kbase_pm_register_inuse_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	u64 prev_shader_needed;	/* Just for tracing */
+	u64 prev_shader_inuse;	/* Just for tracing */
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	prev_shader_needed = kbdev->shader_needed_bitmap;
+	prev_shader_inuse = kbdev->shader_inuse_bitmap;
+
+	/* If desired_shader_state does not contain the requested cores, then
+	 * power management is not attempting to power those cores (most
+	 * likely due to core availability policy) and a new job affinity must
+	 * be chosen */
+	if ((kbdev->pm.backend.desired_shader_state & shader_cores) !=
+							shader_cores) {
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+		return KBASE_NEW_AFFINITY;
+	}
+
+	if ((kbdev->shader_available_bitmap & shader_cores) != shader_cores ||
+	    (tiler_required && !kbdev->tiler_available_bitmap)) {
+		/* Trace ongoing core transition */
+		kbase_timeline_pm_l2_transition_start(kbdev);
+		spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+		return KBASE_CORES_NOT_READY;
+	}
+
+	/* If we started to trace a state change, then trace it as being
+	 * finished by now, at the very latest */
+	kbase_pm_trace_check_and_finish_state_change(kbdev);
+	/* Trace core transition done */
+	kbase_timeline_pm_l2_transition_done(kbdev);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_needed_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_needed_cnt[bitnum];
+
+		if (0 == cnt)
+			kbdev->shader_needed_bitmap &= ~bit;
+
+		/* shader_inuse_cnt should not overflow because there can only
+		 * be a very limited number of jobs on the h/w at one time */
+
+		kbdev->shader_inuse_cnt[bitnum]++;
+		kbdev->shader_inuse_bitmap |= bit;
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		KBASE_DEBUG_ASSERT(kbdev->tiler_needed_cnt > 0);
+
+		--kbdev->tiler_needed_cnt;
+
+		kbdev->tiler_inuse_cnt++;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt != 0);
+	}
+
+	if (prev_shader_needed != kbdev->shader_needed_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_NEEDED, NULL,
+				NULL, 0u, (u32) kbdev->shader_needed_bitmap);
+
+	if (prev_shader_inuse != kbdev->shader_inuse_bitmap)
+		KBASE_TRACE_ADD(kbdev, PM_REGISTER_CHANGE_SHADER_INUSE, NULL,
+				NULL, 0u, (u32) kbdev->shader_inuse_bitmap);
+
+	spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags);
+
+	return KBASE_CORES_READY;
+}
+
+KBASE_EXPORT_TEST_API(kbase_pm_register_inuse_cores);
+
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores)
+{
+	unsigned long flags;
+	kbase_pm_change_state change_gpu_state = 0u;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+
+	spin_lock_irqsave(&kbdev->pm.power_change_lock, flags);
+
+	while (shader_cores) {
+		int bitnum = fls64(shader_cores) - 1;
+		u64 bit = 1ULL << bitnum;
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->shader_inuse_cnt[bitnum] > 0);
+
+		cnt = --kbdev->shader_inuse_cnt[bitnum];
+
+		if (0 == cnt) {
+			kbdev->shader_inuse_bitmap &= ~bit;
+			change_gpu_state |= KBASE_PM_CHANGE_STATE_SHADER;
+		}
+
+		shader_cores &= ~bit;
+	}
+
+	if (tiler_required) {
+		int cnt;
+
+		KBASE_DEBUG_ASSERT(kbdev->tiler_inuse_cnt > 0);
+
+		cnt = --kbdev->tiler_inuse_cnt;
+
+		if (0 == cnt)
+			
change_gpu_state |= KBASE_PM_CHANGE_STATE_TILER; + } + + if (change_gpu_state) { + KBASE_TRACE_ADD(kbdev, PM_RELEASE_CHANGE_SHADER_INUSE, NULL, + NULL, 0u, (u32) kbdev->shader_inuse_bitmap); + + kbase_timeline_pm_cores_func(kbdev, + KBASE_PM_FUNC_ID_RELEASE_CORES_START, + change_gpu_state); + kbase_pm_update_cores_state_nolock(kbdev); + kbase_timeline_pm_cores_func(kbdev, + KBASE_PM_FUNC_ID_RELEASE_CORES_END, + change_gpu_state); + + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_cores); + +void kbase_pm_request_cores_sync(struct kbase_device *kbdev, + bool tiler_required, + u64 shader_cores) +{ + kbase_pm_request_cores(kbdev, tiler_required, shader_cores); + + kbase_pm_check_transitions_sync(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_cores_sync); + +void kbase_pm_request_l2_caches(struct kbase_device *kbdev) +{ + unsigned long flags; + u32 prior_l2_users_count; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + prior_l2_users_count = kbdev->l2_users_count++; + + KBASE_DEBUG_ASSERT(kbdev->l2_users_count != 0); + + /* if the GPU is reset while the l2 is on, l2 will be off but + * prior_l2_users_count will be > 0. l2_available_bitmap will have been + * set to 0 though by kbase_pm_init_hw */ + if (!prior_l2_users_count || !kbdev->l2_available_bitmap) + kbase_pm_check_transitions_nolock(kbdev); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + wait_event(kbdev->pm.backend.l2_powered_wait, + kbdev->pm.backend.l2_powered == 1); + + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches); + +void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + kbdev->l2_users_count++; + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_request_l2_caches_l2_is_on); + +void kbase_pm_release_l2_caches(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + KBASE_DEBUG_ASSERT(kbdev->l2_users_count > 0); + + --kbdev->l2_users_count; + + if (!kbdev->l2_users_count) { + kbase_pm_check_transitions_nolock(kbdev); + /* Trace that any state change completed immediately */ + kbase_pm_trace_check_and_finish_state_change(kbdev); + } + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); +} + +KBASE_EXPORT_TEST_API(kbase_pm_release_l2_caches); diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h new file mode 100644 index 00000000000..fabb3cc970e --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_policy.h @@ -0,0 +1,227 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+/*
+ * Power policy API definitions
+ */
+
+#ifndef _KBASE_PM_POLICY_H_
+#define _KBASE_PM_POLICY_H_
+
+/**
+ * kbase_pm_policy_init - Initialize power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Must be called before calling any other policy function
+ *
+ * Return: 0 if the power policy framework was successfully
+ *         initialized, -errno otherwise.
+ */
+int kbase_pm_policy_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_policy_term - Terminate power policy framework
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_policy_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_active - Update the active power state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_update_cores - Update the desired core state of the GPU
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Calls into the current power policy
+ */
+void kbase_pm_update_cores(struct kbase_device *kbdev);
+
+
+enum kbase_pm_cores_ready {
+	KBASE_CORES_NOT_READY = 0,
+	KBASE_NEW_AFFINITY = 1,
+	KBASE_CORES_READY = 2
+};
+
+
+/**
+ * kbase_pm_request_cores_sync - Synchronous variant of kbase_pm_request_cores()
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * When this function returns, the @shader_cores will be in the READY state.
+ *
+ * This is a safe variant of kbase_pm_check_transitions_sync(): it handles the
+ * work of ensuring the requested cores will remain powered until a matching
+ * call to kbase_pm_unrequest_cores()/kbase_pm_release_cores() (as appropriate)
+ * is made.
+ */
+void kbase_pm_request_cores_sync(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_cores - Mark one or more cores as being required
+ *                          for jobs to be submitted
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores which are necessary for the job
+ *
+ * This function is called by the job scheduler to mark one or more cores as
+ * being required to submit jobs that are ready to run.
+ *
+ * The cores requested are reference counted and a subsequent call to
+ * kbase_pm_register_inuse_cores() or kbase_pm_unrequest_cores() should be
+ * made to dereference the cores as being 'needed'.
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete/the cores might not be available
+ * until a Power Management IRQ.
+ */
+void kbase_pm_request_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_unrequest_cores - Unmark one or more cores as being required for
+ *                            jobs to be submitted.
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function undoes the effect of kbase_pm_request_cores(). It should be
+ * used when a job is not going to be submitted to the hardware (e.g. the job
+ * is cancelled before it is enqueued).
+ *
+ * The active power policy will meet or exceed the requirements of the
+ * requested cores in the system. Any core transitions needed will be begun
+ * immediately, but they might not complete until a Power Management IRQ.
+ *
+ * The policy may use this as an indication that it can power down cores.
+ */
+void kbase_pm_unrequest_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_register_inuse_cores - Register a set of cores as in use by a job
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_request_cores() )
+ *
+ * This function should be called after kbase_pm_request_cores() when the job
+ * is about to be submitted to the hardware. It will check that the necessary
+ * cores are available and if so update the 'needed' and 'inuse' bitmasks to
+ * reflect that the job is now committed to being run.
+ *
+ * If the necessary cores are not currently available then the function will
+ * return %KBASE_CORES_NOT_READY and have no effect.
+ *
+ * Return: %KBASE_CORES_NOT_READY if the cores are not immediately ready,
+ *         %KBASE_NEW_AFFINITY if the affinity requested is not allowed, or
+ *         %KBASE_CORES_READY if the cores requested are already available
+ */
+enum kbase_pm_cores_ready kbase_pm_register_inuse_cores(
+						struct kbase_device *kbdev,
+						bool tiler_required,
+						u64 shader_cores);
+
+/**
+ * kbase_pm_release_cores - Release cores after a job has run
+ *
+ * @kbdev:          The kbase device structure for the device
+ * @tiler_required: true if the tiler is required, false otherwise
+ * @shader_cores:   A bitmask of shader cores (as given to
+ *                  kbase_pm_register_inuse_cores() )
+ *
+ * This function should be called when a job has finished running on the
+ * hardware. A call to kbase_pm_register_inuse_cores() must have previously
+ * occurred. The reference counts of the specified cores will be decremented
+ * which may cause the bitmask of 'inuse' cores to be reduced. The power policy
+ * may then turn off any cores which are no longer 'inuse'.
+ */
+void kbase_pm_release_cores(struct kbase_device *kbdev,
+				bool tiler_required, u64 shader_cores);
+
+/**
+ * kbase_pm_request_l2_caches - Request l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Request the use of l2 caches for all core groups, power up, wait and prevent
+ * the power manager from powering down the l2 caches.
+ *
+ * This tells the power management that the caches should be powered up, and
+ * they should remain powered, irrespective of the usage of shader cores. This
+ * does not return until the l2 caches are powered up.
+ *
+ * The caller must call kbase_pm_release_l2_caches() when they are finished
+ * to allow normal power management of the l2 caches to resume.
+ *
+ * This should only be used when power management is active.
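+ *
+ * A minimal usage sketch (illustrative only, not taken from an in-tree
+ * caller); every request must eventually be balanced by a release:
+ *
+ *   kbase_pm_request_l2_caches(kbdev);
+ *   ... access GPU state that requires the L2 to be powered ...
+ *   kbase_pm_release_l2_caches(kbdev);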
+ */
+void kbase_pm_request_l2_caches(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_request_l2_caches_l2_is_on - Request l2 caches but don't power on
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Increment the count of l2 users but do not attempt to power on the l2
+ *
+ * It is the caller's responsibility to ensure that the l2 is already powered
+ * up and to eventually call kbase_pm_release_l2_caches()
+ */
+void kbase_pm_request_l2_caches_l2_is_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_release_l2_caches - Release l2 caches
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Release the use of l2 caches for all core groups and allow the power manager
+ * to power them down when necessary.
+ *
+ * This tells the power management that the caches can be powered down if
+ * necessary, with respect to the usage of shader cores.
+ *
+ * The caller must have called kbase_pm_request_l2_caches() prior to a call
+ * to this.
+ *
+ * This should only be used when power management is active.
+ */
+void kbase_pm_release_l2_caches(struct kbase_device *kbdev);
+
+#endif /* _KBASE_PM_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
new file mode 100644
index 00000000000..be7c3663e2b
--- /dev/null
+++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.c
@@ -0,0 +1,107 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ *
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_hwaccess_time.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec *ts)
+{
+	u32 hi1, hi2;
+
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(CYCLE_COUNT_HI),
+									NULL);
+		*cycle_counter |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Read hi, lo, hi to ensure that overflow from lo to hi is handled
+	 * correctly */
+	do {
+		hi1 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time = kbase_reg_read(kbdev,
+					GPU_CONTROL_REG(TIMESTAMP_LO), NULL);
+		hi2 = kbase_reg_read(kbdev, GPU_CONTROL_REG(TIMESTAMP_HI),
+									NULL);
+		*system_time |= (((u64) hi1) << 32);
+	} while (hi1 != hi2);
+
+	/* Record the CPU's idea of current time */
+	getrawmonotonic(ts);
+
+	kbase_pm_release_gpu_cycle_counter(kbdev);
+}
+
+/**
+ * kbase_wait_write_flush - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to
+ * flush its write buffer.
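+ *
+ * The wait is implemented by polling the CYCLE_COUNT_LO register instead
+ * of sleeping, because the delay is measured in GPU cycles, not
+ * wall-clock time.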
+ * + * Only in use for BASE_HW_ISSUE_6367 + * + * Note : If GPU resets occur then the counters are reset to zero, the delay may + * not be as expected. + */ +#ifndef CONFIG_MALI_NO_MALI +void kbase_wait_write_flush(struct kbase_context *kctx) +{ + u32 base_count = 0; + + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread */ + + kbase_pm_context_active(kctx->kbdev); + kbase_pm_request_gpu_cycle_counter(kctx->kbdev); + + while (true) { + u32 new_count; + + new_count = kbase_reg_read(kctx->kbdev, + GPU_CONTROL_REG(CYCLE_COUNT_LO), NULL); + /* First time around, just store the count. */ + if (base_count == 0) { + base_count = new_count; + continue; + } + + /* No need to handle wrapping, unsigned maths works for this. */ + if ((new_count - base_count) > 1000) + break; + } + + kbase_pm_release_gpu_cycle_counter(kctx->kbdev); + kbase_pm_context_idle(kctx->kbdev); +} +#endif /* CONFIG_MALI_NO_MALI */ diff --git a/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h new file mode 100644 index 00000000000..4ae1b0c304c --- /dev/null +++ b/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_time.h @@ -0,0 +1,57 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/** + * + */ + +#ifndef _KBASE_BACKEND_TIME_H_ +#define _KBASE_BACKEND_TIME_H_ + +/** + * kbase_backend_get_gpu_time() - Get current GPU time + * @kbdev: Device pointer + * @cycle_counter: Pointer to u64 to store cycle counter in + * @system_time: Pointer to u64 to store system time in + * @ts: Pointer to struct timespec to store current monotonic + * time in + */ +void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, + u64 *system_time, struct timespec *ts); + +/** + * kbase_wait_write_flush() - Wait for GPU write flush + * @kctx: Context pointer + * + * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush + * its write buffer. + * + * If GPU resets occur then the counters are reset to zero, the delay may not be + * as expected. + * + * This function is only in use for BASE_HW_ISSUE_6367 + */ +#ifdef CONFIG_MALI_NO_MALI +static inline void kbase_wait_write_flush(struct kbase_context *kctx) +{ +} +#else +void kbase_wait_write_flush(struct kbase_context *kctx); +#endif + +#endif /* _KBASE_BACKEND_TIME_H_ */ diff --git a/drivers/gpu/arm/midgard/docs/Doxyfile b/drivers/gpu/arm/midgard/docs/Doxyfile new file mode 100644 index 00000000000..35ff2f1ce4a --- /dev/null +++ b/drivers/gpu/arm/midgard/docs/Doxyfile @@ -0,0 +1,126 @@ +# +# (C) COPYRIGHT 2011-2013, 2015 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. 
+#
+#
+
+
+
+##############################################################################
+
+# This file contains per-module Doxygen configuration. Please do not add
+# extra settings to this file without consulting all stakeholders, as they
+# may override project-wide settings.
+#
+# Additionally, when defining aliases, macros, sections etc, use the module
+# name as a prefix e.g. gles_my_alias.
+
+##############################################################################
+
+@INCLUDE = ../../bldsys/Doxyfile_common
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT += ../../kernel/drivers/gpu/arm/midgard/
+
+##############################################################################
+# Everything below here is optional, and in most cases not required
+##############################################################################
+
+# This tag can be used to specify a number of aliases that act
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES +=
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS +=
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS +=
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+EXCLUDE += ../../kernel/drivers/gpu/arm/midgard/platform ../../kernel/drivers/gpu/arm/midgard/platform_dummy ../../kernel/drivers/gpu/arm/midgard/scripts ../../kernel/drivers/gpu/arm/midgard/tests ../../kernel/drivers/gpu/arm/midgard/Makefile ../../kernel/drivers/gpu/arm/midgard/Makefile.kbase ../../kernel/drivers/gpu/arm/midgard/Kbuild ../../kernel/drivers/gpu/arm/midgard/Kconfig ../../kernel/drivers/gpu/arm/midgard/sconscript ../../kernel/drivers/gpu/arm/midgard/docs ../../kernel/drivers/gpu/arm/midgard/pm_test_script.sh ../../kernel/drivers/gpu/arm/midgard/mali_uk.h ../../kernel/drivers/gpu/arm/midgard/Makefile
+
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS +=
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output.
The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS += + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH += + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH += + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH += + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED += + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED += + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS += ../../kernel/drivers/gpu/arm/midgard/docs + diff --git a/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot new file mode 100644 index 00000000000..7ae05c2f8de --- /dev/null +++ b/drivers/gpu/arm/midgard/docs/policy_operation_diagram.dot @@ -0,0 +1,112 @@ +/* + * + * (C) COPYRIGHT 2010 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +digraph policy_objects_diagram { + rankdir=LR; + size="12,8"; + compound=true; + + node [ shape = box ]; + + subgraph cluster_policy_queues { + low_queue [ shape=record label = "LowP | {<ql>ctx_lo | ... | <qm>ctx_i | ... | <qr>ctx_hi}" ]; + queues_middle_sep [ label="" shape=plaintext width=0 height=0 ]; + + rt_queue [ shape=record label = "RT | {<ql>ctx_lo | ... | <qm>ctx_j | ... 
| <qr>ctx_hi}" ]; + + label = "Policy's Queue(s)"; + } + + call_enqueue [ shape=plaintext label="enqueue_ctx()" ]; + + { + rank=same; + ordering=out; + call_dequeue [ shape=plaintext label="dequeue_head_ctx()\n+ runpool_add_ctx()" ]; + call_ctxfinish [ shape=plaintext label="runpool_remove_ctx()" ]; + + call_ctxdone [ shape=plaintext label="don't requeue;\n/* ctx has no more jobs */" ]; + } + + subgraph cluster_runpool { + + as0 [ width=2 height = 0.25 label="AS0: Job_1, ..., Job_n" ]; + as1 [ width=2 height = 0.25 label="AS1: Job_1, ..., Job_m" ]; + as2 [ width=2 height = 0.25 label="AS2: Job_1, ..., Job_p" ]; + as3 [ width=2 height = 0.25 label="AS3: Job_1, ..., Job_q" ]; + + label = "Policy's Run Pool"; + } + + { + rank=same; + call_jdequeue [ shape=plaintext label="dequeue_job()" ]; + sstop_dotfixup [ shape=plaintext label="" width=0 height=0 ]; + } + + { + rank=same; + ordering=out; + sstop [ shape=ellipse label="SS-Timer expires" ] + jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ]; + + irq [ label="IRQ" shape=ellipse ]; + + job_finish [ shape=plaintext label="don't requeue;\n/* job done */" ]; + } + + hstop [ shape=ellipse label="HS-Timer expires" ] + + /* + * Edges + */ + + call_enqueue -> queues_middle_sep [ lhead=cluster_policy_queues ]; + + low_queue:qr -> call_dequeue:w; + rt_queue:qr -> call_dequeue:w; + + call_dequeue -> as1 [lhead=cluster_runpool]; + + as1->call_jdequeue [ltail=cluster_runpool]; + call_jdequeue->jobslots:0; + call_jdequeue->sstop_dotfixup [ arrowhead=none]; + sstop_dotfixup->sstop [label="Spawn SS-Timer"]; + sstop->jobslots [label="SoftStop"]; + sstop->hstop [label="Spawn HS-Timer"]; + hstop->jobslots:ne [label="HardStop"]; + + + as3->call_ctxfinish:ne [ ltail=cluster_runpool ]; + call_ctxfinish:sw->rt_queue:qm [ lhead=cluster_policy_queues label="enqueue_ctx()\n/* ctx still has jobs */" ]; + + call_ctxfinish->call_ctxdone [constraint=false]; + + call_ctxdone->call_enqueue [weight=0.1 labeldistance=20.0 labelangle=0.0 taillabel="Job submitted to the ctx" style=dotted constraint=false]; + + + { + jobslots->irq [constraint=false]; + + irq->job_finish [constraint=false]; + } + + irq->as2 [lhead=cluster_runpool label="requeue_job()\n/* timeslice expired */" ]; + +} diff --git a/drivers/gpu/arm/midgard/docs/policy_overview.dot b/drivers/gpu/arm/midgard/docs/policy_overview.dot new file mode 100644 index 00000000000..159b993b7d6 --- /dev/null +++ b/drivers/gpu/arm/midgard/docs/policy_overview.dot @@ -0,0 +1,63 @@ +/* + * + * (C) COPYRIGHT 2010 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +digraph policy_objects_diagram { + rankdir=LR + size="6,6" + compound=true; + + node [ shape = box ]; + + call_enqueue [ shape=plaintext label="enqueue ctx" ]; + + + policy_queue [ label="Policy's Queue" ]; + + { + rank=same; + runpool [ label="Policy's Run Pool" ]; + + ctx_finish [ label="ctx finished" ]; + } + + { + rank=same; + jobslots [ shape=record label="Jobslots: | <0>js[0] | <1>js[1] | <2>js[2]" ]; + + job_finish [ label="Job finished" ]; + } + + + + /* + * Edges + */ + + call_enqueue -> policy_queue; + + policy_queue->runpool [label="dequeue ctx" weight=0.1]; + runpool->policy_queue [label="requeue ctx" weight=0.1]; + + runpool->ctx_finish [ style=dotted ]; + + runpool->jobslots [label="dequeue job" weight=0.1]; + jobslots->runpool [label="requeue job" weight=0.1]; + + jobslots->job_finish [ style=dotted ]; +} diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h new file mode 100644 index 00000000000..bec9a5acf5c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_features.h @@ -0,0 +1,163 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, + * please update base/tools/hwconfig_generator/hwc_{issues,features}.py + * For more information see base/tools/hwconfig_generator/README + */ + +#ifndef _BASE_HWCONFIG_FEATURES_H_ +#define _BASE_HWCONFIG_FEATURES_H_ + +enum base_hw_feature { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_33BIT_VA, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, + BASE_HW_FEATURE_IMAGES_IN_FRAGMENT_SHADERS, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_FLUSH_REDUCTION, + BASE_HW_FEATURE_V4, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_generic[] = { + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_t60x[] = { + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_V4, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_t62x[] = { + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_V4, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_t72x[] = { + BASE_HW_FEATURE_33BIT_VA, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + 
BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_OPTIMIZED_COVERAGE_MASK, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_WORKGROUP_ROUND_MULTIPLE_OF_4, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_V4, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_t76x[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_tFxx[] = { + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_MSAA_16X, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_t83x[] = { + BASE_HW_FEATURE_33BIT_VA, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_END +}; + +static const enum base_hw_feature base_hw_features_t82x[] = { + BASE_HW_FEATURE_33BIT_VA, + BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION, + BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS, + BASE_HW_FEATURE_WARPING, + BASE_HW_FEATURE_INTERPIPE_REG_ALIASING, + BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS, + BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL, + BASE_HW_FEATURE_BRNDOUT_CC, + BASE_HW_FEATURE_BRNDOUT_KILL, + BASE_HW_FEATURE_LD_ST_LEA_TEX, + BASE_HW_FEATURE_LD_ST_TILEBUFFER, + BASE_HW_FEATURE_LINEAR_FILTER_FLOAT, + BASE_HW_FEATURE_MRT, + BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE, + BASE_HW_FEATURE_OUT_OF_ORDER_EXEC, + BASE_HW_FEATURE_T7XX_PAIRING_RULES, + BASE_HW_FEATURE_TEST4_DATUM_MODE, + BASE_HW_FEATURE_END +}; + +#endif /* _BASE_HWCONFIG_FEATURES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h new file mode 100644 index 00000000000..7bd30dd7278 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_base_hwconfig_issues.h @@ -0,0 +1,786 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, + * please update base/tools/hwconfig_generator/hwc_{issues,features}.py + * For more information see base/tools/hwconfig_generator/README + */ + +#ifndef _BASE_HWCONFIG_ISSUES_H_ +#define _BASE_HWCONFIG_ISSUES_H_ + +enum base_hw_issue { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_6367, + BASE_HW_ISSUE_6398, + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_6787, + BASE_HW_ISSUE_7027, + BASE_HW_ISSUE_7144, + BASE_HW_ISSUE_7304, + BASE_HW_ISSUE_8073, + BASE_HW_ISSUE_8186, + BASE_HW_ISSUE_8215, + BASE_HW_ISSUE_8245, + BASE_HW_ISSUE_8250, + BASE_HW_ISSUE_8260, + BASE_HW_ISSUE_8280, + BASE_HW_ISSUE_8316, + BASE_HW_ISSUE_8381, + BASE_HW_ISSUE_8394, + BASE_HW_ISSUE_8401, + BASE_HW_ISSUE_8408, + BASE_HW_ISSUE_8443, + BASE_HW_ISSUE_8456, + BASE_HW_ISSUE_8564, + BASE_HW_ISSUE_8634, + BASE_HW_ISSUE_8778, + BASE_HW_ISSUE_8791, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_8833, + BASE_HW_ISSUE_8879, + BASE_HW_ISSUE_8896, + BASE_HW_ISSUE_8975, + BASE_HW_ISSUE_8986, + BASE_HW_ISSUE_8987, + BASE_HW_ISSUE_9010, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9418, + BASE_HW_ISSUE_9423, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_9510, + BASE_HW_ISSUE_9566, + BASE_HW_ISSUE_9630, + BASE_HW_ISSUE_10127, + BASE_HW_ISSUE_10327, + BASE_HW_ISSUE_10410, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10487, + BASE_HW_ISSUE_10607, + BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10676, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10797, + BASE_HW_ISSUE_10817, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10959, + BASE_HW_ISSUE_10969, + BASE_HW_ISSUE_10984, + BASE_HW_ISSUE_10995, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_26, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3542, + BASE_HW_ISSUE_T76X_3556, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_generic[] = { + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t60x_r0p0_15dev0[] = { + BASE_HW_ISSUE_6367, + BASE_HW_ISSUE_6398, + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_6787, + BASE_HW_ISSUE_7027, + BASE_HW_ISSUE_7144, + BASE_HW_ISSUE_7304, + BASE_HW_ISSUE_8073, + BASE_HW_ISSUE_8186, + BASE_HW_ISSUE_8215, + BASE_HW_ISSUE_8245, + BASE_HW_ISSUE_8250, + BASE_HW_ISSUE_8260, + BASE_HW_ISSUE_8280, + BASE_HW_ISSUE_8316, + BASE_HW_ISSUE_8381, + BASE_HW_ISSUE_8394, + BASE_HW_ISSUE_8401, + BASE_HW_ISSUE_8408, + BASE_HW_ISSUE_8443, + BASE_HW_ISSUE_8456, + BASE_HW_ISSUE_8564, + BASE_HW_ISSUE_8634, + BASE_HW_ISSUE_8778, + BASE_HW_ISSUE_8791, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_8833, + BASE_HW_ISSUE_8896, + BASE_HW_ISSUE_8975, + BASE_HW_ISSUE_8986, + BASE_HW_ISSUE_8987, + BASE_HW_ISSUE_9010, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9418, + BASE_HW_ISSUE_9423, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_9510, + BASE_HW_ISSUE_9566, + BASE_HW_ISSUE_9630, + BASE_HW_ISSUE_10410, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10487, + BASE_HW_ISSUE_10607, + BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10676, + BASE_HW_ISSUE_10682, + 
BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10969, + BASE_HW_ISSUE_10984, + BASE_HW_ISSUE_10995, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t60x_r0p0_eac[] = { + BASE_HW_ISSUE_6367, + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_6787, + BASE_HW_ISSUE_7027, + BASE_HW_ISSUE_7304, + BASE_HW_ISSUE_8408, + BASE_HW_ISSUE_8564, + BASE_HW_ISSUE_8778, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_8975, + BASE_HW_ISSUE_9010, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9418, + BASE_HW_ISSUE_9423, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_9510, + BASE_HW_ISSUE_10410, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10487, + BASE_HW_ISSUE_10607, + BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10676, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10969, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t60x_r0p1[] = { + BASE_HW_ISSUE_6367, + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_6787, + BASE_HW_ISSUE_7027, + BASE_HW_ISSUE_7304, + BASE_HW_ISSUE_8408, + BASE_HW_ISSUE_8564, + BASE_HW_ISSUE_8778, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_8975, + BASE_HW_ISSUE_9010, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_9510, + BASE_HW_ISSUE_10410, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10487, + BASE_HW_ISSUE_10607, + BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10676, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t62x_r0p1[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10127, + BASE_HW_ISSUE_10327, + BASE_HW_ISSUE_10410, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10487, + BASE_HW_ISSUE_10607, + BASE_HW_ISSUE_10632, + BASE_HW_ISSUE_10676, + BASE_HW_ISSUE_10682, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10817, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10959, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_11035, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t62x_r1p0[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10959, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t62x_r1p1[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_10959, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_END 
+}; + +static const enum base_hw_issue base_hw_issues_t76x_r0p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_26, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3542, + BASE_HW_ISSUE_T76X_3556, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t76x_r0p1[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_26, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3542, + BASE_HW_ISSUE_T76X_3556, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t76x_r0p1_50rel0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_26, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3542, + BASE_HW_ISSUE_T76X_3556, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t76x_r0p2[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_26, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3542, + BASE_HW_ISSUE_T76X_3556, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t76x_r0p3[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_26, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3542, + BASE_HW_ISSUE_T76X_3556, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t76x_r1p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t72x_r0p0[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10797, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + 
BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t72x_r1p0[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10797, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t72x_r1p1[] = { + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10684, + BASE_HW_ISSUE_10797, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_t72x[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10471, + BASE_HW_ISSUE_10797, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_t76x[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_t60x[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_8778, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_t62x[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_6402, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10472, + BASE_HW_ISSUE_10931, + BASE_HW_ISSUE_11012, + BASE_HW_ISSUE_11020, + BASE_HW_ISSUE_11024, + BASE_HW_ISSUE_11042, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tFRx_r0p1[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tFRx_r0p2[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tFRx_r1p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_tFRx_r2p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, 
+ BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_tFRx[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t86x_r0p2[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t86x_r1p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t86x_r2p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3966, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_t86x[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t83x_r0p1[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t83x_r1p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_t83x[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t82x_r0p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t82x_r0p1[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + 
BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_t82x_r1p0[] = { + BASE_HW_ISSUE_8803, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_10821, + BASE_HW_ISSUE_10883, + BASE_HW_ISSUE_10946, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_T76X_3953, + BASE_HW_ISSUE_T76X_3960, + BASE_HW_ISSUE_END +}; + +static const enum base_hw_issue base_hw_issues_model_t82x[] = { + BASE_HW_ISSUE_5736, + BASE_HW_ISSUE_9275, + BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_T76X_1909, + BASE_HW_ISSUE_T76X_1963, + BASE_HW_ISSUE_T76X_3086, + BASE_HW_ISSUE_T76X_3700, + BASE_HW_ISSUE_T76X_3793, + BASE_HW_ISSUE_END +}; + +#endif /* _BASE_HWCONFIG_ISSUES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_base_kernel.h b/drivers/gpu/arm/midgard/mali_base_kernel.h new file mode 100644 index 00000000000..21fe319b202 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_base_kernel.h @@ -0,0 +1,1613 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file + * Base structures shared with the kernel. + */ + +#ifndef _BASE_KERNEL_H_ +#define _BASE_KERNEL_H_ + +#ifndef __user +#define __user +#endif + +/* Support UK6 IOCTLS */ +#define BASE_LEGACY_UK6_SUPPORT 1 + +/* Support UK7 IOCTLS */ +/* NB: To support UK6 we also need to support UK7 */ +#define BASE_LEGACY_UK7_SUPPORT 1 + +typedef u64 base_mem_handle; + +#include "mali_base_mem_priv.h" +#include "mali_kbase_profiling_gator_api.h" + +/* + * Dependency stuff, keep it private for now. May want to expose it if + * we decide to make the number of semaphores a configurable + * option. + */ +#define BASE_JD_ATOM_COUNT 256 + +#define BASEP_JD_SEM_PER_WORD_LOG2 5 +#define BASEP_JD_SEM_PER_WORD (1 << BASEP_JD_SEM_PER_WORD_LOG2) +#define BASEP_JD_SEM_WORD_NR(x) ((x) >> BASEP_JD_SEM_PER_WORD_LOG2) +#define BASEP_JD_SEM_MASK_IN_WORD(x) (1 << ((x) & (BASEP_JD_SEM_PER_WORD - 1))) +#define BASEP_JD_SEM_ARRAY_SIZE BASEP_JD_SEM_WORD_NR(BASE_JD_ATOM_COUNT) + +#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3 + +#define BASE_MAX_COHERENT_GROUPS 16 + +#if defined CDBG_ASSERT +#define LOCAL_ASSERT CDBG_ASSERT +#elif defined KBASE_DEBUG_ASSERT +#define LOCAL_ASSERT KBASE_DEBUG_ASSERT +#else +#error assert macro not defined! 
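+/*
+ * Illustrative aside (not part of the ABI): the BASEP_JD_SEM_* helpers above
+ * implement a bitmap over BASE_JD_ATOM_COUNT atoms, 32 bits per word. Under
+ * those definitions one would expect, for atom number 70:
+ *
+ *   BASEP_JD_SEM_WORD_NR(70)      == 70 >> 5 == 2       (third 32-bit word)
+ *   BASEP_JD_SEM_MASK_IN_WORD(70) == 1 << (70 & 31) == 1 << 6
+ *   BASEP_JD_SEM_ARRAY_SIZE       == 256 >> 5 == 8      (words for all atoms)
+ */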
+#endif
+
+#if defined PAGE_MASK
+#define LOCAL_PAGE_LSB ~PAGE_MASK
+#else
+#include <osu/mali_osu.h>
+
+#if defined OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+#define LOCAL_PAGE_LSB ((1ul << OSU_CONFIG_CPU_PAGE_SIZE_LOG2) - 1)
+#else
+#error Failed to find page size
+#endif
+#endif
+
+/** 32/64-bit neutral way to represent pointers */
+typedef union kbase_pointer {
+	void __user *value;	/**< client should store their pointers here */
+	u32 compat_value;	/**< 64-bit kernels should fetch value here when handling 32-bit clients */
+	u64 sizer;		/**< Force 64-bit storage for all clients regardless */
+} kbase_pointer;
+
+/**
+ * @addtogroup base_user_api User-side Base APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_memory User-side Base Memory APIs
+ * @{
+ */
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * A combination of MEM_PROT/MEM_HINT flags must be passed to each allocator
+ * in order to determine the best cache policy. Some combinations are
+ * of course invalid (e.g. @c MEM_PROT_CPU_WR | @c MEM_HINT_CPU_RD, which
+ * would define a @a write-only region on the CPU side that is nevertheless
+ * heavily read by the CPU).
+ * Other flags are only meaningful to a particular allocator.
+ * More flags can be added to this list, as long as they don't clash
+ * (see ::BASE_MEM_FLAGS_NR_BITS for the number of the first free bit).
+ */
+typedef u32 base_mem_alloc_flags;
+
+/**
+ * @brief Memory allocation, access/hint flags
+ *
+ * See ::base_mem_alloc_flags.
+ *
+ */
+enum {
+/* IN */
+	BASE_MEM_PROT_CPU_RD = (1U << 0),	/**< Read access CPU side */
+	BASE_MEM_PROT_CPU_WR = (1U << 1),	/**< Write access CPU side */
+	BASE_MEM_PROT_GPU_RD = (1U << 2),	/**< Read access GPU side */
+	BASE_MEM_PROT_GPU_WR = (1U << 3),	/**< Write access GPU side */
+	BASE_MEM_PROT_GPU_EX = (1U << 4),	/**< Execute allowed on the GPU side */
+
+	/* BASE_MEM_HINT flags have been removed, but their values are reserved
+	 * for backwards compatibility with older user-space drivers. The values
+	 * can be re-used once support for r5p0 user-space drivers is removed,
+	 * presumably in r7p0.
+	 *
+	 * RESERVED: (1U << 5)
+	 * RESERVED: (1U << 6)
+	 * RESERVED: (1U << 7)
+	 * RESERVED: (1U << 8)
+	 */
+
+	BASE_MEM_GROW_ON_GPF = (1U << 9),	/**< Grow backing store on GPU Page Fault */
+
+	BASE_MEM_COHERENT_SYSTEM = (1U << 10),	/**< Page coherence Outer shareable, if available */
+	BASE_MEM_COHERENT_LOCAL = (1U << 11),	/**< Page coherence Inner shareable */
+	BASE_MEM_CACHED_CPU = (1U << 12),	/**< Should be cached on the CPU */
+
+/* IN/OUT */
+	BASE_MEM_SAME_VA = (1U << 13),		/**< Must have same VA on both the GPU and the CPU */
+/* OUT */
+	BASE_MEM_NEED_MMAP = (1U << 14),	/**< Must call mmap to acquire a GPU address for the alloc */
+
+/* IN */
+	BASE_MEM_COHERENT_SYSTEM_REQUIRED = (1U << 15), /**< Page coherence Outer shareable, required. */
+	BASE_MEM_SECURE = (1U << 16)		/**< Secure memory */
+
+};
+
+/**
+ * @brief Number of bits used as flags for base memory management
+ *
+ * Must be kept in sync with the ::base_mem_alloc_flags flags
+ */
+#define BASE_MEM_FLAGS_NR_BITS 17
+
+/**
+ * A mask for all output bits, excluding IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_OUTPUT_MASK BASE_MEM_NEED_MMAP
+
+/**
+ * A mask for all input bits, including IN/OUT bits.
+ */
+#define BASE_MEM_FLAGS_INPUT_MASK \
+	(((1 << BASE_MEM_FLAGS_NR_BITS) - 1) & ~BASE_MEM_FLAGS_OUTPUT_MASK)
+
+
+/**
+ * @brief Memory types supported by @a base_mem_import
+ *
+ * Each type defines what the supported handle type is.
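+ *
+ * (Aside for the allocation flags defined above - a sketch of a caller-side
+ * sanity check, not a statement about the kernel's actual validation logic:
+ *
+ *   base_mem_alloc_flags flags = BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_RD;
+ *
+ *   // Input flags must fit in the defined bit range and must not include
+ *   // output-only bits such as BASE_MEM_NEED_MMAP.
+ *   if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
+ *           return -1; // invalid combination
+ * )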
+ *
+ * If any new type is added here ARM must be contacted
+ * to allocate a numeric value for it.
+ * Do not just add a new type without synchronizing with ARM
+ * as future releases from ARM might include other new types
+ * which could clash with your custom types.
+ */
+typedef enum base_mem_import_type {
+	BASE_MEM_IMPORT_TYPE_INVALID = 0,
+	/** UMP import. Handle type is ump_secure_id. */
+	BASE_MEM_IMPORT_TYPE_UMP = 1,
+	/** UMM import. Handle type is a file descriptor (int) */
+	BASE_MEM_IMPORT_TYPE_UMM = 2
+} base_mem_import_type;
+
+/* legacy API wrappers */
+#define base_tmem_import_type base_mem_import_type
+#define BASE_TMEM_IMPORT_TYPE_INVALID BASE_MEM_IMPORT_TYPE_INVALID
+#define BASE_TMEM_IMPORT_TYPE_UMP BASE_MEM_IMPORT_TYPE_UMP
+#define BASE_TMEM_IMPORT_TYPE_UMM BASE_MEM_IMPORT_TYPE_UMM
+
+/**
+ * @brief Invalid memory handle type.
+ * Return value from functions returning @a base_mem_handle on error.
+ */
+#define BASE_MEM_INVALID_HANDLE (0ull << 12)
+#define BASE_MEM_MMU_DUMP_HANDLE (1ull << 12)
+#define BASE_MEM_TRACE_BUFFER_HANDLE (2ull << 12)
+#define BASE_MEM_MAP_TRACKING_HANDLE (3ull << 12)
+#define BASE_MEM_WRITE_ALLOC_PAGES_HANDLE (4ull << 12)
+/* reserved handles ..-64<<PAGE_SHIFT> for future special handles */
+#define BASE_MEM_COOKIE_BASE (64ul << 12)
+#define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
+		BASE_MEM_COOKIE_BASE)
+
+/* Bit mask of cookies used for memory allocation setup */
+#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
+
+
+/**
+ * @brief Result codes of changing the size of the backing store allocated to a tmem region
+ */
+typedef enum base_backing_threshold_status {
+	BASE_BACKING_THRESHOLD_OK = 0,			/**< Resize successful */
+	BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE = -1,	/**< Not a growable tmem object */
+	BASE_BACKING_THRESHOLD_ERROR_OOM = -2,		/**< Increase failed due to an out-of-memory condition */
+	BASE_BACKING_THRESHOLD_ERROR_MAPPED = -3,	/**< Resize attempted on buffer while it was mapped, which is not permitted */
+	BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS = -4	/**< Invalid arguments (not tmem, illegal size request, etc.) */
+} base_backing_threshold_status;
+
+/**
+ * @addtogroup base_user_api_memory_defered User-side Base Deferred Memory Coherency APIs
+ * @{
+ */
+
+/**
+ * @brief a basic memory operation (sync-set).
+ *
+ * The content of this structure is private, and should only be used
+ * by the accessors.
+ */
+typedef struct base_syncset {
+	struct basep_syncset basep_sset;
+} base_syncset;
+
+/** @} end group base_user_api_memory_defered */
+
+/**
+ * Handle to represent imported memory object.
+ * Simple opaque handle to imported memory, can't be used
+ * with anything but base_external_resource_init to bind to an atom.
+ */
+typedef struct base_import_handle {
+	struct {
+		u64 handle;
+	} basep;
+} base_import_handle;
+
+/** @} end group base_user_api_memory */
+
+/**
+ * @addtogroup base_user_api_job_dispatch User-side Base Job Dispatcher APIs
+ * @{
+ */
+
+typedef int platform_fence_type;
+#define INVALID_PLATFORM_FENCE ((platform_fence_type)-1)
+
+/**
+ * Base stream handle.
+ *
+ * References an underlying base stream object.
+ */
+typedef struct base_stream {
+	struct {
+		int fd;
+	} basep;
+} base_stream;
+
+/**
+ * Base fence handle.
+ *
+ * References an underlying base fence object.
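+ *
+ * (Illustrative aside - the field conventions below are inferred from the
+ * asserts in base_jd_fence_trigger_setup_v2() further down, not a documented
+ * recipe. A fence that is yet to be created on a stream would look like:
+ *
+ *   struct base_fence fence;
+ *
+ *   fence.basep.fd = INVALID_PLATFORM_FENCE; // no fence fd allocated yet
+ *   fence.basep.stream_fd = stream.basep.fd; // stream it will be created on
+ * )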
+ */
+typedef struct base_fence {
+	struct {
+		int fd;
+		int stream_fd;
+	} basep;
+} base_fence;
+
+/**
+ * @brief Per-job data
+ *
+ * This structure is used to store per-job data, and is completely unused
+ * by the Base driver. It can be used to store things such as a callback
+ * function pointer, or data to handle job completion. It is guaranteed to be
+ * untouched by the Base driver.
+ */
+typedef struct base_jd_udata {
+	u64 blob[2];	/**< per-job data array */
+} base_jd_udata;
+
+/**
+ * @brief Memory aliasing info
+ *
+ * Describes a memory handle to be aliased.
+ * A subset of the handle can be chosen for aliasing, given an offset and a
+ * length.
+ * A special handle BASE_MEM_WRITE_ALLOC_PAGES_HANDLE is used to represent a
+ * region where a special page is mapped with a write-alloc cache setup,
+ * typically used when the write result of the GPU isn't needed, but the GPU
+ * must write anyway.
+ *
+ * Offset and length are specified in pages.
+ * Offset must be within the size of the handle.
+ * Offset+length must not overrun the size of the handle.
+ *
+ * @handle Handle to alias, can be BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ * @offset Offset within the handle to start aliasing from, in pages.
+ *         Not used with BASE_MEM_WRITE_ALLOC_PAGES_HANDLE.
+ * @length Length to alias, in pages. For BASE_MEM_WRITE_ALLOC_PAGES_HANDLE
+ *         specifies the number of times the special page is needed.
+ */
+struct base_mem_aliasing_info {
+	base_mem_handle handle;
+	u64 offset;
+	u64 length;
+};
+
+/**
+ * @brief Job dependency type.
+ *
+ * A flags field will be inserted into the atom structure to specify whether a
+ * dependency is a data or ordering dependency (by putting it before/after
+ * 'core_req' in the structure it should be possible to add it without changing
+ * the structure size).
+ * When the flag is set for a particular dependency to signal that it is an
+ * ordering-only dependency, then errors will not be propagated.
+ */
+typedef u8 base_jd_dep_type;
+
+
+#define BASE_JD_DEP_TYPE_INVALID (0)		/**< Invalid dependency */
+#define BASE_JD_DEP_TYPE_DATA (1U << 0)		/**< Data dependency */
+#define BASE_JD_DEP_TYPE_ORDER (1U << 1)	/**< Order dependency */
+
+/**
+ * @brief Job chain hardware requirements.
+ *
+ * A job chain must specify what GPU features it needs to allow the
+ * driver to schedule the job correctly. Failing to specify the
+ * correct settings can/will cause an early job termination. Multiple
+ * values can be ORed together to specify multiple requirements.
+ * Special case is ::BASE_JD_REQ_DEP, which is used to express complex
+ * dependencies, and that doesn't execute anything on the hardware.
+ */
+typedef u16 base_jd_core_req;
+
+/* Requirements that come from the HW */
+#define BASE_JD_REQ_DEP 0	/**< No requirement, dependency only */
+#define BASE_JD_REQ_FS (1U << 0)	/**< Requires fragment shaders */
+/**
+ * Requires compute shaders
+ * This covers any of the following Midgard Job types:
+ * - Vertex Shader Job
+ * - Geometry Shader Job
+ * - An actual Compute Shader Job
+ *
+ * Compare this with @ref BASE_JD_REQ_ONLY_COMPUTE, which specifies that the
+ * job is specifically just the "Compute Shader" job type, and not the "Vertex
+ * Shader" nor the "Geometry Shader" job type.
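+ *
+ * (A brief, illustrative sketch of flag composition only - job-chain setup
+ * is omitted. A geometry pass that also needs tiling might request:
+ *
+ *   atom.core_req = BASE_JD_REQ_CS | BASE_JD_REQ_T;
+ *
+ * whereas a pure dependency atom would use BASE_JD_REQ_DEP on its own.)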
+ */
+#define BASE_JD_REQ_CS (1U << 1)
+#define BASE_JD_REQ_T (1U << 2)		/**< Requires tiling */
+#define BASE_JD_REQ_CF (1U << 3)	/**< Requires cache flushes */
+#define BASE_JD_REQ_V (1U << 4)		/**< Requires value writeback */
+
+/* SW-only requirements - the HW does not expose these as part of the job slot capabilities */
+
+/* Requires fragment job with AFBC encoding */
+#define BASE_JD_REQ_FS_AFBC (1U << 13)
+
+/**
+ * SW Only requirement: the job chain requires a coherent core group. We don't
+ * mind which coherent core group is used.
+ */
+#define BASE_JD_REQ_COHERENT_GROUP (1U << 6)
+
+/**
+ * SW Only requirement: The performance counters should be enabled only when
+ * they are needed, to reduce power consumption.
+ */
+
+#define BASE_JD_REQ_PERMON (1U << 7)
+
+/**
+ * SW Only requirement: External resources are referenced by this atom.
+ * When external resources are referenced, no syncsets can be bundled with the
+ * atom; they should instead be part of NULL jobs inserted into the dependency
+ * tree.
+ * The first pre_dep object must be configured for the external resources to
+ * use; the second pre_dep object can be used to create other dependencies.
+ */
+#define BASE_JD_REQ_EXTERNAL_RESOURCES (1U << 8)
+
+/**
+ * SW Only requirement: Software defined job. Jobs with this bit set will not be submitted
+ * to the hardware but will cause some action to happen within the driver
+ */
+#define BASE_JD_REQ_SOFT_JOB (1U << 9)
+
+#define BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME	(BASE_JD_REQ_SOFT_JOB | 0x1)
+#define BASE_JD_REQ_SOFT_FENCE_TRIGGER		(BASE_JD_REQ_SOFT_JOB | 0x2)
+#define BASE_JD_REQ_SOFT_FENCE_WAIT		(BASE_JD_REQ_SOFT_JOB | 0x3)
+
+/**
+ * SW Only requirement: Replay job.
+ *
+ * If the preceding job fails, the replay job will cause the jobs specified in
+ * the list of base_jd_replay_payload pointed to by the jc pointer to be
+ * replayed.
+ *
+ * A replay job will only cause jobs to be replayed up to BASEP_JD_REPLAY_LIMIT
+ * times. If a job fails more than BASEP_JD_REPLAY_LIMIT times then the replay
+ * job is failed, as well as any following dependencies.
+ *
+ * The replayed jobs will require a number of atom IDs. If there are not enough
+ * free atom IDs then the replay job will fail.
+ *
+ * If the preceding job does not fail, then the replay job is returned as
+ * completed.
+ *
+ * The replayed jobs will never be returned to userspace. The preceding failed
+ * job will be returned to userspace as failed; the status of this job should
+ * be ignored. Completion should be determined by the status of the replay soft
+ * job.
+ *
+ * In order for the jobs to be replayed, the job headers will have to be
+ * modified. The Status field will be reset to NOT_STARTED. If the Job Type
+ * field indicates a Vertex Shader Job then it will be changed to Null Job.
+ *
+ * The replayed jobs have the following assumptions:
+ *
+ * - No external resources. Any required external resources will be held by the
+ *   replay atom.
+ * - Pre-dependencies are created based on job order.
+ * - Atom numbers are automatically assigned.
+ * - device_nr is set to 0. This is not relevant as
+ *   BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set.
+ * - Priority is inherited from the replay job.
+ */
+#define BASE_JD_REQ_SOFT_REPLAY (BASE_JD_REQ_SOFT_JOB | 0x4)
+
+/**
+ * HW Requirement: Requires Compute shaders (but not Vertex or Geometry Shaders)
+ *
+ * This indicates that the Job Chain contains Midgard Jobs of the 'Compute Shaders' type.
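+ *
+ * (For illustration only - an assumed OpenCL-style use, not mandated by this
+ * header: such an atom might additionally pin itself to core group 0 with
+ *
+ *   atom.core_req = BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP;
+ *   atom.device_nr = 0;
+ *
+ * which is only guaranteed to work because the atom is BASE_JD_REQ_ONLY_COMPUTE;
+ * see BASE_JD_REQ_SPECIFIC_COHERENT_GROUP below.)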
+ *
+ * In contrast to @ref BASE_JD_REQ_CS, this does \b not indicate that the Job
+ * Chain contains 'Geometry Shader' or 'Vertex Shader' jobs.
+ *
+ * @note This is a more flexible variant of the @ref BASE_CONTEXT_HINT_ONLY_COMPUTE flag,
+ * allowing specific jobs to be marked as 'Only Compute' instead of the entire context
+ */
+#define BASE_JD_REQ_ONLY_COMPUTE (1U << 10)
+
+/**
+ * HW Requirement: Use the base_jd_atom::device_nr field to specify a
+ * particular core group
+ *
+ * If both BASE_JD_REQ_COHERENT_GROUP and this flag are set, this flag takes priority
+ *
+ * This is only guaranteed to work for BASE_JD_REQ_ONLY_COMPUTE atoms.
+ *
+ * If the core availability policy is keeping the required core group turned off, then
+ * the job will fail with a BASE_JD_EVENT_PM_EVENT error code.
+ */
+#define BASE_JD_REQ_SPECIFIC_COHERENT_GROUP (1U << 11)
+
+/**
+ * SW Flag: If this bit is set then the successful completion of this atom
+ * will not cause an event to be sent to userspace
+ */
+#define BASE_JD_REQ_EVENT_ONLY_ON_FAILURE (1U << 12)
+
+/**
+ * SW Flag: If this bit is set then completion of this atom will not cause an
+ * event to be sent to userspace, whether successful or not.
+ */
+#define BASEP_JD_REQ_EVENT_NEVER (1U << 14)
+
+/**
+ * These requirement bits are currently unused in base_jd_core_req (currently a u16)
+ */
+
+#define BASEP_JD_REQ_RESERVED_BIT5 (1U << 5)
+#define BASEP_JD_REQ_RESERVED_BIT15 (1U << 15)
+
+/**
+ * Mask of all the currently unused requirement bits in base_jd_core_req.
+ */
+
+#define BASEP_JD_REQ_RESERVED (BASEP_JD_REQ_RESERVED_BIT5 | \
+				BASEP_JD_REQ_RESERVED_BIT15)
+
+/**
+ * Mask of all bits in base_jd_core_req that control the type of the atom.
+ *
+ * This allows dependency-only atoms to have flags set
+ */
+#define BASEP_JD_REQ_ATOM_TYPE (~(BASEP_JD_REQ_RESERVED | BASE_JD_REQ_EVENT_ONLY_ON_FAILURE |\
+				BASE_JD_REQ_EXTERNAL_RESOURCES | BASEP_JD_REQ_EVENT_NEVER))
+
+/**
+ * @brief States to model state machine processed by kbasep_js_job_check_ref_cores(), which
+ * handles retaining cores for power management and affinity management.
+ *
+ * The state @ref KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY prevents an attack
+ * where lots of atoms could be submitted before powerup, and each has an
+ * affinity chosen that causes other atoms to have an affinity
+ * violation. Whilst the affinity was not causing violations at the time it
+ * was chosen, it could cause violations thereafter. For example, 1000 jobs
+ * could have had their affinity chosen during the powerup time, so any of
+ * those 1000 jobs could cause an affinity violation later on.
+ *
+ * The attack would otherwise occur because other atoms/contexts have to wait for:
+ * -# the currently running atoms (which are causing the violation) to
+ *    finish
+ * -# and, the atoms that had their affinity chosen during powerup to
+ *    finish. These are run preferentially because they don't cause a
+ *    violation, but instead continue to cause the violation in others.
+ * -# or, the attacker is scheduled out (which might not happen for just 2
+ *    contexts)
+ *
+ * By re-choosing the affinity (which is designed to avoid violations at the
+ * time it's chosen), we break condition (2) of the wait, which minimizes the
+ * problem to just waiting for current jobs to finish (which can be bounded if
+ * the Job Scheduling Policy has a timer).
+ */
+enum kbase_atom_coreref_state {
+	/** Starting state: No affinity chosen, and cores must be requested.
+	 * kbase_jd_atom::affinity==0 */
+	KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED,
+	/** Cores requested, but waiting for them to be powered. Requested cores given by kbase_jd_atom::affinity */
+	KBASE_ATOM_COREREF_STATE_WAITING_FOR_REQUESTED_CORES,
+	/** Cores given by kbase_jd_atom::affinity are powered, but affinity might be out-of-date, so must recheck */
+	KBASE_ATOM_COREREF_STATE_RECHECK_AFFINITY,
+	/** Cores given by kbase_jd_atom::affinity are powered, and affinity is up-to-date, but must check for violations */
+	KBASE_ATOM_COREREF_STATE_CHECK_AFFINITY_VIOLATIONS,
+	/** Cores are powered, kbase_jd_atom::affinity up-to-date, no affinity violations: atom can be submitted to HW */
+	KBASE_ATOM_COREREF_STATE_READY
+};
+
+/*
+ * Base Atom priority
+ *
+ * Only certain priority levels are actually implemented, as specified by the
+ * BASE_JD_PRIO_<...> definitions below. It is undefined to use a priority
+ * level that is not one of those defined below.
+ *
+ * Priority levels only affect scheduling between atoms of the same type within
+ * a base context, and only after the atoms have had dependencies resolved.
+ * Fragment atoms do not affect non-fragment atoms with lower priorities, and
+ * vice versa. For example, a low priority atom that has had its
+ * dependencies resolved might run before a higher priority atom that has not
+ * had its dependencies resolved.
+ *
+ * The scheduling between base contexts/processes and between atoms from
+ * different base contexts/processes is unaffected by atom priority.
+ *
+ * The atoms are scheduled as follows with respect to their priorities:
+ * - Let atoms 'X' and 'Y' be for the same job slot, both with dependencies
+ *   resolved, where atom 'X' has a higher priority than atom 'Y'
+ * - If atom 'Y' is currently running on the HW, then it is interrupted to
+ *   allow atom 'X' to run soon after
+ * - If instead neither atom 'Y' nor atom 'X' are running, then when choosing
+ *   the next atom to run, atom 'X' will always be chosen instead of atom 'Y'
+ * - Any two atoms that have the same priority could run in any order with
+ *   respect to each other. That is, there is no ordering constraint between
+ *   atoms of the same priority.
+ */
+typedef u8 base_jd_prio;
+
+/* Medium atom priority. This is a priority higher than BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_MEDIUM ((base_jd_prio)0)
+/* High atom priority. This is a priority higher than BASE_JD_PRIO_MEDIUM and
+ * BASE_JD_PRIO_LOW */
+#define BASE_JD_PRIO_HIGH ((base_jd_prio)1)
+/* Low atom priority. */
+#define BASE_JD_PRIO_LOW ((base_jd_prio)2)
+
+/* Count of the number of priority levels.
+ * This itself is not a valid base_jd_prio setting */
+#define BASE_JD_NR_PRIO_LEVELS 3
+
+enum kbase_jd_atom_state {
+	/** Atom is not used */
+	KBASE_JD_ATOM_STATE_UNUSED,
+	/** Atom is queued in JD */
+	KBASE_JD_ATOM_STATE_QUEUED,
+	/** Atom has been given to JS (is runnable/running) */
+	KBASE_JD_ATOM_STATE_IN_JS,
+	/** Atom has been completed, but not yet handed back to job dispatcher
+	 * for dependency resolution */
+	KBASE_JD_ATOM_STATE_HW_COMPLETED,
+	/** Atom has been completed, but not yet handed back to userspace */
+	KBASE_JD_ATOM_STATE_COMPLETED
+};
+
+typedef u8 base_atom_id; /**< Type big enough to store an atom number in */
+
+struct base_dependency {
+	base_atom_id atom_id;			/**< An atom number */
+	base_jd_dep_type dependency_type;	/**< Dependency type */
+};
+
+typedef struct base_jd_atom_v2 {
+	u64 jc;				/**< job-chain GPU address */
+	struct base_jd_udata udata;	/**< user data */
+	kbase_pointer extres_list;	/**< list of external resources */
+	u16 nr_extres;			/**< nr of external resources */
+	base_jd_core_req core_req;	/**< core requirements */
+	const struct base_dependency pre_dep[2];	/**< pre-dependencies; use the setter
+							 * function to assign this field, in order
+							 * to reduce the possibility of improper
+							 * assignment of a dependency field */
+	base_atom_id atom_number;	/**< unique number to identify the atom */
+	base_jd_prio prio;		/**< Atom priority. Refer to @ref base_jd_prio for more details */
+	u8 device_nr;			/**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[5];
+} base_jd_atom_v2;
+
+#ifdef BASE_LEGACY_UK6_SUPPORT
+struct base_jd_atom_v2_uk6 {
+	u64 jc;				/**< job-chain GPU address */
+	struct base_jd_udata udata;	/**< user data */
+	kbase_pointer extres_list;	/**< list of external resources */
+	u16 nr_extres;			/**< nr of external resources */
+	base_jd_core_req core_req;	/**< core requirements */
+	base_atom_id pre_dep[2];	/**< pre-dependencies */
+	base_atom_id atom_number;	/**< unique number to identify the atom */
+	base_jd_prio prio;		/**< priority - smaller is higher priority */
+	u8 device_nr;			/**< coregroup when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP specified */
+	u8 padding[7];
+};
+#endif
+
+typedef enum base_external_resource_access {
+	BASE_EXT_RES_ACCESS_SHARED,
+	BASE_EXT_RES_ACCESS_EXCLUSIVE
+} base_external_resource_access;
+
+typedef struct base_external_resource {
+	u64 ext_resource;
+} base_external_resource;
+
+/**
+ * @brief Setter for a dependency structure
+ *
+ * @param[in] const_dep The kbase jd atom dependency to be initialized.
+ * @param id The atom_id to be assigned.
+ * @param dep_type The dep_type to be assigned.
+ *
+ */
+static inline void base_jd_atom_dep_set(const struct base_dependency *const_dep, base_atom_id id, base_jd_dep_type dep_type)
+{
+	struct base_dependency *dep;
+
+	LOCAL_ASSERT(const_dep != NULL);
+	/* make sure we don't set not-allowed combinations of atom_id/dependency_type */
+	LOCAL_ASSERT((id == 0 && dep_type == BASE_JD_DEP_TYPE_INVALID) ||
+			(id > 0 && dep_type != BASE_JD_DEP_TYPE_INVALID));
+
+	dep = (struct base_dependency *)const_dep;
+
+	dep->atom_id = id;
+	dep->dependency_type = dep_type;
+}
+
+/**
+ * @brief Make a copy of a dependency structure
+ *
+ * @param[in,out] const_dep The kbase jd atom dependency to be written.
+ * @param[in] from The dependency to make a copy from.
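+ *
+ * (Usage sketch for the setter above - atom IDs are made up for illustration:
+ *
+ *   // First slot depends on atom 3's data; second slot is left unused.
+ *   base_jd_atom_dep_set(&atom.pre_dep[0], 3, BASE_JD_DEP_TYPE_DATA);
+ *   base_jd_atom_dep_set(&atom.pre_dep[1], 0, BASE_JD_DEP_TYPE_INVALID);
+ *
+ * Note the asserted rule: atom_id 0 pairs only with BASE_JD_DEP_TYPE_INVALID,
+ * and a non-zero atom_id only with a valid dependency type.)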
+ *
+ */
+static inline void base_jd_atom_dep_copy(const struct base_dependency *const_dep, const struct base_dependency *from)
+{
+	LOCAL_ASSERT(const_dep != NULL);
+
+	base_jd_atom_dep_set(const_dep, from->atom_id, from->dependency_type);
+}
+
+/**
+ * @brief Soft-atom fence trigger setup.
+ *
+ * Sets up an atom to be a SW-only atom signaling a fence
+ * when it reaches the run state.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to trigger when a GPU job has finished.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init.
+ * Calling this function with an uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence trigger SW atom
+ * @param[in] fence The base fence object to trigger.
+ */
+static inline void base_jd_fence_trigger_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd == INVALID_PLATFORM_FENCE);
+	LOCAL_ASSERT(fence->basep.stream_fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_TRIGGER;
+}
+
+/**
+ * @brief Soft-atom fence wait setup.
+ *
+ * Sets up an atom to be a SW-only atom waiting on a fence.
+ * When the fence becomes triggered the atom becomes runnable
+ * and completes immediately.
+ *
+ * Using the existing base dependency system the fence can
+ * be set to block a GPU job until it has been triggered.
+ *
+ * The base fence object must not be terminated until the atom
+ * has been submitted to @a base_jd_submit_bag and @a base_jd_submit_bag has returned.
+ *
+ * @a fence must be a valid fence set up with @a base_fence_init or @a base_fence_import.
+ * Calling this function with an uninitialized fence results in undefined behavior.
+ *
+ * @param[out] atom A pre-allocated atom to configure as a fence wait SW atom
+ * @param[in] fence The base fence object to wait on
+ */
+static inline void base_jd_fence_wait_setup_v2(struct base_jd_atom_v2 *atom, struct base_fence *fence)
+{
+	LOCAL_ASSERT(atom);
+	LOCAL_ASSERT(fence);
+	LOCAL_ASSERT(fence->basep.fd >= 0);
+	atom->jc = (uintptr_t) fence;
+	atom->core_req = BASE_JD_REQ_SOFT_FENCE_WAIT;
+}
+
+/**
+ * @brief External resource info initialization.
+ *
+ * Sets up an external resource object to reference
+ * a memory allocation and the type of access requested.
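+ *
+ * (Aside for the fence helpers above - a minimal fence-wait sketch, assuming
+ * a fence with a valid fd obtained elsewhere; error handling omitted:
+ *
+ *   struct base_jd_atom_v2 atom;
+ *
+ *   base_jd_fence_wait_setup_v2(&atom, &fence);
+ *   // atom.jc now carries the fence pointer and atom.core_req is
+ *   // BASE_JD_REQ_SOFT_FENCE_WAIT; the fence must stay alive until
+ *   // submission has returned.
+ * )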
+ *
+ * @param[in] res The resource object to initialize
+ * @param handle The handle to the imported memory object
+ * @param access The type of access requested
+ */
+static inline void base_external_resource_init(struct base_external_resource *res, struct base_import_handle handle, base_external_resource_access access)
+{
+	u64 address;
+
+	address = handle.basep.handle;
+
+	LOCAL_ASSERT(res != NULL);
+	LOCAL_ASSERT(0 == (address & LOCAL_PAGE_LSB));
+	LOCAL_ASSERT(access == BASE_EXT_RES_ACCESS_SHARED || access == BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+	res->ext_resource = address | (access & LOCAL_PAGE_LSB);
+}
+
+/**
+ * @brief Job chain event code bits
+ * Defines the bits used to create ::base_jd_event_code
+ */
+enum {
+	BASE_JD_SW_EVENT_KERNEL = (1u << 15),	/**< Kernel side event */
+	BASE_JD_SW_EVENT = (1u << 14),		/**< SW defined event */
+	BASE_JD_SW_EVENT_SUCCESS = (1u << 13),	/**< Event indicates success (SW events only) */
+	BASE_JD_SW_EVENT_JOB = (0u << 11),	/**< Job related event */
+	BASE_JD_SW_EVENT_BAG = (1u << 11),	/**< Bag related event */
+	BASE_JD_SW_EVENT_INFO = (2u << 11),	/**< Misc/info event */
+	BASE_JD_SW_EVENT_RESERVED = (3u << 11),	/**< Reserved event type */
+	BASE_JD_SW_EVENT_TYPE_MASK = (3u << 11)	/**< Mask to extract the type from an event code */
+};
+
+/**
+ * @brief Job chain event codes
+ *
+ * HW and low-level SW events are represented by event codes.
+ * The status of jobs which succeeded is also represented by
+ * an event code (see ::BASE_JD_EVENT_DONE).
+ * Events are usually reported as part of a ::base_jd_event.
+ *
+ * The event codes are encoded in the following way:
+ * @li 10:0 - subtype
+ * @li 12:11 - type
+ * @li 13 - SW success (only valid if the SW bit is set)
+ * @li 14 - SW event (HW event if not set)
+ * @li 15 - Kernel event (should never be seen in userspace)
+ *
+ * Events are split up into ranges as follows:
+ * - BASE_JD_EVENT_RANGE_\<description\>_START
+ * - BASE_JD_EVENT_RANGE_\<description\>_END
+ *
+ * \a code is in \<description\>'s range when:
+ * - <tt>BASE_JD_EVENT_RANGE_\<description\>_START <= code < BASE_JD_EVENT_RANGE_\<description\>_END </tt>
+ *
+ * Ranges can be asserted for adjacency by testing that the END of the previous
+ * is equal to the START of the next. This is useful for optimizing some tests
+ * for range.
+ *
+ * A limitation is that the last member of this enum must explicitly be handled
+ * (with an assert-unreachable statement) in switch statements that use
+ * variables of this type. Otherwise, the compiler warns that we have not
+ * handled that enum value.
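+ *
+ * (An illustrative decode under the bit layout above; the helper name is
+ * invented for this example and is not part of the API:
+ *
+ *   static inline u16 example_jd_event_type(base_jd_event_code code)
+ *   {
+ *           // one of BASE_JD_SW_EVENT_{JOB,BAG,INFO,RESERVED}
+ *           return code & BASE_JD_SW_EVENT_TYPE_MASK;
+ *   }
+ *
+ * A SW success code would additionally have both the SW event bit and the
+ * success bit set: code & (BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS).)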
+ */ +typedef enum base_jd_event_code { + /* HW defined exceptions */ + + /** Start of HW Non-fault status codes + * + * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, + * because the job was hard-stopped + */ + BASE_JD_EVENT_RANGE_HW_NONFAULT_START = 0, + + /* non-fatal exceptions */ + BASE_JD_EVENT_NOT_STARTED = 0x00, /**< Can't be seen by userspace, treated as 'previous job done' */ + BASE_JD_EVENT_DONE = 0x01, + BASE_JD_EVENT_STOPPED = 0x03, /**< Can't be seen by userspace, becomes TERMINATED, DONE or JOB_CANCELLED */ + BASE_JD_EVENT_TERMINATED = 0x04, /**< This is actually a fault status code - the job was hard stopped */ + BASE_JD_EVENT_ACTIVE = 0x08, /**< Can't be seen by userspace, jobs only returned on complete/fail/cancel */ + + /** End of HW Non-fault status codes + * + * @note Obscurely, BASE_JD_EVENT_TERMINATED indicates a real fault, + * because the job was hard-stopped + */ + BASE_JD_EVENT_RANGE_HW_NONFAULT_END = 0x40, + + /** Start of HW fault and SW Error status codes */ + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_START = 0x40, + + /* job exceptions */ + BASE_JD_EVENT_JOB_CONFIG_FAULT = 0x40, + BASE_JD_EVENT_JOB_POWER_FAULT = 0x41, + BASE_JD_EVENT_JOB_READ_FAULT = 0x42, + BASE_JD_EVENT_JOB_WRITE_FAULT = 0x43, + BASE_JD_EVENT_JOB_AFFINITY_FAULT = 0x44, + BASE_JD_EVENT_JOB_BUS_FAULT = 0x48, + BASE_JD_EVENT_INSTR_INVALID_PC = 0x50, + BASE_JD_EVENT_INSTR_INVALID_ENC = 0x51, + BASE_JD_EVENT_INSTR_TYPE_MISMATCH = 0x52, + BASE_JD_EVENT_INSTR_OPERAND_FAULT = 0x53, + BASE_JD_EVENT_INSTR_TLS_FAULT = 0x54, + BASE_JD_EVENT_INSTR_BARRIER_FAULT = 0x55, + BASE_JD_EVENT_INSTR_ALIGN_FAULT = 0x56, + BASE_JD_EVENT_DATA_INVALID_FAULT = 0x58, + BASE_JD_EVENT_TILE_RANGE_FAULT = 0x59, + BASE_JD_EVENT_STATE_FAULT = 0x5A, + BASE_JD_EVENT_OUT_OF_MEMORY = 0x60, + BASE_JD_EVENT_UNKNOWN = 0x7F, + + /* GPU exceptions */ + BASE_JD_EVENT_DELAYED_BUS_FAULT = 0x80, + BASE_JD_EVENT_SHAREABILITY_FAULT = 0x88, + + /* MMU exceptions */ + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL1 = 0xC1, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL2 = 0xC2, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL3 = 0xC3, + BASE_JD_EVENT_TRANSLATION_FAULT_LEVEL4 = 0xC4, + BASE_JD_EVENT_PERMISSION_FAULT = 0xC8, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL1 = 0xD1, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL2 = 0xD2, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL3 = 0xD3, + BASE_JD_EVENT_TRANSTAB_BUS_FAULT_LEVEL4 = 0xD4, + BASE_JD_EVENT_ACCESS_FLAG = 0xD8, + + /* SW defined exceptions */ + BASE_JD_EVENT_MEM_GROWTH_FAILED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_TIMED_OUT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x001, + BASE_JD_EVENT_JOB_CANCELLED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x002, + BASE_JD_EVENT_JOB_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x003, + BASE_JD_EVENT_PM_EVENT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x004, + BASE_JD_EVENT_FORCE_REPLAY = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_JOB | 0x005, + + BASE_JD_EVENT_BAG_INVALID = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_BAG | 0x003, + + /** End of HW fault and SW Error status codes */ + BASE_JD_EVENT_RANGE_HW_FAULT_OR_SW_ERROR_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_RESERVED | 0x3FF, + + /** Start of SW Success status codes */ + BASE_JD_EVENT_RANGE_SW_SUCCESS_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | 0x000, + + BASE_JD_EVENT_PROGRESS_REPORT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_JOB | 0x000, + BASE_JD_EVENT_BAG_DONE = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_BAG | 0x000, + 
BASE_JD_EVENT_DRV_TERMINATED = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_INFO | 0x000,
+
+	/** End of SW Success status codes */
+	BASE_JD_EVENT_RANGE_SW_SUCCESS_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_SUCCESS | BASE_JD_SW_EVENT_RESERVED | 0x3FF,
+
+	/** Start of Kernel-only status codes. Such codes are never returned to user-space */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_START = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | 0x000,
+	BASE_JD_EVENT_REMOVED_FROM_NEXT = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_JOB | 0x000,
+
+	/** End of Kernel-only status codes. */
+	BASE_JD_EVENT_RANGE_KERNEL_ONLY_END = BASE_JD_SW_EVENT | BASE_JD_SW_EVENT_KERNEL | BASE_JD_SW_EVENT_RESERVED | 0x3FF
+} base_jd_event_code;
+
+/**
+ * @brief Event reporting structure
+ *
+ * This structure is used by the kernel driver to report information
+ * about GPU events. These can either be HW-specific events or low-level
+ * SW events, such as job-chain completion.
+ *
+ * The event code contains an event type field which can be extracted
+ * by ANDing with ::BASE_JD_SW_EVENT_TYPE_MASK.
+ *
+ * Based on the event type base_jd_event::data holds:
+ * @li ::BASE_JD_SW_EVENT_JOB : the offset in the ring-buffer for the completed
+ * job-chain
+ * @li ::BASE_JD_SW_EVENT_BAG : The address of the ::base_jd_bag that has
+ * been completed (i.e. all contained job-chains have been completed).
+ * @li ::BASE_JD_SW_EVENT_INFO : base_jd_event::data not used
+ */
+typedef struct base_jd_event_v2 {
+	base_jd_event_code event_code;	/**< event code */
+	base_atom_id atom_number;	/**< the atom number that has completed */
+	struct base_jd_udata udata;	/**< user data */
+} base_jd_event_v2;
+
+/**
+ * Padding required to ensure that the @ref struct base_dump_cpu_gpu_counters structure fills
+ * a full cache line.
+ */
+
+#define BASE_CPU_GPU_CACHE_LINE_PADDING (36)
+
+
+/**
+ * @brief Structure for BASE_JD_REQ_SOFT_DUMP_CPU_GPU_COUNTERS jobs.
+ *
+ * This structure is stored into the memory pointed to by the @c jc field of @ref base_jd_atom.
+ *
+ * This structure must be padded to ensure that it will occupy whole cache lines. This is to avoid
+ * cases where access to pages containing the structure is shared between cached and un-cached
+ * memory regions, which would cause memory corruption. Here we set the structure size to be 64 bytes,
+ * which is the cache line size for ARM A15 processors.
+ */
+
+typedef struct base_dump_cpu_gpu_counters {
+	u64 system_time;
+	u64 cycle_counter;
+	u64 sec;
+	u32 usec;
+	u8 padding[BASE_CPU_GPU_CACHE_LINE_PADDING];
+} base_dump_cpu_gpu_counters;
+
+
+
+/** @} end group base_user_api_job_dispatch */
+
+#ifdef __KERNEL__
+/*
+ * The following typedefs should be removed when a midg types header is added.
+ * See MIDCOM-1657 for details.
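+ *
+ * (Aside for the event reporting structure above - a minimal consumer-side
+ * sketch with the read mechanism left abstract; the two helpers named here
+ * are hypothetical, for illustration only:
+ *
+ *   struct base_jd_event_v2 ev;   // filled in by the kernel driver
+ *
+ *   if (ev.event_code == BASE_JD_EVENT_DONE)
+ *           complete_atom(ev.atom_number, &ev.udata);
+ *   else
+ *           handle_failure(ev.atom_number, ev.event_code);
+ * )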
+
+#ifdef __KERNEL__
+/*
+ * The following typedefs should be removed when a midg types header is added.
+ * See MIDCOM-1657 for details.
+ */
+typedef u32 gpu_product_id;
+typedef u32 gpu_cache_features;
+typedef u32 gpu_tiler_features;
+typedef u32 gpu_mem_features;
+typedef u32 gpu_mmu_features;
+typedef u32 gpu_js_features;
+typedef u32 gpu_as_present;
+typedef u32 gpu_js_present;
+
+#define GPU_MAX_JOB_SLOTS 16
+
+#else
+#include <gpu/mali_gpu_registers.h>
+#include <gpu/mali_gpu_props.h>
+#endif
+
+/**
+ * @page page_base_user_api_gpuprops User-side Base GPU Property Query API
+ *
+ * The User-side Base GPU Property Query API encapsulates two
+ * sub-modules:
+ *
+ * - @ref base_user_api_gpuprops_dyn "Dynamic GPU Properties"
+ * - @ref base_plat_config_gpuprops "Base Platform Config GPU Properties"
+ *
+ * There is a related third module outside of Base, which is owned by the MIDG
+ * module:
+ * - @ref gpu_props_static "Midgard Compile-time GPU Properties"
+ *
+ * Base only deals with properties that vary between different Midgard
+ * implementations - the Dynamic GPU properties and the Platform Config
+ * properties.
+ *
+ * For properties that are constant for the Midgard Architecture, refer to the
+ * MIDG module. However, we will discuss their relevance here <b>just to
+ * provide background information.</b>
+ *
+ * @section sec_base_user_api_gpuprops_about About the GPU Properties in Base and MIDG modules
+ *
+ * The compile-time properties (Platform Config, Midgard Compile-time
+ * properties) are exposed as pre-processor macros.
+ *
+ * Complementing the compile-time properties are the Dynamic GPU
+ * Properties, which act as a conduit for the Midgard Configuration
+ * Discovery.
+ *
+ * In general, the dynamic properties are present to verify that the platform
+ * has been configured correctly with the right set of Platform Config
+ * Compile-time Properties.
+ *
+ * As a consistent guide across the entire DDK, the choice for dynamic or
+ * compile-time should consider the following, in order:
+ * -# Can the code be written so that it doesn't need to know the
+ * implementation limits at all?
+ * -# If you need the limits, get the information from the Dynamic Property
+ * lookup. This should be done once as you fetch the context, and then cached
+ * as part of the context data structure, so it's cheap to access.
+ * -# If there's a clear and arguable inefficiency in using Dynamic Properties,
+ * then use a Compile-Time Property (Platform Config, or Midgard Compile-time
+ * property). Examples of where this might be sensible follow:
+ *  - Part of a critical inner-loop
+ *  - Frequent re-use throughout the driver, causing significant extra load
+ * instructions or control flow that would be worthwhile optimizing out.
+ *
+ * We cannot provide an exhaustive set of examples, nor can we provide a
+ * rule for every possible situation. Use common sense, and think about: what
+ * the rest of the driver will be doing; how the compiler might represent the
+ * value if it is a compile-time constant; whether an OEM shipping multiple
+ * devices would benefit much more from a single DDK binary, instead of
+ * insignificant micro-optimizations.
+ *
+ * @section sec_base_user_api_gpuprops_dyn Dynamic GPU Properties
+ *
+ * Dynamic GPU properties are presented in two sets:
+ * -# the commonly used properties in @ref base_gpu_props, which have been
+ * unpacked from GPU register bitfields.
+ * -# The full set of raw, unprocessed properties in @ref gpu_raw_gpu_props
+ * (also a member of @ref base_gpu_props). All of these are presented in
+ * the packed form, as presented by the GPU registers themselves.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ * The properties returned extend the Midgard Configuration Discovery
+ * registers. For example, GPU clock speed is not specified in the Midgard
+ * Architecture, but is <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+ *
+ * The GPU properties are obtained by a call to
+ * _mali_base_get_gpu_props(). This simply returns a pointer to a const
+ * base_gpu_props structure. It is constant for the life of a base
+ * context. Multiple calls to _mali_base_get_gpu_props() on a base context
+ * return the same pointer to a constant structure. This avoids cache pollution
+ * of the common data.
+ *
+ * This pointer must not be freed, because it does not point to the start of a
+ * region allocated by the memory allocator; instead, just close the @ref
+ * base_context.
+ *
+ *
+ * @section sec_base_user_api_gpuprops_config Platform Config Compile-time Properties
+ *
+ * The Platform Config File sets up GPU properties that are specific to a
+ * certain platform. Properties that are 'Implementation Defined' in the
+ * Midgard Architecture spec are placed here.
+ *
+ * @note Reference configurations are provided for Midgard Implementations, such as
+ * the Mali-T600 family. The customer need not repeat this information, and can select one of
+ * these reference configurations. For example, VA_BITS, PA_BITS and the
+ * maximum number of samples per pixel might vary between Midgard Implementations, but
+ * \b not for platforms using the Mali-T604. This information is placed in
+ * the reference configuration files.
+ *
+ * The System Integrator creates the following structure:
+ * - platform_XYZ
+ * - platform_XYZ/plat
+ * - platform_XYZ/plat/plat_config.h
+ *
+ * They then edit plat_config.h, using the example plat_config.h files as a
+ * guide.
+ *
+ * At the very least, the customer must set @ref CONFIG_GPU_CORE_TYPE, and will
+ * receive a helpful \#error message if they do not do this correctly. This
+ * selects the Reference Configuration for the Midgard Implementation. The rationale
+ * behind this decision (against asking the customer to write \#include
+ * <gpus/mali_t600.h> in their plat_config.h) is as follows:
+ * - This mechanism 'looks' like a regular config file (such as Linux's
+ * .config)
+ * - It is difficult to get wrong in a way that will produce strange build
+ * errors:
+ *  - They need not know where the mali_t600.h, other_midg_gpu.h etc. files are stored - and
+ * so they won't accidentally pick another file with 'mali_t600' in its name
+ * - When the build doesn't work, the System Integrator may think the DDK
+ * doesn't work, and attempt to fix it themselves:
+ *  - For the @ref CONFIG_GPU_CORE_TYPE mechanism, the only way to get past the
+ * error is to set @ref CONFIG_GPU_CORE_TYPE, and this is what the \#error tells
+ * you.
+ *  - For a \#include mechanism, checks must still be made elsewhere, which the
+ * System Integrator may try working around by setting \#defines (such as
+ * VA_BITS) themselves in their plat_config.h. In the worst case, they may
+ * set the prevention-mechanism \#define of
+ * "A_CORRECT_MIDGARD_CORE_WAS_CHOSEN".
+ *  - In this case, they would believe they are on the right track, because
+ * the build progresses with their fix, but with errors elsewhere.
+ *
+ * However, there is nothing to prevent the customer using \#include to organize
+ * their own configuration files hierarchically.
+ *
+ * The mechanism for the header file processing is as follows:
+ *
+ * @dot
+ digraph plat_config_mechanism {
+	rankdir=BT
+	size="6,6"
+
+	"mali_base.h";
+	"gpu/mali_gpu.h";
+
+	node [ shape=box ];
+	{
+	rank = same; ordering = out;
+
+	"gpu/mali_gpu_props.h";
+	"base/midg_gpus/mali_t600.h";
+	"base/midg_gpus/other_midg_gpu.h";
+	}
+	{ rank = same; "plat/plat_config.h"; }
+	{
+	rank = same;
+	"gpu/mali_gpu.h" [ shape=box ];
+	gpu_chooser [ label="" style="invisible" width=0 height=0 fixedsize=true ];
+	select_gpu [ label="Mali-T600 | Other\n(select_gpu.h)" shape=polygon,sides=4,distortion=0.25 width=3.3 height=0.99 fixedsize=true ] ;
+	}
+	node [ shape=box ];
+	{ rank = same; "plat/plat_config.h"; }
+	{ rank = same; "mali_base.h"; }
+
+	"mali_base.h" -> "gpu/mali_gpu.h" -> "gpu/mali_gpu_props.h";
+	"mali_base.h" -> "plat/plat_config.h" ;
+	"mali_base.h" -> select_gpu ;
+
+	"plat/plat_config.h" -> gpu_chooser [style="dotted,bold" dir=none weight=4] ;
+	gpu_chooser -> select_gpu [style="dotted,bold"] ;
+
+	select_gpu -> "base/midg_gpus/mali_t600.h" ;
+	select_gpu -> "base/midg_gpus/other_midg_gpu.h" ;
+ }
+ @enddot
+ *
+ *
+ * @section sec_base_user_api_gpuprops_kernel Kernel Operation
+ *
+ * During Base Context Create time, user-side makes a single kernel call:
+ * - A call to fill user memory with GPU information structures
+ *
+ * The kernel-side will fill the entire provided @ref base_gpu_props
+ * structure, because this information is required in both
+ * user and kernel side; it does not make sense to decode it twice.
+ *
+ * Coherency groups must be derived from the bitmasks, but this can be done
+ * kernel side, and just once at kernel startup: Coherency groups must already
+ * be known kernel-side, to support chains that specify an 'Only Coherent Group'
+ * SW requirement, or an 'Only Coherent Group with Tiler' SW requirement.
+ *
+ * @section sec_base_user_api_gpuprops_cocalc Coherency Group calculation
+ * Creation of the coherent group data is done at device-driver startup, and so
+ * is one-time. This will most likely involve a loop with CLZ, shifting, and
+ * bit clearing on the L2_PRESENT mask, depending on whether the
+ * system is L2 Coherent. The number of shader cores is obtained by a
+ * population count, since faulty cores may be disabled during production,
+ * producing a non-contiguous mask. An illustrative sketch of this derivation
+ * is given after the defines below.
+ *
+ * The memory requirements for this algorithm can be determined either by a u64
+ * population count on the L2_PRESENT mask (a LUT helper is already
+ * required for the above), or by the simple assumption that there can be no more
+ * than 16 coherent groups, since core groups are typically 4 cores.
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops User-side Base GPU Property Query APIs
+ * @{
+ */
+
+/**
+ * @addtogroup base_user_api_gpuprops_dyn Dynamic HW Properties
+ * @{
+ */
+
+#define BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS 3
+
+#define BASE_MAX_COHERENT_GROUPS 16
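An editorial, host-side sketch (not part of the patch) of the group derivation described in the Coherency Group calculation section above, assuming the l2_present and shader_present masks from gpu_raw_gpu_props; the 4-cores-per-L2-slice assignment is an illustrative assumption, and the example_ names are hypothetical:

#include <stdint.h>

struct example_group { uint64_t core_mask; unsigned num_cores; };

/* Peel off each set bit of l2_present (one group per L2 slice) and
 * population-count the shader cores attributed to that slice. */
static unsigned example_derive_groups(uint64_t l2_present, uint64_t shader_present,
				      struct example_group groups[16])
{
	unsigned n = 0;

	while (l2_present && n < 16) {
		unsigned slice = __builtin_ctzll(l2_present);   /* lowest set bit */
		uint64_t slice_cores = 0xFull << (slice * 4);   /* assumed 4 cores/slice */
		uint64_t mask = shader_present & slice_cores;

		groups[n].core_mask = mask;
		/* popcount tolerates non-contiguous masks from fused-off cores */
		groups[n].num_cores = __builtin_popcountll(mask);
		n++;
		l2_present &= l2_present - 1;   /* clear lowest set bit */
	}
	return n;
}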
+
+struct mali_base_gpu_core_props {
+ /**
+  * Product specific value.
+  */
+ gpu_product_id product_id;
+
+ /**
+  * Status of the GPU release.
+  * No defined values, but starts at 0 and increases by one for each release
+  * status (alpha, beta, EAC, etc.).
+  * 4 bit values (0-15).
+  */
+ u16 version_status;
+
+ /**
+  * Minor release number of the GPU. "P" part of an "RnPn" release number.
+  * 8 bit values (0-255).
+  */
+ u16 minor_revision;
+
+ /**
+  * Major release number of the GPU. "R" part of an "RnPn" release number.
+  * 4 bit values (0-15).
+  */
+ u16 major_revision;
+
+ u16 padding;
+
+ /**
+  * @usecase GPU clock speed is not specified in the Midgard Architecture, but is
+  * <b>necessary for OpenCL's clGetDeviceInfo() function</b>.
+  */
+ u32 gpu_speed_mhz;
+
+ /**
+  * @usecase GPU clock max/min speed is required for computing best/worst case
+  * in tasks such as job scheduling and irq_throttling. (It is not specified in the
+  * Midgard Architecture).
+  */
+ u32 gpu_freq_khz_max;
+ u32 gpu_freq_khz_min;
+
+ /**
+  * Size of the shader program counter, in bits.
+  */
+ u32 log2_program_counter_size;
+
+ /**
+  * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
+  * bitpattern where a set bit indicates that the format is supported.
+  *
+  * Before using a texture format, it is recommended that the corresponding
+  * bit be checked.
+  */
+ u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS];
+
+ /**
+  * Theoretical maximum memory available to the GPU. It is unlikely that a
+  * client will be able to allocate all of this memory for their own
+  * purposes, but this at least provides an upper bound on the memory
+  * available to the GPU.
+  *
+  * This is required for OpenCL's clGetDeviceInfo() call when
+  * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
+  * client will not be expecting to allocate anywhere near this value.
+  */
+ u64 gpu_available_memory_size;
+};
+
+/**
+ *
+ * More information is possible - but associativity and bus width are not
+ * required by upper-level APIs.
+ */
+struct mali_base_gpu_l2_cache_props {
+ u8 log2_line_size;
+ u8 log2_cache_size;
+ u8 num_l2_slices; /* Number of L2C slices. 1 or higher */
+ u8 padding[5];
+};
+
+struct mali_base_gpu_tiler_props {
+ u32 bin_size_bytes; /* Max is 4*2^15 */
+ u32 max_active_levels; /* Max is 2^15 */
+};
+
+/**
+ * GPU threading system details.
+ */
+struct mali_base_gpu_thread_props {
+ u32 max_threads; /* Max. number of threads per core */
+ u32 max_workgroup_size; /* Max. number of threads per workgroup */
+ u32 max_barrier_size; /* Max. number of threads that can synchronize on a simple barrier */
+ u16 max_registers; /* Total size [1..65535] of the register file available per core. */
+ u8 max_task_queue; /* Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
+ u8 max_thread_group_split; /* Max. allowed value [1..15] of the Thread Group Split field. */
+ u8 impl_tech; /* 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
+ u8 padding[7];
+};
+
+/**
+ * @brief descriptor for a coherent group
+ *
+ * \c core_mask exposes all cores in that coherent group, and \c num_cores
+ * provides a cached population-count for that mask.
+ *
+ * @note Whilst all cores are exposed in the mask, not all may be available to
+ * the application, depending on the Kernel Power policy.
+ *
+ * @note If u64s must be 8-byte aligned, then this structure has 32 bits of wastage.
+ */
+struct mali_base_gpu_coherent_group {
+ u64 core_mask; /**< Core restriction mask required for the group */
+ u16 num_cores; /**< Number of cores in the group */
+ u16 padding[3];
+};
+
+/**
+ * @brief Coherency group information
+ *
+ * Note that the sizes of the members could be reduced. However, the \c group
+ * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
+ * aligned, thus leading to wastage if the other members' sizes were reduced.
+ *
+ * The groups are sorted by core mask. The core masks are non-repeating and do
+ * not intersect.
+ */
+struct mali_base_gpu_coherent_group_info {
+ u32 num_groups;
+
+ /**
+  * Number of core groups (coherent or not) in the GPU. Equivalent to the number of L2 Caches.
+  *
+  * The GPU Counter dumping writes 2048 bytes per core group, regardless of
+  * whether the core groups are coherent or not. Hence this member is needed
+  * to calculate how much memory is required for dumping. (An illustrative
+  * sizing sketch is given after this section.)
+  *
+  * @note Do not use it to work out how many valid elements are in the
+  * group[] member. Use num_groups instead.
+  */
+ u32 num_core_groups;
+
+ /**
+  * Coherency features of the memory, accessed by @ref gpu_mem_features
+  * methods
+  */
+ gpu_mem_features coherency;
+
+ u32 padding;
+
+ /**
+  * Descriptors of coherent groups
+  */
+ struct mali_base_gpu_coherent_group group[BASE_MAX_COHERENT_GROUPS];
+};
+
+/**
+ * A complete description of the GPU's Hardware Configuration Discovery
+ * registers.
+ *
+ * The information is presented inefficiently for access. For frequent access,
+ * the values should be better expressed in an unpacked form in the
+ * base_gpu_props structure.
+ *
+ * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
+ * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
+ * behaving differently?". In this case, all information about the
+ * configuration is potentially useful, but it <b>does not need to be processed
+ * by the driver</b>. Instead, the raw registers can be processed by the Mali
+ * Tools software on the host PC.
+ *
+ */
+struct gpu_raw_gpu_props {
+ u64 shader_present;
+ u64 tiler_present;
+ u64 l2_present;
+ u64 unused_1; /* keep for backward compatibility */
+
+ gpu_cache_features l2_features;
+ u32 suspend_size; /* API 8.2+ */
+ gpu_mem_features mem_features;
+ gpu_mmu_features mmu_features;
+
+ gpu_as_present as_present;
+
+ u32 js_present;
+ gpu_js_features js_features[GPU_MAX_JOB_SLOTS];
+ gpu_tiler_features tiler_features;
+ u32 texture_features[3];
+
+ u32 gpu_id;
+
+ u32 thread_max_threads;
+ u32 thread_max_workgroup_size;
+ u32 thread_max_barrier_size;
+ u32 thread_features;
+
+ u32 coherency_features;
+};
+
+/**
+ * Return structure for _mali_base_get_gpu_props().
+ *
+ * NOTE: the raw_props member in this data structure contains the register
+ * values from which the values of the other members are derived. The derived
+ * members exist to allow for efficient access and/or shielding the details
+ * of the layout of the registers.
+ *
+ */
+typedef struct mali_base_gpu_props {
+ struct mali_base_gpu_core_props core_props;
+ struct mali_base_gpu_l2_cache_props l2_props;
+ u64 unused_1; /* keep for backwards compatibility */
+ struct mali_base_gpu_tiler_props tiler_props;
+ struct mali_base_gpu_thread_props thread_props;
+
+ /** This member is large, likely to be 128 bytes */
+ struct gpu_raw_gpu_props raw_props;
+
+ /** This must be the last member of the structure */
+ struct mali_base_gpu_coherent_group_info coherency_info;
+} base_gpu_props;
+
+/** @} end group base_user_api_gpuprops_dyn */
+
+/** @} end group base_user_api_gpuprops */
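An editorial sketch (not part of the patch) of the counter-dump sizing rule documented for num_core_groups above: every core group, coherent or not, contributes 2048 bytes. The helper name is hypothetical:

static size_t example_counter_dump_size(const base_gpu_props *props)
{
	/* 2048 bytes written per core group, per the num_core_groups doc */
	return (size_t)props->coherency_info.num_core_groups * 2048;
}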
+
+/**
+ * @addtogroup base_user_api_core User-side Base core APIs
+ * @{
+ */
+
+/**
+ * \enum base_context_create_flags
+ *
+ * Flags to pass to ::base_context_init.
+ * Flags can be ORed together to enable multiple things.
+ *
+ * These share the same space as @ref basep_context_private_flags, and so must
+ * not collide with them.
+ */
+enum base_context_create_flags {
+ /** No flags set */
+ BASE_CONTEXT_CREATE_FLAG_NONE = 0,
+
+ /** Base context is embedded in a cctx object (flag used for CINSTR software counter macros) */
+ BASE_CONTEXT_CCTX_EMBEDDED = (1u << 0),
+
+ /** Base context is a 'System Monitor' context for Hardware counters.
+  *
+  * One important side effect of this is that job submission is disabled. */
+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED = (1u << 1),
+
+ /** Base context flag indicating a 'hint' that this context uses Compute
+  * Jobs only.
+  *
+  * Specifically, this means that it only sends atoms that <b>do not</b>
+  * contain the following @ref base_jd_core_req :
+  * - BASE_JD_REQ_FS
+  * - BASE_JD_REQ_T
+  *
+  * Violation of these requirements will cause the Job-Chains to be rejected.
+  *
+  * In addition, it is inadvisable for the atom's Job-Chains to contain Jobs
+  * of the following @ref gpu_job_type (whilst it may work now, it may not
+  * work in future) :
+  * - @ref GPU_JOB_VERTEX
+  * - @ref GPU_JOB_GEOMETRY
+  *
+  * @note An alternative to using this is to specify the BASE_JD_REQ_ONLY_COMPUTE
+  * requirement in atoms.
+  */
+ BASE_CONTEXT_HINT_ONLY_COMPUTE = (1u << 2)
+};
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to base_context_init()
+ */
+#define BASE_CONTEXT_CREATE_ALLOWED_FLAGS \
+ (((u32)BASE_CONTEXT_CCTX_EMBEDDED) | \
+  ((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \
+  ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE))
+
+/**
+ * Bitpattern describing the ::base_context_create_flags that can be passed to the kernel
+ */
+#define BASE_CONTEXT_CREATE_KERNEL_FLAGS \
+ (((u32)BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) | \
+  ((u32)BASE_CONTEXT_HINT_ONLY_COMPUTE))
+
+/**
+ * Private flags used on the base context
+ *
+ * These start at bit 31, and run down to zero.
+ *
+ * They share the same space as @ref base_context_create_flags, and so must
+ * not collide with them.
+ */
+enum basep_context_private_flags {
+ /** Private flag tracking whether job descriptor dumping is disabled */
+ BASEP_CONTEXT_FLAG_JOB_DUMP_DISABLED = (1 << 31)
+};
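An editorial sketch (not part of the patch) of how the masks above can be used to validate user-supplied create flags; the example_ name is hypothetical:

static int example_validate_create_flags(u32 flags)
{
	/* Reject anything outside the documented allowed set; a kernel-side
	 * caller would check against BASE_CONTEXT_CREATE_KERNEL_FLAGS instead */
	if (flags & ~BASE_CONTEXT_CREATE_ALLOWED_FLAGS)
		return -1;
	return 0;
}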
+
+/** @} end group base_user_api_core */
+
+/** @} end group base_user_api */
+
+/**
+ * @addtogroup base_plat_config_gpuprops Base Platform Config GPU Properties
+ * @{
+ *
+ * C Pre-processor macros are exposed here to do with Platform
+ * Config.
+ *
+ * These include:
+ * - GPU Properties that are constant on a particular Midgard Family
+ * Implementation e.g. Maximum samples per pixel on Mali-T600.
+ * - General platform config for the GPU, such as the GPU major and minor
+ * revision.
+ */
+
+/** @} end group base_plat_config_gpuprops */
+
+/**
+ * @addtogroup base_api Base APIs
+ * @{
+ */
+
+/**
+ * @brief The payload for a replay job. This must be in GPU memory.
+ */
+typedef struct base_jd_replay_payload {
+ /**
+  * Pointer to the first entry in the base_jd_replay_jc list. These
+  * will be replayed in @b reverse order (so that extra ones can be added
+  * to the head in future soft jobs without affecting this soft job)
+  */
+ u64 tiler_jc_list;
+
+ /**
+  * Pointer to the fragment job chain.
+  */
+ u64 fragment_jc;
+
+ /**
+  * Pointer to the tiler heap free FBD field to be modified.
+  */
+ u64 tiler_heap_free;
+
+ /**
+  * Hierarchy mask for the replayed fragment jobs. May be zero.
+  */
+ u16 fragment_hierarchy_mask;
+
+ /**
+  * Hierarchy mask for the replayed tiler jobs. May be zero.
+  */
+ u16 tiler_hierarchy_mask;
+
+ /**
+  * Default weight to be used for hierarchy levels not in the original
+  * mask.
+  */
+ u32 hierarchy_default_weight;
+
+ /**
+  * Core requirements for the tiler job chain
+  */
+ base_jd_core_req tiler_core_req;
+
+ /**
+  * Core requirements for the fragment job chain
+  */
+ base_jd_core_req fragment_core_req;
+
+ u8 padding[4];
+} base_jd_replay_payload;
+
+/**
+ * @brief An entry in the linked list of job chains to be replayed. This must
+ * be in GPU memory.
+ */
+typedef struct base_jd_replay_jc {
+ /**
+  * Pointer to next entry in the list. A setting of NULL indicates the
+  * end of the list.
+  */
+ u64 next;
+
+ /**
+  * Pointer to the job chain.
+  */
+ u64 jc;
+
+} base_jd_replay_jc;
+
+/* Maximum number of jobs allowed in a fragment chain in the payload of a
+ * replay job */
+#define BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT 256
+
+/** @} end group base_api */
+
+typedef struct base_profiling_controls {
+ u32 profiling_controls[FBDUMP_CONTROL_MAX];
+} base_profiling_controls;
+
+#endif /* _BASE_KERNEL_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_base_kernel_sync.h b/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
new file mode 100644
index 00000000000..a24791f4af7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_kernel_sync.h
@@ -0,0 +1,47 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Base cross-process sync API.
+ */
+
+#ifndef _BASE_KERNEL_SYNC_H_
+#define _BASE_KERNEL_SYNC_H_
+
+#include <linux/ioctl.h>
+
+#define STREAM_IOC_MAGIC '~'
+
+/* Fence insert.
+ *
+ * Inserts a fence on the stream operated on.
+ * The fence can be waited on via a base fence wait soft-job,
+ * or triggered via a base fence trigger soft-job.
+ *
+ * Fences must be cleaned up with close when no longer needed.
+ *
+ * No input/output arguments.
+ * Returns
+ *     >=0 fd
+ *     <0  error code
+ */
+#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)
+
+#endif /* _BASE_KERNEL_SYNC_H_ */
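An editorial user-space sketch (not part of the patch) of the ioctl defined in mali_base_kernel_sync.h above. How the stream fd is obtained is driver-specific and not shown; the example_ name is hypothetical:

#include <sys/ioctl.h>
#include <unistd.h>

/* Mirrors the definitions in mali_base_kernel_sync.h above */
#define STREAM_IOC_MAGIC '~'
#define STREAM_IOC_FENCE_INSERT _IO(STREAM_IOC_MAGIC, 0)

int example_insert_fence(int stream_fd)
{
	/* Returns >=0 (a fence fd) on success, <0 on error */
	int fence_fd = ioctl(stream_fd, STREAM_IOC_FENCE_INSERT);

	if (fence_fd < 0)
		return -1;
	/* ... wait on or trigger the fence via base soft-jobs ... */
	close(fence_fd);   /* fences must be cleaned up with close */
	return 0;
}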
diff --git a/drivers/gpu/arm/midgard/mali_base_mem_priv.h b/drivers/gpu/arm/midgard/mali_base_mem_priv.h
new file mode 100644
index 00000000000..4a98a72cc37
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_mem_priv.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _BASE_MEM_PRIV_H_
+#define _BASE_MEM_PRIV_H_
+
+#define BASE_SYNCSET_OP_MSYNC (1U << 0)
+#define BASE_SYNCSET_OP_CSYNC (1U << 1)
+
+/*
+ * This structure describes a basic memory coherency operation.
+ * It can either be:
+ * @li a sync from CPU to Memory:
+ *	- type = ::BASE_SYNCSET_OP_MSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes
+ *	- offset is ignored.
+ * @li a sync from Memory to CPU:
+ *	- type = ::BASE_SYNCSET_OP_CSYNC
+ *	- mem_handle = a handle to the memory object on which the operation
+ *	  is taking place
+ *	- user_addr = the address of the range to be synced
+ *	- size = the amount of data to be synced, in bytes.
+ *	- offset is ignored.
+ */
+struct basep_syncset {
+ base_mem_handle mem_handle;
+ u64 user_addr;
+ u64 size;
+ u8 type;
+ u8 padding[7];
+};
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
new file mode 100644
index 00000000000..be454a216a3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_base_vendor_specific_func.h
@@ -0,0 +1,24 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+#ifndef _BASE_VENDOR_SPEC_FUNC_H_
+#define _BASE_VENDOR_SPEC_FUNC_H_
+
+int kbase_get_vendor_specific_cpu_clock_speed(u32 * const);
+
+#endif	/*_BASE_VENDOR_SPEC_FUNC_H_*/
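An editorial sketch (not part of the patch) of preparing a CPU-to-memory sync with the basep_syncset structure from mali_base_mem_priv.h above; the declarations are assumed to be in scope and the example_ name is hypothetical:

static void example_fill_msync(struct basep_syncset *sset,
			       base_mem_handle handle, u64 addr, u64 bytes)
{
	sset->mem_handle = handle;            /* memory object being synced */
	sset->user_addr = addr;               /* start of the range */
	sset->size = bytes;                   /* amount of data, in bytes */
	sset->type = BASE_SYNCSET_OP_MSYNC;   /* CPU -> Memory direction */
}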
diff --git a/drivers/gpu/arm/midgard/mali_kbase.h b/drivers/gpu/arm/midgard/mali_kbase.h
new file mode 100644
index 00000000000..be3527820fa
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase.h
@@ -0,0 +1,499 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _KBASE_H_
+#define _KBASE_H_
+
+#include <mali_malisw.h>
+
+#include <mali_kbase_debug.h>
+
+#include <asm/page.h>
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/hrtimer.h>
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
+#include "mali_base_kernel.h"
+#include <mali_kbase_uku.h>
+#include <mali_kbase_linux.h>
+
+#include "mali_kbase_pm.h"
+#include "mali_kbase_mem_lowlevel.h"
+#include "mali_kbase_defs.h"
+#include "mali_kbase_trace_timeline.h"
+#include "mali_kbase_js.h"
+#include "mali_kbase_mem.h"
+#include "mali_kbase_security.h"
+#include "mali_kbase_utility.h"
+#include "mali_kbase_gpu_memory_debugfs.h"
+#include "mali_kbase_mem_profile_debugfs.h"
+#include "mali_kbase_jd_debugfs.h"
+#include "mali_kbase_cpuprops.h"
+#include "mali_kbase_gpuprops.h"
+#include "mali_kbase_jm.h"
+#include "mali_kbase_vinstr.h"
+#ifdef CONFIG_GPU_TRACEPOINTS
+#include <trace/events/gpu.h>
+#endif
+/**
+ * @page page_base_kernel_main Kernel-side Base (KBase) APIs
+ *
+ * The Kernel-side Base (KBase) APIs are divided up as follows:
+ * - @subpage page_kbase_js_policy
+ */
+
+/**
+ * @defgroup base_kbase_api Kernel-side Base (KBase) APIs
+ */
+
+struct kbase_device *kbase_device_alloc(void);
+/*
+ * note: the configuration attributes member of kbdev needs to have
+ * been set up before calling kbase_device_init
+ */
+
+/*
+ * API to acquire the device list semaphore and return a pointer
+ * to the device list head
+ */
+const struct list_head *kbase_dev_list_get(void);
+/* API to release the device list semaphore */
+void kbase_dev_list_put(const struct list_head *dev_list);
+
+int kbase_device_init(struct kbase_device * const kbdev);
+void kbase_device_term(struct kbase_device *kbdev);
+void kbase_device_free(struct kbase_device *kbdev);
+int kbase_device_has_feature(struct kbase_device *kbdev, u32 feature);
+
+/* Needed for gator integration and for reporting vsync information */
+struct kbase_device *kbase_find_device(int minor);
+void kbase_release_device(struct kbase_device *kbdev);
+
+void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value);
+
+u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control);
+
+struct kbase_context *
+kbase_create_context(struct kbase_device *kbdev, bool is_compat);
+void kbase_destroy_context(struct kbase_context *kctx);
+int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags);
+
+int kbase_jd_init(struct kbase_context *kctx);
+void kbase_jd_exit(struct kbase_context *kctx);
+#ifdef BASE_LEGACY_UK6_SUPPORT
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data,
+		int uk6_atom);
+#else
+int kbase_jd_submit(struct kbase_context *kctx,
+		const struct kbase_uk_job_submit *submit_data);
+#endif
+void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timestamp,
+		kbasep_js_atom_done_code done_code);
+void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+void kbase_jd_zap_context(struct kbase_context *kctx);
+bool jd_done_nolock(struct kbase_jd_atom *katom);
+void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
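An editorial sketch (not part of the patch) of the get/put pairing for the device list accessors declared above; the loop body is illustrative and the example_ name is hypothetical:

static void example_walk_devices(void)
{
	/* kbase_dev_list_get() takes the device list semaphore */
	const struct list_head *dev_list = kbase_dev_list_get();
	struct list_head *entry;

	list_for_each(entry, (struct list_head *)dev_list) {
		/* each entry embeds a struct kbase_device */
	}
	/* always release the semaphore with the matching put */
	kbase_dev_list_put(dev_list);
}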
+bool jd_submit_atom(struct kbase_context *kctx,
+		const struct base_jd_atom_v2 *user_atom,
+		struct kbase_jd_atom *katom);
+
+void kbase_job_done(struct kbase_device *kbdev, u32 done);
+
+/**
+ * kbase_job_slot_ctx_priority_check_locked(): - Check for lower priority atoms
+ *                                               and soft stop them
+ * @kctx: Pointer to context to check.
+ * @katom: Pointer to priority atom.
+ *
+ * Atoms from @kctx on the same job slot as @katom, which have lower priority
+ * than @katom will be soft stopped and put back in the queue, so that atoms
+ * with higher priority can run.
+ *
+ * The js_data.runpool_irq.lock must be held when calling this function.
+ */
+void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
+		struct kbase_jd_atom *katom);
+
+void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
+		struct kbase_jd_atom *target_katom, u32 sw_flags);
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+		struct kbase_jd_atom *target_katom);
+void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
+		u16 core_reqs, struct kbase_jd_atom *target_katom);
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+		struct kbase_jd_atom *target_katom);
+
+void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *event);
+int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent);
+int kbase_event_pending(struct kbase_context *ctx);
+int kbase_event_init(struct kbase_context *kctx);
+void kbase_event_close(struct kbase_context *kctx);
+void kbase_event_cleanup(struct kbase_context *kctx);
+void kbase_event_wakeup(struct kbase_context *kctx);
+
+int kbase_process_soft_job(struct kbase_jd_atom *katom);
+int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
+void kbase_finish_soft_job(struct kbase_jd_atom *katom);
+void kbase_cancel_soft_job(struct kbase_jd_atom *katom);
+void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev);
+
+bool kbase_replay_process(struct kbase_jd_atom *katom);
+
+/* API used internally for register access. Contains validation and tracing */
+void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value);
+void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size);
+void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx);
+
+/* API to be ported per OS, only need to do the raw register access */
+void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value);
+u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset);
+
+
+void kbasep_as_do_poke(struct work_struct *work);
+
+/** Returns the name associated with a Mali exception code
+ *
+ * This function is called from the interrupt handler when a GPU fault occurs.
+ * It reports the details of the fault using KBASE_DEBUG_PRINT_WARN.
+ *
+ * @param[in] kbdev The kbase device that the GPU fault occurred from.
+ * @param[in] exception_code exception code
+ * @return name associated with the exception code
+ */
+const char *kbase_exception_name(struct kbase_device *kbdev,
+		u32 exception_code);
+
+/**
+ * Check whether a system suspend is in progress, or has already happened
+ *
+ * The caller should ensure that either kbdev->pm.active_count_lock is held, or
+ * a dmb was executed recently (to ensure the value is most
+ * up-to-date). However, without a lock the value could change afterwards.
+ *
+ * @return false if a suspend is not in progress
+ * @return !=false otherwise
+ */
+static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
+{
+ return kbdev->pm.suspending;
+}
+
+/**
+ * Return the atom's ID, as was originally supplied by userspace in
+ * base_jd_atom_v2::atom_number
+ */
+static inline int kbase_jd_atom_id(struct kbase_context *kctx, struct kbase_jd_atom *katom)
+{
+ int result;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(katom);
+ KBASE_DEBUG_ASSERT(katom->kctx == kctx);
+
+ result = katom - &kctx->jctx.atoms[0];
+ KBASE_DEBUG_ASSERT(result >= 0 && result <= BASE_JD_ATOM_COUNT);
+ return result;
+}
+
+/**
+ * kbase_jd_atom_from_id - Return the atom structure for the given atom ID
+ * @kctx: Context pointer
+ * @id:   ID of atom to retrieve
+ *
+ * Return: Pointer to struct kbase_jd_atom associated with the supplied ID
+ */
+static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
+		struct kbase_context *kctx, int id)
+{
+ return &kctx->jctx.atoms[id];
+}
+
+/**
+ * Initialize the disjoint state
+ *
+ * The disjoint event count and state are both set to zero.
+ *
+ * Disjoint functions usage:
+ *
+ * The disjoint event count should be incremented whenever a disjoint event occurs.
+ *
+ * There are several cases which are regarded as disjoint behavior. Rather than just increment
+ * the counter during disjoint events we also increment the counter when jobs may be affected
+ * by what the GPU is currently doing. To facilitate this we have the concept of disjoint state.
+ *
+ * Disjoint state is entered during GPU reset and for the entire time that an atom is replaying
+ * (as part of the replay workaround). Increasing the disjoint state also increases the count of
+ * disjoint events.
+ *
+ * The disjoint state is then used to increase the count of disjoint events during job submission
+ * and job completion. Any atom submitted or completed while the disjoint state is greater than
+ * zero is regarded as a disjoint event.
+ *
+ * The disjoint event counter is also incremented immediately whenever a job is soft stopped
+ * and during context creation.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_init(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events.
+ * Called when a disjoint event has happened.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event(struct kbase_device *kbdev);
+
+/**
+ * Increase the count of disjoint events only if the GPU is in a disjoint state
+ *
+ * This should be called when something happens which could be disjoint if the GPU
+ * is in a disjoint state. The state refcount keeps track of this.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_event_potential(struct kbase_device *kbdev);
+
+/**
+ * Returns the count of disjoint events
+ *
+ * @param kbdev The kbase device
+ * @return the count of disjoint events
+ */
+u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
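An editorial sketch (not part of the patch) of the intended use of the disjoint API, per the usage description above; kbase_disjoint_state_up()/down() are declared just below, and the example_ names and bodies are hypothetical:

static void example_reset_gpu(struct kbase_device *kbdev)
{
	kbase_disjoint_state_up(kbdev);    /* enter disjoint state; counts an event */
	/* ... perform the GPU reset ... */
	kbase_disjoint_state_down(kbdev);  /* leave disjoint state; counts an event */
}

static void example_submit_atom(struct kbase_device *kbdev)
{
	/* Counted only while the disjoint state refcount is non-zero */
	kbase_disjoint_event_potential(kbdev);
	/* ... submit the atom ... */
}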
+
+/**
+ * Increment the refcount state indicating that the GPU is in a disjoint state.
+ *
+ * Also increments the disjoint event count (calls @ref kbase_disjoint_event).
+ * Eventually, after the disjoint state has completed, @ref kbase_disjoint_state_down
+ * should be called.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_up(struct kbase_device *kbdev);
+
+/**
+ * Decrement the refcount state
+ *
+ * Also increments the disjoint event count (calls @ref kbase_disjoint_event).
+ *
+ * Called after @ref kbase_disjoint_state_up once the disjoint state is over.
+ *
+ * @param kbdev The kbase device
+ */
+void kbase_disjoint_state_down(struct kbase_device *kbdev);
+
+/**
+ * If a job is soft stopped and the number of contexts is >= this value
+ * it is reported as a disjoint event
+ */
+#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
+
+#if KBASE_TRACE_ENABLE
+void kbasep_trace_debugfs_init(struct kbase_device *kbdev);
+
+#ifndef CONFIG_MALI_SYSTEM_TRACE
+/** Add trace values about a job-slot
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, 0)
+
+/** Add trace values about a job-slot, with info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_JOBSLOT, 0, jobslot, info_val)
+
+/** Add trace values about a ctx refcount
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */
+#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount) \
+	kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \
+			KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, 0)
+/** Add trace values about a ctx refcount, and info
+ *
+ * @note Any functions called through this macro will still be evaluated in
+ * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any
+ * functions called to get the parameters supplied to this macro must:
+ * - be static or static inline
+ * - must just return 0 and have no other statements present in the body.
+ */ +#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val) \ + kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ + KBASE_TRACE_FLAG_REFCOUNT, refcount, 0, info_val) + +/** Add trace values (no slot or refcount) + * + * @note Any functions called through this macro will still be evaluated in + * Release builds (CONFIG_MALI_DEBUG not defined). Therefore, when KBASE_TRACE_ENABLE == 0 any + * functions called to get the parameters supplied to this macro must: + * - be static or static inline + * - must just return 0 and have no other statements present in the body. + */ +#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val) \ + kbasep_trace_add(kbdev, KBASE_TRACE_CODE(code), ctx, katom, gpu_addr, \ + 0, 0, 0, info_val) + +/** Clear the trace */ +#define KBASE_TRACE_CLEAR(kbdev) \ + kbasep_trace_clear(kbdev) + +/** Dump the slot trace */ +#define KBASE_TRACE_DUMP(kbdev) \ + kbasep_trace_dump(kbdev) + +/** PRIVATE - do not use directly. Use KBASE_TRACE_ADD() instead */ +void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val); +/** PRIVATE - do not use directly. Use KBASE_TRACE_CLEAR() instead */ +void kbasep_trace_clear(struct kbase_device *kbdev); +#else /* #ifndef CONFIG_MALI_SYSTEM_TRACE */ +/* Dispatch kbase trace events as system trace events */ +#include <mali_linux_kbase_trace.h> +#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\ + trace_mali_##code(jobslot, 0) + +#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ + trace_mali_##code(jobslot, info_val) + +#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ + trace_mali_##code(refcount, 0) + +#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ + trace_mali_##code(refcount, info_val) + +#define KBASE_TRACE_ADD(kbdev, code, ctx, katom, gpu_addr, info_val)\ + trace_mali_##code(gpu_addr, info_val) + +#define KBASE_TRACE_CLEAR(kbdev)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(0);\ + } while (0) +#define KBASE_TRACE_DUMP(kbdev)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(0);\ + } while (0) + +#endif /* #ifndef CONFIG_MALI_SYSTEM_TRACE */ +#else +#define KBASE_TRACE_ADD_SLOT(kbdev, code, ctx, katom, gpu_addr, jobslot)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(ctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + } while (0) + +#define KBASE_TRACE_ADD_SLOT_INFO(kbdev, code, ctx, katom, gpu_addr, jobslot, info_val)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(ctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(jobslot);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_TRACE_ADD_REFCOUNT(kbdev, code, ctx, katom, gpu_addr, refcount)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(ctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(refcount);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_TRACE_ADD_REFCOUNT_INFO(kbdev, code, ctx, katom, gpu_addr, refcount, info_val)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + CSTD_UNUSED(ctx);\ + CSTD_UNUSED(katom);\ + CSTD_UNUSED(gpu_addr);\ + CSTD_UNUSED(info_val);\ + CSTD_NOP(0);\ + } while (0) + +#define KBASE_TRACE_ADD(kbdev, code, subcode, ctx, katom, val)\ + do {\ + CSTD_UNUSED(kbdev);\ + CSTD_NOP(code);\ + 
CSTD_UNUSED(subcode);\
+		CSTD_UNUSED(ctx);\
+		CSTD_UNUSED(katom);\
+		CSTD_UNUSED(val);\
+		CSTD_NOP(0);\
+	} while (0)
+
+#define KBASE_TRACE_CLEAR(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#define KBASE_TRACE_DUMP(kbdev)\
+	do {\
+		CSTD_UNUSED(kbdev);\
+		CSTD_NOP(0);\
+	} while (0)
+#endif /* KBASE_TRACE_ENABLE */
+/** PRIVATE - do not use directly. Use KBASE_TRACE_DUMP() instead */
+void kbasep_trace_dump(struct kbase_device *kbdev);
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
new file mode 100644
index 00000000000..933aa285469
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.c
@@ -0,0 +1,209 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+#include <linux/dma-mapping.h>
+#include <mali_kbase.h>
+#include <mali_kbase_10969_workaround.h>
+
+/* This function is used to work around a HW issue with single iterator GPUs.
+ * If a fragment job is soft-stopped on the edge of its bounding box, it can
+ * happen that the restart index is out of bounds and the rerun causes a tile
+ * range fault. If this happens we try to clamp the restart index to a correct
+ * value and rerun the job.
+ */
+/* Mask of X and Y coordinates for the coordinate words in the descriptors */
+#define X_COORDINATE_MASK 0x00000FFF
+#define Y_COORDINATE_MASK 0x0FFF0000
+/* Max number of words needed from the fragment shader job descriptor */
+#define JOB_HEADER_SIZE_IN_WORDS 10
+#define JOB_HEADER_SIZE (JOB_HEADER_SIZE_IN_WORDS*sizeof(u32))
+
+/* Word 0: Status Word */
+#define JOB_DESC_STATUS_WORD 0
+/* Word 1: Restart Index */
+#define JOB_DESC_RESTART_INDEX_WORD 1
+/* Word 2: Fault address low word */
+#define JOB_DESC_FAULT_ADDR_LOW_WORD 2
+/* Word 8: Minimum Tile Coordinates */
+#define FRAG_JOB_DESC_MIN_TILE_COORD_WORD 8
+/* Word 9: Maximum Tile Coordinates */
+#define FRAG_JOB_DESC_MAX_TILE_COORD_WORD 9
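An editorial sketch (not part of the patch) of how the coordinate masks above split a packed tile-coordinate word: X lives in the low 12 bits and Y in bits 16 and up, and the driver below compares Y values in place (unshifted). The example_ names are hypothetical:

static inline u32 example_coord_x(u32 word)
{
	return word & X_COORDINATE_MASK;   /* X: bits 0..11 */
}

static inline u32 example_coord_y(u32 word)
{
	return word & Y_COORDINATE_MASK;   /* Y: bits 16..27, kept in place */
}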
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom)
+{
+ struct device *dev = katom->kctx->kbdev->dev;
+ u32 clamped = 0;
+ struct kbase_va_region *region;
+ phys_addr_t *page_array;
+ u64 page_index;
+ u32 offset = katom->jc & (~PAGE_MASK);
+ u32 *page_1 = NULL;
+ u32 *page_2 = NULL;
+ u32 job_header[JOB_HEADER_SIZE_IN_WORDS];
+ void *dst = job_header;
+ u32 minX, minY, maxX, maxY;
+ u32 restartX, restartY;
+ struct page *p;
+ u32 copy_size;
+
+ dev_warn(dev, "Called TILE_RANGE_FAULT workaround clamping function.\n");
+ if (!(katom->core_req & BASE_JD_REQ_FS))
+  return 0;
+
+ kbase_gpu_vm_lock(katom->kctx);
+ region = kbase_region_tracker_find_region_enclosing_address(katom->kctx,
+   katom->jc);
+ if (!region || (region->flags & KBASE_REG_FREE))
+  goto out_unlock;
+
+ page_array = kbase_get_cpu_phy_pages(region);
+ if (!page_array)
+  goto out_unlock;
+
+ page_index = (katom->jc >> PAGE_SHIFT) - region->start_pfn;
+
+ p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+ /* We need the first 10 words of the fragment shader job descriptor.
+  * We need to check that offset + JOB_HEADER_SIZE is less than the page
+  * size; otherwise we need to map the next page as well. copy_size is
+  * the number of descriptor bytes that fit within the first page: it
+  * equals JOB_HEADER_SIZE when the whole descriptor is within the page,
+  * and is smaller otherwise.
+  */
+ copy_size = MIN(PAGE_SIZE - offset, JOB_HEADER_SIZE);
+
+ page_1 = kmap_atomic(p);
+
+ /* page_1 is a u32 pointer, offset is expressed in bytes */
+ page_1 += offset>>2;
+
+ kbase_sync_single_for_cpu(katom->kctx->kbdev,
+   kbase_dma_addr(p) + offset,
+   copy_size, DMA_BIDIRECTIONAL);
+
+ memcpy(dst, page_1, copy_size);
+
+ /* The data needed overflows the page dimension,
+  * we need to map the subsequent page */
+ if (copy_size < JOB_HEADER_SIZE) {
+  p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+  page_2 = kmap_atomic(p);
+
+  kbase_sync_single_for_cpu(katom->kctx->kbdev,
+    kbase_dma_addr(p),
+    JOB_HEADER_SIZE - copy_size, DMA_BIDIRECTIONAL);
+
+  memcpy(dst + copy_size, page_2, JOB_HEADER_SIZE - copy_size);
+ }
+
+ /* We managed to correctly map one or two pages (in case of overflow) */
+ /* Get Bounding Box data and restart index from fault address low word */
+ minX = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & X_COORDINATE_MASK;
+ minY = job_header[FRAG_JOB_DESC_MIN_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+ maxX = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & X_COORDINATE_MASK;
+ maxY = job_header[FRAG_JOB_DESC_MAX_TILE_COORD_WORD] & Y_COORDINATE_MASK;
+ restartX = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & X_COORDINATE_MASK;
+ restartY = job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] & Y_COORDINATE_MASK;
+
+ dev_warn(dev, "Before Clamping:\n"
+   "Jobstatus: %08x\n"
+   "restartIdx: %08x\n"
+   "Fault_addr_low: %08x\n"
+   "minCoordsX: %08x minCoordsY: %08x\n"
+   "maxCoordsX: %08x maxCoordsY: %08x\n",
+   job_header[JOB_DESC_STATUS_WORD],
+   job_header[JOB_DESC_RESTART_INDEX_WORD],
+   job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+   minX, minY,
+   maxX, maxY);
+
+ /* Set the restart index to the one which generated the fault */
+ job_header[JOB_DESC_RESTART_INDEX_WORD] =
+   job_header[JOB_DESC_FAULT_ADDR_LOW_WORD];
+
+ if (restartX < minX) {
+  job_header[JOB_DESC_RESTART_INDEX_WORD] = (minX) | restartY;
+  dev_warn(dev,
+    "Clamping restart X index to minimum. %08x clamped to %08x\n",
+    restartX, minX);
+  clamped = 1;
+ }
+ if (restartY < minY) {
+  job_header[JOB_DESC_RESTART_INDEX_WORD] = (minY) | restartX;
+  dev_warn(dev,
+    "Clamping restart Y index to minimum. %08x clamped to %08x\n",
+    restartY, minY);
+  clamped = 1;
+ }
+ if (restartX > maxX) {
+  job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxX) | restartY;
+  dev_warn(dev,
+    "Clamping restart X index to maximum. %08x clamped to %08x\n",
+    restartX, maxX);
+  clamped = 1;
+ }
+ if (restartY > maxY) {
+  job_header[JOB_DESC_RESTART_INDEX_WORD] = (maxY) | restartX;
+  dev_warn(dev,
+    "Clamping restart Y index to maximum. %08x clamped to %08x\n",
+    restartY, maxY);
+  clamped = 1;
+ }
+
+ if (clamped) {
+  /* Reset the fault address low word
+   * and set the job status to STOPPED */
+  job_header[JOB_DESC_FAULT_ADDR_LOW_WORD] = 0x0;
+  job_header[JOB_DESC_STATUS_WORD] = BASE_JD_EVENT_STOPPED;
+  dev_warn(dev, "After Clamping:\n"
+    "Jobstatus: %08x\n"
+    "restartIdx: %08x\n"
+    "Fault_addr_low: %08x\n"
+    "minCoordsX: %08x minCoordsY: %08x\n"
+    "maxCoordsX: %08x maxCoordsY: %08x\n",
+    job_header[JOB_DESC_STATUS_WORD],
+    job_header[JOB_DESC_RESTART_INDEX_WORD],
+    job_header[JOB_DESC_FAULT_ADDR_LOW_WORD],
+    minX, minY,
+    maxX, maxY);
+
+  /* Flush CPU cache to update memory for future GPU reads */
+  memcpy(page_1, dst, copy_size);
+  p = pfn_to_page(PFN_DOWN(page_array[page_index]));
+
+  kbase_sync_single_for_device(katom->kctx->kbdev,
+    kbase_dma_addr(p) + offset,
+    copy_size, DMA_TO_DEVICE);
+
+  if (copy_size < JOB_HEADER_SIZE) {
+   memcpy(page_2, dst + copy_size,
+     JOB_HEADER_SIZE - copy_size);
+   p = pfn_to_page(PFN_DOWN(page_array[page_index + 1]));
+
+   kbase_sync_single_for_device(katom->kctx->kbdev,
+     kbase_dma_addr(p),
+     JOB_HEADER_SIZE - copy_size,
+     DMA_TO_DEVICE);
+  }
+ }
+ if (copy_size < JOB_HEADER_SIZE)
+  kunmap_atomic(page_2);
+
+ kunmap_atomic(page_1);
+
+out_unlock:
+ kbase_gpu_vm_unlock(katom->kctx);
+ return clamped;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
new file mode 100644
index 00000000000..099a2986167
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_10969_workaround.h
@@ -0,0 +1,23 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_10969_WORKAROUND_
+#define _KBASE_10969_WORKAROUND_
+
+int kbasep_10969_workaround_clamp_coordinates(struct kbase_jd_atom *katom);
+
+#endif /* _KBASE_10969_WORKAROUND_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
new file mode 100644
index 00000000000..51f934404e5
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.c
@@ -0,0 +1,61 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Cache Policy API.
+ */
+
+#include "mali_kbase_cache_policy.h"
+
+/*
+ * The output flags should be a combination of the following values:
+ * KBASE_REG_CPU_CACHED: CPU cache should be enabled.
+ */
+u32 kbase_cache_enabled(u32 flags, u32 nr_pages)
+{
+ u32 cache_flags = 0;
+
+ CSTD_UNUSED(nr_pages);
+
+#ifdef CONFIG_MALI_CACHE_COHERENT
+ /* Cache is completely coherent at hardware level. So always allocate
+  * cached memory.
+ */ + cache_flags |= KBASE_REG_CPU_CACHED; +#else + if (flags & BASE_MEM_CACHED_CPU) + cache_flags |= KBASE_REG_CPU_CACHED; +#endif /* (CONFIG_MALI_CACHE_COHERENT) */ + + return cache_flags; +} + + +void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) +{ + dma_sync_single_for_device(kbdev->dev, handle, size, dir); +} + + +void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir) +{ + dma_sync_single_for_cpu(kbdev->dev, handle, size, dir); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h new file mode 100644 index 00000000000..d2487fdf21d --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_cache_policy.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Cache Policy API. + */ + +#ifndef _KBASE_CACHE_POLICY_H_ +#define _KBASE_CACHE_POLICY_H_ + +#include "mali_kbase.h" +#include "mali_base_kernel.h" +#include "mali_kbase_device_internal.h" +/** + * kbase_cache_enabled - Choose the cache policy for a specific region + * @flags: flags describing attributes of the region + * @nr_pages: total number of pages (backed or not) for the region + * + * Tells whether the CPU and GPU caches should be enabled or not for a specific + * region. + * This function can be modified to customize the cache policy depending on the + * flags and size of the region. + * + * Return: a combination of %KBASE_REG_CPU_CACHED and %KBASE_REG_GPU_CACHED + * depending on the cache policy + */ +u32 kbase_cache_enabled(u32 flags, u32 nr_pages); + +#endif /* _KBASE_CACHE_POLICY_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.c b/drivers/gpu/arm/midgard/mali_kbase_config.c new file mode 100644 index 00000000000..fb615ae02ea --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_config.c @@ -0,0 +1,51 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#include <mali_kbase.h> +#include <mali_kbase_defs.h> +#include <mali_kbase_config_defaults.h> + +int kbasep_platform_device_init(struct kbase_device *kbdev) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_init_func) + return platform_funcs_p->platform_init_func(kbdev); + + return 0; +} + +void kbasep_platform_device_term(struct kbase_device *kbdev) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf *)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_term_func) + platform_funcs_p->platform_term_func(kbdev); +} + +int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed) +{ + KBASE_DEBUG_ASSERT(NULL != clock_speed); + + *clock_speed = 100; + return 0; +} + diff --git a/drivers/gpu/arm/midgard/mali_kbase_config.h b/drivers/gpu/arm/midgard/mali_kbase_config.h new file mode 100644 index 00000000000..5fb226dfd68 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_config.h @@ -0,0 +1,327 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_config.h + * Configuration API and Attributes for KBase + */ + +#ifndef _KBASE_CONFIG_H_ +#define _KBASE_CONFIG_H_ + +#include <asm/page.h> + +#include <mali_malisw.h> +#include <mali_kbase_backend_config.h> + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_config Configuration API and Attributes + * @{ + */ + +#if !MALI_CUSTOMER_RELEASE +/* This flag is set for internal builds so we can run tests without credentials. */ +#define KBASE_HWCNT_DUMP_BYPASS_ROOT 1 +#else +#define KBASE_HWCNT_DUMP_BYPASS_ROOT 0 +#endif + +#include <linux/rbtree.h> + +/* Forward declaration of struct kbase_device */ +struct kbase_device; + +/** + * kbase_platform_funcs_conf - Specifies platform init/term function pointers + * + * Specifies the functions pointers for platform specific initialization and + * termination. By default no functions are required. No additional platform + * specific control is necessary. + */ +struct kbase_platform_funcs_conf { + /** + * platform_init_func - platform specific init function pointer + * @kbdev - kbase_device pointer + * + * Returns 0 on success, negative error code otherwise. + * + * Function pointer for platform specific initialization or NULL if no + * initialization function is required. This function will be called + * before any other callbacks listed in the struct kbase_attribute + * struct (such as Power Management callbacks). + * + * The platform specific private pointer kbase_device::platform_context + * can be accessed (and possibly initialized) in here. 
+ */
+	int (*platform_init_func)(struct kbase_device *kbdev);
+	/**
+	 * platform_term_func - platform specific termination function pointer
+	 * @kbdev - kbase_device pointer
+	 *
+	 * Function pointer for platform specific termination or NULL if no
+	 * termination function is required. This function will be called
+	 * after any other callbacks listed in struct kbase_attribute
+	 * (such as Power Management callbacks).
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed (and possibly terminated) in here.
+	 */
+	void (*platform_term_func)(struct kbase_device *kbdev);
+};
+
+/*
+ * @brief Specifies the callbacks for power management
+ *
+ * By default no callbacks will be made and the GPU must not be powered off.
+ */
+struct kbase_pm_callback_conf {
+	/** Callback for when the GPU is idle and the power to it can be switched off.
+	 *
+	 * The system integrator can decide whether to either do nothing, just switch off
+	 * the clocks to the GPU, or to completely power down the GPU.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callback's responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the GPU is about to become active and power must be supplied.
+	 *
+	 * This function must not return until the GPU is powered and clocked sufficiently for register access to
+	 * succeed. The return value specifies whether the GPU was powered down since the call to power_off_callback.
+	 * If the GPU state has been lost then this function must return 1, otherwise it should return 0.
+	 * The platform specific private pointer kbase_device::platform_context can be accessed and modified in here. It is the
+	 * platform \em callback's responsibility to initialize and terminate this pointer if used (see @ref kbase_platform_funcs_conf).
+	 *
+	 * The return value of the first call to this function is ignored.
+	 *
+	 * @return 1 if the GPU state may have been lost, 0 otherwise.
+	 */
+	int (*power_on_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is requesting a suspend and GPU power
+	 * must be switched off.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a preceding call to power_off_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_off_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callback's responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+	 */
+	void (*power_suspend_callback)(struct kbase_device *kbdev);
+
+	/** Callback for when the system is resuming from a suspend and GPU
+	 * power must be switched on.
+	 *
+	 * Note that if this callback is present, then this may be called
+	 * without a following call to power_on_callback. Therefore this
+	 * callback must be able to take any action that might otherwise happen
+	 * in power_on_callback.
+	 *
+	 * The platform specific private pointer kbase_device::platform_context
+	 * can be accessed and modified in here. It is the platform \em
+	 * callback's responsibility to initialize and terminate this pointer if
+	 * used (see @ref kbase_platform_funcs_conf).
+ */
+	void (*power_resume_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management initialization.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * will become active from calls made to the OS from within this function.
+	 * The runtime calls can be triggered by calls from @ref power_off_callback and @ref power_on_callback.
+	 * Note: for Linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else negative error code.
+	 */
+	int (*power_runtime_init_callback)(struct kbase_device *kbdev);
+
+	/** Callback for handling runtime power management termination.
+	 *
+	 * The runtime power management callbacks @ref power_runtime_off_callback and @ref power_runtime_on_callback
+	 * should no longer be called by the OS on completion of this function.
+	 * Note: for Linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_term_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-off power management callback
+	 *
+	 * For Linux this callback will be called by the kernel runtime_suspend callback.
+	 * Note: for Linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 */
+	void (*power_runtime_off_callback)(struct kbase_device *kbdev);
+
+	/** Callback for runtime power-on power management callback
+	 *
+	 * For Linux this callback will be called by the kernel runtime_resume callback.
+	 * Note: for Linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+	 *
+	 * @return 0 on success, else OS error code.
+	 */
+	int (*power_runtime_on_callback)(struct kbase_device *kbdev);
+};
+
+/**
+ * kbase_cpuprops_get_default_clock_speed - default for CPU_SPEED_FUNC
+ * @clock_speed - see kbase_cpu_clk_speed_func for details on the parameters
+ *
+ * Returns 0 on success, negative error code otherwise.
+ *
+ * Default implementation of CPU_SPEED_FUNC. This function sets clock_speed
+ * to 100, so will be an underestimate for any real system.
+ */
+int kbase_cpuprops_get_default_clock_speed(u32 * const clock_speed);
+
+/**
+ * kbase_cpu_clk_speed_func - Type of the function pointer for CPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current CPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ *
+ * This is mainly used to implement OpenCL's clGetDeviceInfo().
+ */
+typedef int (*kbase_cpu_clk_speed_func) (u32 *clock_speed);
+
+/**
+ * kbase_gpu_clk_speed_func - Type of the function pointer for GPU_SPEED_FUNC
+ * @param clock_speed - pointer to store the current GPU clock speed in MHz
+ *
+ * Returns 0 on success, otherwise negative error code.
+ * When an error is returned the caller assumes the maximum GPU speed stored in
+ * gpu_freq_khz_max.
+ *
+ * If the system timer is not available then this function is required
+ * for the OpenCL queue profiling to return correct timing information.
+ */
+typedef int (*kbase_gpu_clk_speed_func) (u32 *clock_speed);
+
+#ifdef CONFIG_OF
+struct kbase_platform_config {
+};
+#else
+
+/*
+ * @brief Specifies start and end of I/O memory region.
+ */
+struct kbase_io_memory_region {
+	u64 start;
+	u64 end;
+};
+
+/*
+ * @brief Specifies I/O related resources like IRQs and memory region for I/O operations.
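
To make the power_on_callback contract above concrete, here is a minimal standalone model, with all names invented for the demo: power-on reports whether GPU state survived since the last power-off, which is what tells the driver it must reprogram the hardware.

#include <stdio.h>

/* Toy model of the power_on/power_off contract documented above. */
struct demo_gpu {
	int powered;
	int state_lost; /* set whenever a power-off drops the rails */
};

static void demo_power_off(struct demo_gpu *gpu)
{
	gpu->powered = 0;
	gpu->state_lost = 1; /* this hypothetical platform fully powers down */
}

static int demo_power_on(struct demo_gpu *gpu)
{
	int lost = gpu->state_lost;

	gpu->powered = 1;
	gpu->state_lost = 0;
	return lost; /* 1: caller must restore GPU state, 0: state retained */
}

int main(void)
{
	struct demo_gpu gpu = { 0, 1 };

	/* The return value of the very first power-on is ignored. */
	(void)demo_power_on(&gpu);
	demo_power_off(&gpu);
	printf("state lost after repower: %d\n", demo_power_on(&gpu));
	return 0;
}

A platform that only gates clocks would instead return 0 from power-on, letting the driver skip the state restore entirely.
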
+ */
+struct kbase_io_resources {
+	u32 job_irq_number;
+	u32 mmu_irq_number;
+	u32 gpu_irq_number;
+	struct kbase_io_memory_region io_memory_region;
+};
+
+struct kbase_platform_config {
+	const struct kbase_io_resources *io_resources;
+};
+
+#endif /* CONFIG_OF */
+
+/**
+ * @brief Gets the pointer to platform config.
+ *
+ * @return Pointer to the platform config
+ */
+struct kbase_platform_config *kbase_get_platform_config(void);
+
+/**
+ * kbasep_platform_device_init - Platform specific call to initialize hardware
+ * @kbdev: kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can initialize any hardware and context state that
+ * is required for the GPU block to function.
+ *
+ * Return: 0 if no errors have been found in the config.
+ * Negative error code otherwise.
+ */
+int kbasep_platform_device_init(struct kbase_device *kbdev);
+
+/**
+ * kbasep_platform_device_term - Platform specific call to terminate hardware
+ * @kbdev: Kbase device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration
+ * attributes. The routine can destroy any platform specific context state and
+ * shut down any hardware functionality that is outside of the Power Management
+ * callbacks.
+ */
+void kbasep_platform_device_term(struct kbase_device *kbdev);
+
+
+/**
+ * kbase_platform_early_init - Early initialisation of the platform code
+ *
+ * This function will be called when the module is loaded to perform any
+ * early initialisation required by the platform code, such as reading
+ * platform specific device tree entries for the GPU.
+ *
+ * Return: 0 on success; any other value causes module initialisation to fail
+ */
+int kbase_platform_early_init(void);
+
+#ifndef CONFIG_OF
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+/**
+ * kbase_platform_fake_register - Register a platform device for the GPU
+ *
+ * This can be used to register a platform device on systems where device tree
+ * is not enabled and the platform initialisation code in the kernel doesn't
+ * create the GPU device. Where possible device tree should be used instead.
+ *
+ * Return: 0 on success; any other value causes module initialisation to fail
+ */
+int kbase_platform_fake_register(void);
+
+/**
+ * kbase_platform_fake_unregister - Unregister a fake platform device
+ *
+ * Unregister the platform device created with kbase_platform_fake_register()
+ */
+void kbase_platform_fake_unregister(void);
+#endif
+#endif
+
+	/** @} *//* end group kbase_config */
+	/** @} *//* end group base_kbase_api */
+	/** @} *//* end group base_api */
+
+#endif /* _KBASE_CONFIG_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
new file mode 100644
index 00000000000..ce5d0703911
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_config_defaults.h
@@ -0,0 +1,259 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
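
For the non-device-tree case, a platform port fills the structures above in its platform file, much like the vexpress examples listed in the Kbuild. Below is a compilable sketch with placeholder IRQ numbers and register window, not values for any real board.

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;
typedef uint64_t u64;

struct kbase_io_memory_region { u64 start; u64 end; };

struct kbase_io_resources {
	u32 job_irq_number;
	u32 mmu_irq_number;
	u32 gpu_irq_number;
	struct kbase_io_memory_region io_memory_region;
};

/* Placeholder IRQ numbers and register window; real values come from
 * the board documentation or its device tree. */
static const struct kbase_io_resources demo_io_resources = {
	.job_irq_number = 68,
	.mmu_irq_number = 69,
	.gpu_irq_number = 70,
	.io_memory_region = {
		.start = 0xfc010000,
		.end   = 0xfc010000 + (4096 * 4) - 1,
	},
};

int main(void)
{
	printf("GPU registers at [%#llx, %#llx], job IRQ %u\n",
	       (unsigned long long)demo_io_resources.io_memory_region.start,
	       (unsigned long long)demo_io_resources.io_memory_region.end,
	       demo_io_resources.job_irq_number);
	return 0;
}

The driver's kbase_get_platform_config() would then simply return a struct kbase_platform_config pointing at such a table.
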
+ *
+ */
+
+
+
+/**
+ * @file mali_kbase_config_defaults.h
+ *
+ * Default values for configuration settings
+ *
+ */
+
+#ifndef _KBASE_CONFIG_DEFAULTS_H_
+#define _KBASE_CONFIG_DEFAULTS_H_
+
+/* Include mandatory definitions per platform */
+#include <mali_kbase_config_platform.h>
+
+/**
+ * IRQ throttle. This is the minimum desired time between two
+ * consecutive GPU interrupts (given in 'us'). The IRQ throttle
+ * GPU register will be configured after this, taking into
+ * account the configured max frequency.
+ *
+ * Attached value: number in microseconds
+ */
+#define DEFAULT_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * Default Job Scheduler initial runtime of a context for the CFS Policy,
+ * in time-slices.
+ *
+ * This value is relative to that of the least-run context, and defines
+ * where in the CFS queue a new context is added. A value of 1 means 'after
+ * the least-run context has used its timeslice'. Therefore, when all
+ * contexts consistently use the same amount of time, a value of 1 models a
+ * FIFO. A value of 0 would model a LIFO.
+ *
+ * The value is represented in "numbers of time slices". Multiply this
+ * value by that defined in @ref DEFAULT_JS_CTX_TIMESLICE_NS to get
+ * the time value for this in nanoseconds.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES 1
+
+/**
+ * Default Job Scheduler minimum runtime value of a context for CFS, in
+ * time_slices relative to that of the least-run context.
+ *
+ * This is a measure of how much preferential treatment is given to a
+ * context that is not run very often.
+ *
+ * Specifically, this value defines how many timeslices such a context is
+ * (initially) allowed to use at once. Such contexts (e.g. 'interactive'
+ * processes) will appear near the front of the CFS queue, and can initially
+ * use more time than contexts that run continuously (e.g. 'batch'
+ * processes).
+ *
+ * This limit \b prevents a "stored-up timeslices" DoS attack, where a ctx
+ * not run for a long time attacks the system by using a very large initial
+ * number of timeslices when it finally does run.
+ *
+ * @note A value of zero allows not-run-often contexts to get scheduled in
+ * quickly, but to only use a single timeslice when they get scheduled in.
+ */
+#define DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES 2
+
+/**
+ * Boolean indicating whether the driver is configured to be secure at
+ * a potential loss of performance.
+ *
+ * This currently affects only r0p0-15dev0 HW and earlier.
+ *
+ * On r0p0-15dev0 HW and earlier, there are tradeoffs between security and
+ * performance:
+ *
+ * - When this is set to true, the driver remains fully secure,
+ * but potentially loses performance compared with setting this to
+ * false.
+ * - When set to false, the driver is open to certain security
+ * attacks.
+ *
+ * From r0p0-00rel0 and onwards, there is no security loss by setting
+ * this to false, and no performance loss by setting it to
+ * true.
+ */
+#define DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE false
+
+enum {
+	/**
+	 * Use unrestricted Address ID width on the AXI bus.
+	 */
+	KBASE_AID_32 = 0x0,
+
+	/**
+	 * Restrict GPU to a half of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_16 = 0x3,
+
+	/**
+	 * Restrict GPU to a quarter of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+	 */
+	KBASE_AID_8 = 0x2,
+
+	/**
+	 * Restrict GPU to an eighth of maximum Address ID count.
+	 * This will reduce performance, but reduce bus load due to GPU.
+ */
+	KBASE_AID_4 = 0x1
+};
+
+/**
+ * Default setting for read Address ID limiting on AXI bus.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_ARID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for write Address ID limiting on AXI.
+ *
+ * Attached value: u32 register value
+ *    KBASE_AID_32 - use the full 32 IDs (5 ID bits)
+ *    KBASE_AID_16 - use 16 IDs (4 ID bits)
+ *    KBASE_AID_8  - use 8 IDs (3 ID bits)
+ *    KBASE_AID_4  - use 4 IDs (2 ID bits)
+ * Default value: KBASE_AID_32 (no limit). Note hardware implementation
+ * may limit to a lower value.
+ */
+#define DEFAULT_AWID_LIMIT KBASE_AID_32
+
+/**
+ * Default setting for using alternative hardware counters.
+ */
+#define DEFAULT_ALTERNATIVE_HWC false
+
+/**
+ * Default UMP device mapping. A UMP_DEVICE_<device>_SHIFT value which
+ * defines which UMP device this GPU should be mapped to.
+ */
+#define DEFAULT_UMP_GPU_DEVICE_SHIFT UMP_DEVICE_Z_SHIFT
+
+/*
+ * Default period for DVFS sampling
+ */
+#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+
+/*
+ * Power Management poweroff tick granularity. This is in nanoseconds to
+ * allow HR timer support.
+ *
+ * On each scheduling tick, the power manager core may decide to:
+ * -# Power off one or more shader cores
+ * -# Power off the entire GPU
+ */
+#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+
+/*
+ * Power Manager number of ticks before shader cores are powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+
+/*
+ * Power Manager number of ticks before GPU is powered off
+ */
+#define DEFAULT_PM_POWEROFF_TICK_GPU (2) /* 400-800us */
+
+/*
+ * Default scheduling tick granularity
+ */
+#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are soft-stopped.
+ *
+ * This defines the time-slice for a job (which may be different from that of a
+ * context)
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
+ */
+#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */
+#define DEFAULT_JS_HARD_STOP_TICKS_SS_8408 (300) /* 30s */
+
+/*
+ * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */
+
+/*
+ * Default minimum number of scheduling ticks before jobs are hard-stopped
+ * during dumping
+ */
+#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job
+ */
+#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */
+#define DEFAULT_JS_RESET_TICKS_SS_8408 (450) /* 45s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" CL job.
+ */
+#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */
+
+/*
+ * Default minimum number of scheduling ticks before the GPU is reset to clear a
+ * "stuck" job during dumping.
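
The tick counts above only make sense together with the tick period; multiplying them out reproduces the times quoted in the comments (e.g. 50 ticks of the 100ms scheduling period is the 5s hard-stop). The small standalone check below copies the values from the defines; the lower bound of each range reflects that an event can land anywhere within its first tick, which is how the "400-800us" style comments read.

#include <stdio.h>

/* Values copied from the defaults above. */
#define PM_GPU_POWEROFF_TICK_NS   400000ull      /* 400us */
#define PM_POWEROFF_TICK_SHADER   2
#define JS_SCHEDULING_PERIOD_NS   100000000ull   /* 100ms */
#define JS_HARD_STOP_TICKS_SS     50
#define JS_RESET_TICKS_SS         55

int main(void)
{
	printf("shader poweroff: %llu-%llu us\n",
	       (PM_POWEROFF_TICK_SHADER - 1) * PM_GPU_POWEROFF_TICK_NS / 1000,
	       PM_POWEROFF_TICK_SHADER * PM_GPU_POWEROFF_TICK_NS / 1000);
	printf("hard stop: %llu ms\n",
	       JS_HARD_STOP_TICKS_SS * JS_SCHEDULING_PERIOD_NS / 1000000);
	printf("gpu reset: %llu ms\n",
	       JS_RESET_TICKS_SS * JS_SCHEDULING_PERIOD_NS / 1000000);
	return 0;
}
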
+ */ +#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */ + +/* + * Default number of milliseconds given for other jobs on the GPU to be + * soft-stopped when the GPU needs to be reset. + */ +#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */ + +/* + * Default timeslice that a context is scheduled in for, in nanoseconds. + * + * When a context has used up this amount of time across its jobs, it is + * scheduled out to let another run. + * + * @note the resolution is nanoseconds (ns) here, because that's the format + * often used by the OS. + */ +#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */ + +#endif /* _KBASE_CONFIG_DEFAULTS_H_ */ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_context.c b/drivers/gpu/arm/midgard/mali_kbase_context.c new file mode 100644 index 00000000000..e066b286c51 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_context.c @@ -0,0 +1,274 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Base kernel context APIs + */ + +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#include <mali_kbase_instr.h> + +#define MEMPOOL_PAGES 16384 + + +/** + * kbase_create_context() - Create a kernel base context. + * @kbdev: Kbase device + * @is_compat: Force creation of a 32-bit context + * + * Allocate and init a kernel base context. 
+ * + * Return: new kbase context + */ +struct kbase_context * +kbase_create_context(struct kbase_device *kbdev, bool is_compat) +{ + struct kbase_context *kctx; + int mali_err; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + /* zero-inited as lot of code assume it's zero'ed out on create */ + kctx = vzalloc(sizeof(*kctx)); + + if (!kctx) + goto out; + + /* creating a context is considered a disjoint event */ + kbase_disjoint_event(kbdev); + + kctx->kbdev = kbdev; + kctx->as_nr = KBASEP_AS_NR_INVALID; + kctx->is_compat = is_compat; +#ifdef CONFIG_MALI_TRACE_TIMELINE + kctx->timeline.owner_tgid = task_tgid_nr(current); +#endif + atomic_set(&kctx->setup_complete, 0); + atomic_set(&kctx->setup_in_progress, 0); + kctx->infinite_cache_active = 0; + spin_lock_init(&kctx->mm_update_lock); + kctx->process_mm = NULL; + atomic_set(&kctx->nonmapped_pages, 0); + kctx->slots_pullable = 0; + + if (kbase_mem_allocator_init(&kctx->osalloc, MEMPOOL_PAGES, kctx->kbdev) != 0) + goto free_kctx; + + kctx->pgd_allocator = &kctx->osalloc; + atomic_set(&kctx->used_pages, 0); + + if (kbase_jd_init(kctx)) + goto free_allocator; + + mali_err = kbasep_js_kctx_init(kctx); + if (mali_err) + goto free_jd; /* safe to call kbasep_js_kctx_term in this case */ + + mali_err = kbase_event_init(kctx); + if (mali_err) + goto free_jd; + + mutex_init(&kctx->reg_lock); + + INIT_LIST_HEAD(&kctx->waiting_soft_jobs); +#ifdef CONFIG_KDS + INIT_LIST_HEAD(&kctx->waiting_kds_resource); +#endif + + mali_err = kbase_mmu_init(kctx); + if (mali_err) + goto free_event; + + kctx->pgd = kbase_mmu_alloc_pgd(kctx); + if (!kctx->pgd) + goto free_mmu; + + if (kbase_mem_allocator_alloc(&kctx->osalloc, 1, &kctx->aliasing_sink_page) != 0) + goto no_sink_page; + + kctx->tgid = current->tgid; + kctx->pid = current->pid; + init_waitqueue_head(&kctx->event_queue); + + kctx->cookies = KBASE_COOKIE_MASK; + + /* Make sure page 0 is not used... */ + if (kbase_region_tracker_init(kctx)) + goto no_region_tracker; +#ifdef CONFIG_GPU_TRACEPOINTS + atomic_set(&kctx->jctx.work_id, 0); +#endif +#ifdef CONFIG_MALI_TRACE_TIMELINE + atomic_set(&kctx->timeline.jd_atoms_in_flight, 0); +#endif + + kctx->id = atomic_add_return(1, &(kbdev->ctx_num)) - 1; + + mutex_init(&kctx->vinstr_cli_lock); + + return kctx; + +no_region_tracker: +no_sink_page: + kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0); + /* VM lock needed for the call to kbase_mmu_free_pgd */ + kbase_gpu_vm_lock(kctx); + kbase_mmu_free_pgd(kctx); + kbase_gpu_vm_unlock(kctx); +free_mmu: + kbase_mmu_term(kctx); +free_event: + kbase_event_cleanup(kctx); +free_jd: + /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ + kbasep_js_kctx_term(kctx); + kbase_jd_exit(kctx); +free_allocator: + kbase_mem_allocator_term(&kctx->osalloc); +free_kctx: + vfree(kctx); +out: + return NULL; +} +KBASE_EXPORT_SYMBOL(kbase_create_context); + +static void kbase_reg_pending_dtor(struct kbase_va_region *reg) +{ + dev_dbg(reg->kctx->kbdev->dev, "Freeing pending unmapped region\n"); + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); + kfree(reg); +} + +/** + * kbase_destroy_context - Destroy a kernel base context. + * @kctx: Context to destroy + * + * Calls kbase_destroy_os_context() to free OS specific structures. + * Will release all outstanding regions. 
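
kbase_create_context() above uses the classic kernel goto-unwind pattern: each successful init step adds one more cleanup label that later failures fall through, so teardown runs in exact reverse order of construction. Reduced to its skeleton, with all resource names invented for the demo:

#include <stdio.h>
#include <stdlib.h>

/* Skeleton of the error-unwind ladder used by kbase_create_context():
 * on failure at step N, jump to the label that frees steps N-1..1. */
struct demo_ctx { void *mem_pool; void *job_data; void *mmu_tables; };

static struct demo_ctx *demo_create_ctx(void)
{
	struct demo_ctx *ctx = calloc(1, sizeof(*ctx));

	if (!ctx)
		goto out;

	ctx->mem_pool = malloc(64);
	if (!ctx->mem_pool)
		goto free_ctx;

	ctx->job_data = malloc(64);
	if (!ctx->job_data)
		goto free_pool;

	ctx->mmu_tables = malloc(64);
	if (!ctx->mmu_tables)
		goto free_jd;

	return ctx;

free_jd:
	free(ctx->job_data);
free_pool:
	free(ctx->mem_pool);
free_ctx:
	free(ctx);
out:
	return NULL;
}

int main(void)
{
	struct demo_ctx *ctx = demo_create_ctx();

	printf("context %screated\n", ctx ? "" : "not ");
	return 0;
}

The vzalloc() of the context is what makes the driver's "safe to call term even when not initialized" comments hold: zeroed state lets teardown helpers detect untouched members.
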
+ */ +void kbase_destroy_context(struct kbase_context *kctx) +{ + struct kbase_device *kbdev; + int pages; + unsigned long pending_regions_to_clean; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(NULL != kbdev); + + KBASE_TRACE_ADD(kbdev, CORE_CTX_DESTROY, kctx, NULL, 0u, 0u); + + /* Ensure the core is powered up for the destroy process */ + /* A suspend won't happen here, because we're in a syscall from a userspace + * thread. */ + kbase_pm_context_active(kbdev); + + kbase_jd_zap_context(kctx); + kbase_event_cleanup(kctx); + + kbase_gpu_vm_lock(kctx); + + /* MMU is disabled as part of scheduling out the context */ + kbase_mmu_free_pgd(kctx); + + /* drop the aliasing sink page now that it can't be mapped anymore */ + kbase_mem_allocator_free(&kctx->osalloc, 1, &kctx->aliasing_sink_page, 0); + + /* free pending region setups */ + pending_regions_to_clean = (~kctx->cookies) & KBASE_COOKIE_MASK; + while (pending_regions_to_clean) { + unsigned int cookie = __ffs(pending_regions_to_clean); + + BUG_ON(!kctx->pending_regions[cookie]); + + kbase_reg_pending_dtor(kctx->pending_regions[cookie]); + + kctx->pending_regions[cookie] = NULL; + pending_regions_to_clean &= ~(1UL << cookie); + } + + kbase_region_tracker_term(kctx); + kbase_gpu_vm_unlock(kctx); + + /* Safe to call this one even when didn't initialize (assuming kctx was sufficiently zeroed) */ + kbasep_js_kctx_term(kctx); + + kbase_jd_exit(kctx); + + kbase_pm_context_idle(kbdev); + + kbase_mmu_term(kctx); + + pages = atomic_read(&kctx->used_pages); + if (pages != 0) + dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); + + kbase_mem_allocator_term(&kctx->osalloc); + WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0); + + vfree(kctx); +} +KBASE_EXPORT_SYMBOL(kbase_destroy_context); + +/** + * kbase_context_set_create_flags - Set creation flags on a context + * @kctx: Kbase context + * @flags: Flags to set + * + * Return: 0 on success + */ +int kbase_context_set_create_flags(struct kbase_context *kctx, u32 flags) +{ + int err = 0; + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long irq_flags; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + js_kctx_info = &kctx->jctx.sched_info; + + /* Validate flags */ + if (flags != (flags & BASE_CONTEXT_CREATE_KERNEL_FLAGS)) { + err = -EINVAL; + goto out; + } + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags); + + /* Translate the flags */ + if ((flags & BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED) == 0) + js_kctx_info->ctx.flags &= ~((u32) KBASE_CTX_FLAG_SUBMIT_DISABLED); + + if ((flags & BASE_CONTEXT_HINT_ONLY_COMPUTE) != 0) + js_kctx_info->ctx.flags |= (u32) KBASE_CTX_FLAG_HINT_ONLY_COMPUTE; + + /* Latch the initial attributes into the Job Scheduler */ + kbasep_js_ctx_attr_set_initial_attrs(kctx->kbdev, kctx); + + spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, + irq_flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + out: + return err; +} +KBASE_EXPORT_SYMBOL(kbase_context_set_create_flags); diff --git a/drivers/gpu/arm/midgard/mali_kbase_core_linux.c b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c new file mode 100644 index 00000000000..8cffa55fa98 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_core_linux.c @@ -0,0 +1,3930 @@ + +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. 
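
kbase_context_set_create_flags() above validates its input with the standard mask check, flags != (flags & ALLOWED), rejecting any bit outside the permitted set before translating the rest. A standalone illustration with invented flag values (the real BASE_CONTEXT_* bits live in the UK interface headers):

#include <stdio.h>

/* Demo bit values only. */
#define FLAG_SUBMIT_DISABLED  (1u << 0)
#define FLAG_ONLY_COMPUTE     (1u << 1)
#define CREATE_KERNEL_FLAGS   (FLAG_SUBMIT_DISABLED | FLAG_ONLY_COMPUTE)

/* Same validation shape as kbase_context_set_create_flags(): any bit
 * outside the permitted mask rejects the whole request. */
static int validate_flags(unsigned int flags)
{
	if (flags != (flags & CREATE_KERNEL_FLAGS))
		return -22; /* -EINVAL */
	return 0;
}

int main(void)
{
	printf("valid:   %d\n", validate_flags(FLAG_ONLY_COMPUTE));
	printf("invalid: %d\n", validate_flags(1u << 7));
	return 0;
}
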
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase.h> +#include <mali_kbase_hwaccess_gpuprops.h> +#include <mali_kbase_config_defaults.h> +#include <mali_kbase_uku.h> +#include <mali_midg_regmap.h> +#include <mali_kbase_instr.h> +#include <mali_kbase_gator.h> +#include <backend/gpu/mali_kbase_js_affinity.h> +#include <mali_kbase_mem_linux.h> +#ifdef CONFIG_MALI_DEVFREQ +#include <backend/gpu/mali_kbase_devfreq.h> +#endif /* CONFIG_MALI_DEVFREQ */ +#include <mali_kbase_cpuprops.h> +#ifdef CONFIG_MALI_NO_MALI +#include "mali_kbase_model_linux.h" +#endif /* CONFIG_MALI_NO_MALI */ +#include "mali_kbase_mem_profile_debugfs_buf_size.h" +#include "mali_kbase_debug_mem_view.h" +#include <mali_kbase_hwaccess_backend.h> + +#ifdef CONFIG_KDS +#include <linux/kds.h> +#include <linux/anon_inodes.h> +#include <linux/syscalls.h> +#endif /* CONFIG_KDS */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/miscdevice.h> +#include <linux/list.h> +#include <linux/semaphore.h> +#include <linux/fs.h> +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/compat.h> /* is_compat_task */ +#include <linux/version.h> +#ifdef CONFIG_MALI_PLATFORM_DEVICETREE +#include <linux/pm_runtime.h> +#endif /* CONFIG_MALI_PLATFORM_DEVICETREE */ +#include <mali_kbase_hw.h> +#include <platform/mali_kbase_platform_common.h> +#ifdef CONFIG_MALI_PLATFORM_FAKE +#include <platform/mali_kbase_platform_fake.h> +#endif /*CONFIG_MALI_PLATFORM_FAKE */ +#ifdef CONFIG_SYNC +#include <mali_kbase_sync.h> +#endif /* CONFIG_SYNC */ +#ifdef CONFIG_PM_DEVFREQ +#include <linux/devfreq.h> +#endif /* CONFIG_PM_DEVFREQ */ +#include <linux/clk.h> + +#include <mali_kbase_config.h> + +#ifdef CONFIG_MACH_MANTA +#include <plat/devs.h> +#endif + + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) +#include <linux/pm_opp.h> +#else +#include <linux/opp.h> +#endif + +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include <mali_kbase_tlstream.h> +#endif + +/* GPU IRQ Tags */ +#define JOB_IRQ_TAG 0 +#define MMU_IRQ_TAG 1 +#define GPU_IRQ_TAG 2 + +#if MALI_UNIT_TEST +static struct kbase_exported_test_data shared_kernel_test_data; +EXPORT_SYMBOL(shared_kernel_test_data); +#endif /* MALI_UNIT_TEST */ + +#define KBASE_DRV_NAME "mali" + +static const char kbase_drv_name[] = KBASE_DRV_NAME; + +static int kbase_dev_nr; + +static DEFINE_MUTEX(kbase_dev_list_lock); +static LIST_HEAD(kbase_dev_list); + +#define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)" +static inline void __compile_time_asserts(void) +{ + CSTD_COMPILE_TIME_ASSERT(sizeof(KERNEL_SIDE_DDK_VERSION_STRING) <= KBASE_GET_VERSION_BUFFER_SIZE); +} + +#ifdef CONFIG_KDS + +struct kbasep_kds_resource_set_file_data { + struct kds_resource_set *lock; +}; + +static int kds_resource_release(struct inode *inode, struct file *file); + +static const struct file_operations kds_resource_fops = { + .release = kds_resource_release +}; + +struct 
kbase_kds_resource_list_data {
+	struct kds_resource **kds_resources;
+	unsigned long *kds_access_bitmap;
+	int num_elems;
+};
+
+static int kds_resource_release(struct inode *inode, struct file *file)
+{
+	struct kbasep_kds_resource_set_file_data *data;
+
+	data = (struct kbasep_kds_resource_set_file_data *)file->private_data;
+	if (NULL != data) {
+		if (NULL != data->lock)
+			kds_resource_set_release(&data->lock);
+
+		kfree(data);
+	}
+	return 0;
+}
+
+static int kbasep_kds_allocate_resource_list_data(struct kbase_context *kctx, struct base_external_resource *ext_res, int num_elems, struct kbase_kds_resource_list_data *resources_list)
+{
+	struct base_external_resource *res = ext_res;
+	int res_id;
+
+	/* assume we have to wait for all */
+
+	KBASE_DEBUG_ASSERT(0 != num_elems);
+	resources_list->kds_resources = kmalloc_array(num_elems,
+			sizeof(struct kds_resource *), GFP_KERNEL);
+
+	if (NULL == resources_list->kds_resources)
+		return -ENOMEM;
+
+	KBASE_DEBUG_ASSERT(0 != num_elems);
+	resources_list->kds_access_bitmap = kzalloc(
+			sizeof(unsigned long) *
+			((num_elems + BITS_PER_LONG - 1) / BITS_PER_LONG),
+			GFP_KERNEL);
+
+	if (NULL == resources_list->kds_access_bitmap) {
+		/* free the resource array allocated above */
+		kfree(resources_list->kds_resources);
+		return -ENOMEM;
+	}
+
+	kbase_gpu_vm_lock(kctx);
+	for (res_id = 0; res_id < num_elems; res_id++, res++) {
+		int exclusive;
+		struct kbase_va_region *reg;
+		struct kds_resource *kds_res = NULL;
+
+		exclusive = res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE;
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx, res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE);
+
+		/* did we find a matching region object? */
+		if (NULL == reg || (reg->flags & KBASE_REG_FREE))
+			break;
+
+		/* no need to check reg->alloc as only regions with an alloc have
+		 * a size, and kbase_region_tracker_find_region_enclosing_address
+		 * only returns regions with size > 0 */
+		switch (reg->gpu_alloc->type) {
+#if defined(CONFIG_UMP) && defined(CONFIG_KDS)
+		case KBASE_MEM_TYPE_IMPORTED_UMP:
+			kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle);
+			break;
+#endif /* defined(CONFIG_UMP) && defined(CONFIG_KDS) */
+		default:
+			break;
+		}
+
+		/* no kds resource for the region? */
+		if (!kds_res)
+			break;
+
+		resources_list->kds_resources[res_id] = kds_res;
+
+		if (exclusive)
+			set_bit(res_id, resources_list->kds_access_bitmap);
+	}
+	kbase_gpu_vm_unlock(kctx);
+
+	/* did the loop run to completion? */
+	if (res_id == num_elems)
+		return 0;
+
+	/* Clean up as the resource list is not valid.
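
The kzalloc() above sizes the exclusive-access bitmap by rounding the element count up to whole unsigned longs, which is exactly what the kernel's BITS_TO_LONGS() helper computes. A quick standalone check of that arithmetic:

#include <stdio.h>
#include <limits.h>

#define DEMO_BITS_PER_LONG (CHAR_BIT * sizeof(unsigned long))

/* Same rounding as the kzalloc() size computation above. */
static unsigned long bitmap_longs(unsigned int nbits)
{
	return (nbits + DEMO_BITS_PER_LONG - 1) / DEMO_BITS_PER_LONG;
}

int main(void)
{
	unsigned int counts[] = { 1, 63, 64, 65, 200 };
	unsigned int i;

	for (i = 0; i < sizeof(counts) / sizeof(counts[0]); i++)
		printf("%3u resources -> %lu long(s) in bitmap\n",
		       counts[i], bitmap_longs(counts[i]));
	return 0;
}
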
*/ + kfree(resources_list->kds_resources); + kfree(resources_list->kds_access_bitmap); + + return -EINVAL; +} + +static bool kbasep_validate_kbase_pointer( + struct kbase_context *kctx, union kbase_pointer *p) +{ + if (kctx->is_compat) { + if (p->compat_value == 0) + return false; + } else { + if (NULL == p->value) + return false; + } + return true; +} + +static int kbase_external_buffer_lock(struct kbase_context *kctx, + struct kbase_uk_ext_buff_kds_data *args, u32 args_size) +{ + struct base_external_resource *ext_res_copy; + size_t ext_resource_size; + int ret = -EINVAL; + int fd = -EBADF; + struct base_external_resource __user *ext_res_user; + int __user *file_desc_usr; + struct kbasep_kds_resource_set_file_data *fdata; + struct kbase_kds_resource_list_data resource_list_data; + + if (args_size != sizeof(struct kbase_uk_ext_buff_kds_data)) + return -EINVAL; + + /* Check user space has provided valid data */ + if (!kbasep_validate_kbase_pointer(kctx, &args->external_resource) || + !kbasep_validate_kbase_pointer(kctx, &args->file_descriptor) || + (0 == args->num_res) || + (args->num_res > KBASE_MAXIMUM_EXT_RESOURCES)) + return -EINVAL; + + ext_resource_size = sizeof(struct base_external_resource) * args->num_res; + + KBASE_DEBUG_ASSERT(0 != ext_resource_size); + ext_res_copy = kmalloc(ext_resource_size, GFP_KERNEL); + + if (!ext_res_copy) + return -EINVAL; +#ifdef CONFIG_COMPAT + if (kctx->is_compat) { + ext_res_user = compat_ptr(args->external_resource.compat_value); + file_desc_usr = compat_ptr(args->file_descriptor.compat_value); + } else { +#endif /* CONFIG_COMPAT */ + ext_res_user = args->external_resource.value; + file_desc_usr = args->file_descriptor.value; +#ifdef CONFIG_COMPAT + } +#endif /* CONFIG_COMPAT */ + + /* Copy the external resources to lock from user space */ + if (copy_from_user(ext_res_copy, ext_res_user, ext_resource_size)) + goto out; + + /* Allocate data to be stored in the file */ + fdata = kmalloc(sizeof(*fdata), GFP_KERNEL); + + if (!fdata) { + ret = -ENOMEM; + goto out; + } + + /* Parse given elements and create resource and access lists */ + ret = kbasep_kds_allocate_resource_list_data(kctx, + ext_res_copy, args->num_res, &resource_list_data); + if (!ret) { + long err; + + fdata->lock = NULL; + + fd = anon_inode_getfd("kds_ext", &kds_resource_fops, fdata, 0); + + err = copy_to_user(file_desc_usr, &fd, sizeof(fd)); + + /* If the file descriptor was valid and we successfully copied + * it to user space, then we can try and lock the requested + * kds resources. + */ + if ((fd >= 0) && (0 == err)) { + struct kds_resource_set *lock; + + lock = kds_waitall(args->num_res, + resource_list_data.kds_access_bitmap, + resource_list_data.kds_resources, + KDS_WAIT_BLOCKING); + + if (IS_ERR_OR_NULL(lock)) { + ret = -EINVAL; + } else { + ret = 0; + fdata->lock = lock; + } + } else { + ret = -EINVAL; + } + + kfree(resource_list_data.kds_resources); + kfree(resource_list_data.kds_access_bitmap); + } + + if (ret) { + /* If the file was opened successfully then close it which will + * clean up the file data, otherwise we clean up the file data + * ourself. + */ + if (fd >= 0) + sys_close(fd); + else + kfree(fdata); + } +out: + kfree(ext_res_copy); + + return ret; +} +#endif /* CONFIG_KDS */ + +#ifdef CONFIG_MALI_MIPE_ENABLED +static void kbase_create_timeline_objects(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + unsigned int lpu_id; + struct kbasep_kctx_list_element *element; + + /* Create LPU objects. 
*/ + for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { + gpu_js_features *lpu = + &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; + kbase_tlstream_tl_summary_new_lpu(lpu, lpu_id, (u32)*lpu); + } + + /* Create GPU object and make it retain all LPUs. */ + kbase_tlstream_tl_summary_new_gpu( + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, + kbdev->gpu_props.num_cores); + + for (lpu_id = 0; lpu_id < kbdev->gpu_props.num_job_slots; lpu_id++) { + void *lpu = + &kbdev->gpu_props.props.raw_props.js_features[lpu_id]; + kbase_tlstream_tl_summary_lifelink_lpu_gpu(lpu, kbdev); + } + + /* Create object for each known context. */ + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry(element, &kbdev->kctx_list, link) { + kbase_tlstream_tl_summary_new_ctx( + element->kctx, + (u32)(element->kctx->id)); + } + /* Before releasing the lock, reset body stream buffers. + * This will prevent context creation message to be directed to both + * summary and body stream. */ + kbase_tlstream_reset_body_streams(); + mutex_unlock(&kbdev->kctx_list_lock); + /* Static object are placed into summary packet that needs to be + * transmitted first. Flush all streams to make it available to + * user space. */ + kbase_tlstream_flush_streams(); +} +#endif + +static void kbase_api_handshake(struct uku_version_check_args *version) +{ + switch (version->major) { +#ifdef BASE_LEGACY_UK6_SUPPORT + case 6: + /* We are backwards compatible with version 6, + * so pretend to be the old version */ + version->major = 6; + version->minor = 1; + break; +#endif /* BASE_LEGACY_UK6_SUPPORT */ +#ifdef BASE_LEGACY_UK7_SUPPORT + case 7: + /* We are backwards compatible with version 7, + * so pretend to be the old version */ + version->major = 7; + version->minor = 1; + break; +#endif /* BASE_LEGACY_UK7_SUPPORT */ + case BASE_UK_VERSION_MAJOR: + /* set minor to be the lowest common */ + version->minor = min_t(int, BASE_UK_VERSION_MINOR, + (int)version->minor); + break; + default: + /* We return our actual version regardless if it + * matches the version returned by userspace - + * userspace can bail if it can't handle this + * version */ + version->major = BASE_UK_VERSION_MAJOR; + version->minor = BASE_UK_VERSION_MINOR; + break; + } +} + +/** + * enum mali_error - Mali error codes shared with userspace + * + * This is subset of those common Mali errors that can be returned to userspace. + * Values of matching user and kernel space enumerators MUST be the same. + * MALI_ERROR_NONE is guaranteed to be 0. 
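
The version handshake above is a small negotiation: legacy majors are accepted as-is, a matching major negotiates the lowest common minor, and anything else gets the kernel's own version back so userspace can bail. A standalone model of that switch follows; the minor version value is an assumption for the demo, not the driver's actual BASE_UK_VERSION_MINOR.

#include <stdio.h>

#define DEMO_UK_VERSION_MAJOR 8
#define DEMO_UK_VERSION_MINOR 2  /* illustrative value */

struct version { int major, minor; };

/* Mirrors the shape of kbase_api_handshake(). */
static void handshake_demo(struct version *v)
{
	switch (v->major) {
	case 6: /* cf. BASE_LEGACY_UK6_SUPPORT */
	case 7: /* cf. BASE_LEGACY_UK7_SUPPORT */
		v->minor = 1; /* pretend to be the old version */
		break;
	case DEMO_UK_VERSION_MAJOR:
		if (v->minor > DEMO_UK_VERSION_MINOR)
			v->minor = DEMO_UK_VERSION_MINOR;
		break;
	default:
		/* report our real version; userspace decides */
		v->major = DEMO_UK_VERSION_MAJOR;
		v->minor = DEMO_UK_VERSION_MINOR;
		break;
	}
}

int main(void)
{
	struct version v = { DEMO_UK_VERSION_MAJOR, 9 };

	handshake_demo(&v);
	printf("negotiated %d.%d\n", v.major, v.minor);
	return 0;
}
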
+ */ +enum mali_error { + MALI_ERROR_NONE = 0, + MALI_ERROR_OUT_OF_GPU_MEMORY, + MALI_ERROR_OUT_OF_MEMORY, + MALI_ERROR_FUNCTION_FAILED, +}; + +static int kbase_dispatch(struct kbase_context *kctx, void * const args, u32 args_size) +{ + struct kbase_device *kbdev; + union uk_header *ukh = args; + u32 id; + + KBASE_DEBUG_ASSERT(ukh != NULL); + + kbdev = kctx->kbdev; + id = ukh->id; + ukh->ret = MALI_ERROR_NONE; /* Be optimistic */ + + if (UKP_FUNC_ID_CHECK_VERSION == id) { + struct uku_version_check_args *version_check; + + if (args_size != sizeof(struct uku_version_check_args)) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + return 0; + } + version_check = (struct uku_version_check_args *)args; + kbase_api_handshake(version_check); + /* save the proposed version number for later use */ + kctx->api_version = KBASE_API_VERSION(version_check->major, + version_check->minor); + ukh->ret = MALI_ERROR_NONE; + return 0; + } + + /* block calls until version handshake */ + if (kctx->api_version == 0) + return -EINVAL; + + if (!atomic_read(&kctx->setup_complete)) { + struct kbase_uk_set_flags *kbase_set_flags; + + /* setup pending, try to signal that we'll do the setup, + * if setup was already in progress, err this call + */ + if (atomic_cmpxchg(&kctx->setup_in_progress, 0, 1)) + return -EINVAL; + + /* if unexpected call, will stay stuck in setup mode + * (is it the only call we accept?) + */ + if (id != KBASE_FUNC_SET_FLAGS) + return -EINVAL; + + kbase_set_flags = (struct kbase_uk_set_flags *)args; + + /* if not matching the expected call, stay in setup mode */ + if (sizeof(*kbase_set_flags) != args_size) + goto bad_size; + + /* if bad flags, will stay stuck in setup mode */ + if (kbase_context_set_create_flags(kctx, + kbase_set_flags->create_flags) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + + atomic_set(&kctx->setup_complete, 1); + return 0; + } + + /* setup complete, perform normal operation */ + switch (id) { + case KBASE_FUNC_MEM_ALLOC: + { + struct kbase_uk_mem_alloc *mem = args; + struct kbase_va_region *reg; + + if (sizeof(*mem) != args_size) + goto bad_size; + + reg = kbase_mem_alloc(kctx, mem->va_pages, + mem->commit_pages, mem->extent, + &mem->flags, &mem->gpu_va, + &mem->va_alignment); + if (!reg) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + case KBASE_FUNC_MEM_IMPORT: + { + struct kbase_uk_mem_import *mem_import = args; + int __user *phandle; + int handle; + + if (sizeof(*mem_import) != args_size) + goto bad_size; +#ifdef CONFIG_COMPAT + if (kctx->is_compat) + phandle = compat_ptr(mem_import->phandle.compat_value); + else +#endif + phandle = mem_import->phandle.value; + + switch (mem_import->type) { + case BASE_MEM_IMPORT_TYPE_UMP: + get_user(handle, phandle); + break; + case BASE_MEM_IMPORT_TYPE_UMM: + get_user(handle, phandle); + break; + default: + mem_import->type = BASE_MEM_IMPORT_TYPE_INVALID; + break; + } + + if (mem_import->type == BASE_MEM_IMPORT_TYPE_INVALID || + kbase_mem_import(kctx, mem_import->type, + handle, &mem_import->gpu_va, + &mem_import->va_pages, + &mem_import->flags)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + case KBASE_FUNC_MEM_ALIAS: { + struct kbase_uk_mem_alias *alias = args; + struct base_mem_aliasing_info __user *user_ai; + struct base_mem_aliasing_info *ai; + + if (sizeof(*alias) != args_size) + goto bad_size; + + if (alias->nents > 2048) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + if (!alias->nents) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + +#ifdef CONFIG_COMPAT + if (kctx->is_compat) + user_ai = 
compat_ptr(alias->ai.compat_value); + else +#endif + user_ai = alias->ai.value; + + ai = vmalloc(sizeof(*ai) * alias->nents); + + if (!ai) { + ukh->ret = MALI_ERROR_OUT_OF_MEMORY; + break; + } + + if (copy_from_user(ai, user_ai, + sizeof(*ai) * alias->nents)) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + goto copy_failed; + } + + alias->gpu_va = kbase_mem_alias(kctx, &alias->flags, + alias->stride, + alias->nents, ai, + &alias->va_pages); + if (!alias->gpu_va) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + goto no_alias; + } +no_alias: +copy_failed: + vfree(ai); + break; + } + case KBASE_FUNC_MEM_COMMIT: + { + struct kbase_uk_mem_commit *commit = args; + + if (sizeof(*commit) != args_size) + goto bad_size; + + if (commit->gpu_addr & ~PAGE_MASK) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_COMMIT: commit->gpu_addr: passed parameter is invalid"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (kbase_mem_commit(kctx, commit->gpu_addr, + commit->pages, + (base_backing_threshold_status *)&commit->result_subcode)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + + break; + } + + case KBASE_FUNC_MEM_QUERY: + { + struct kbase_uk_mem_query *query = args; + + if (sizeof(*query) != args_size) + goto bad_size; + + if (query->gpu_addr & ~PAGE_MASK) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->gpu_addr: passed parameter is invalid"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + if (query->query != KBASE_MEM_QUERY_COMMIT_SIZE && + query->query != KBASE_MEM_QUERY_VA_SIZE && + query->query != KBASE_MEM_QUERY_FLAGS) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_QUERY: query->query = %lld unknown", (unsigned long long)query->query); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (kbase_mem_query(kctx, query->gpu_addr, + query->query, &query->value) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + else + ukh->ret = MALI_ERROR_NONE; + break; + } + break; + + case KBASE_FUNC_MEM_FLAGS_CHANGE: + { + struct kbase_uk_mem_flags_change *fc = args; + + if (sizeof(*fc) != args_size) + goto bad_size; + + if ((fc->gpu_va & ~PAGE_MASK) && (fc->gpu_va >= PAGE_SIZE)) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FLAGS_CHANGE: mem->gpu_va: passed parameter is invalid"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (kbase_mem_flags_change(kctx, fc->gpu_va, fc->flags, fc->mask)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + + break; + } + case KBASE_FUNC_MEM_FREE: + { + struct kbase_uk_mem_free *mem = args; + + if (sizeof(*mem) != args_size) + goto bad_size; + + if ((mem->gpu_addr & ~PAGE_MASK) && (mem->gpu_addr >= PAGE_SIZE)) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_MEM_FREE: mem->gpu_addr: passed parameter is invalid"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (kbase_mem_free(kctx, mem->gpu_addr)) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + case KBASE_FUNC_JOB_SUBMIT: + { + struct kbase_uk_job_submit *job = args; + + if (sizeof(*job) != args_size) + goto bad_size; + +#ifdef BASE_LEGACY_UK6_SUPPORT + if (kbase_jd_submit(kctx, job, 0) != 0) +#else + if (kbase_jd_submit(kctx, job) != 0) +#endif /* BASE_LEGACY_UK6_SUPPORT */ + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + +#ifdef BASE_LEGACY_UK6_SUPPORT + case KBASE_FUNC_JOB_SUBMIT_UK6: + { + struct kbase_uk_job_submit *job = args; + + if (sizeof(*job) != args_size) + goto bad_size; + + if (kbase_jd_submit(kctx, job, 1) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } +#endif + + case 
KBASE_FUNC_SYNC: + { + struct kbase_uk_sync_now *sn = args; + + if (sizeof(*sn) != args_size) + goto bad_size; + + if (sn->sset.basep_sset.mem_handle & ~PAGE_MASK) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_SYNC: sn->sset.basep_sset.mem_handle: passed parameter is invalid"); + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + +#ifndef CONFIG_MALI_CACHE_COHERENT + if (kbase_sync_now(kctx, &sn->sset) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; +#endif + break; + } + + case KBASE_FUNC_DISJOINT_QUERY: + { + struct kbase_uk_disjoint_query *dquery = args; + + if (sizeof(*dquery) != args_size) + goto bad_size; + + /* Get the disjointness counter value. */ + dquery->counter = kbase_disjoint_event_get(kctx->kbdev); + break; + } + + case KBASE_FUNC_POST_TERM: + { + kbase_event_close(kctx); + break; + } + + case KBASE_FUNC_HWCNT_SETUP: + { + struct kbase_uk_hwcnt_setup *setup = args; + + if (sizeof(*setup) != args_size) + goto bad_size; + + mutex_lock(&kctx->vinstr_cli_lock); + if (kbase_instr_hwcnt_setup(kctx, setup) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + mutex_unlock(&kctx->vinstr_cli_lock); + break; + } + + case KBASE_FUNC_HWCNT_DUMP: + { + /* args ignored */ + mutex_lock(&kctx->vinstr_cli_lock); + if (kbase_instr_hwcnt_dump(kctx) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + mutex_unlock(&kctx->vinstr_cli_lock); + break; + } + + case KBASE_FUNC_HWCNT_CLEAR: + { + /* args ignored */ + mutex_lock(&kctx->vinstr_cli_lock); + if (kbase_vinstr_clear(kbdev->vinstr_ctx, + kctx->vinstr_cli) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + mutex_unlock(&kctx->vinstr_cli_lock); + break; + } + +#ifdef BASE_LEGACY_UK7_SUPPORT + case KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE: + { + struct kbase_uk_cpuprops *setup = args; + + if (sizeof(*setup) != args_size) + goto bad_size; + + if (kbase_cpuprops_uk_get_props(kctx, setup) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } +#endif /* BASE_LEGACY_UK7_SUPPORT */ + + case KBASE_FUNC_GPU_PROPS_REG_DUMP: + { + struct kbase_uk_gpuprops *setup = args; + + if (sizeof(*setup) != args_size) + goto bad_size; + + if (kbase_gpuprops_uk_get_props(kctx, setup) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + case KBASE_FUNC_FIND_CPU_OFFSET: + { + struct kbase_uk_find_cpu_offset *find = args; + + if (sizeof(*find) != args_size) + goto bad_size; + + if (find->gpu_addr & ~PAGE_MASK) { + dev_warn(kbdev->dev, "kbase_dispatch case KBASE_FUNC_FIND_CPU_OFFSET: find->gpu_addr: passed parameter is invalid"); + goto out_bad; + } + + if (find->size > SIZE_MAX || find->cpu_addr > ULONG_MAX) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } else { + int err; + + err = kbasep_find_enclosing_cpu_mapping_offset( + kctx, + find->gpu_addr, + (uintptr_t) find->cpu_addr, + (size_t) find->size, + &find->offset); + + if (err) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } + break; + } + case KBASE_FUNC_GET_VERSION: + { + struct kbase_uk_get_ddk_version *get_version = (struct kbase_uk_get_ddk_version *)args; + + if (sizeof(*get_version) != args_size) + goto bad_size; + + /* version buffer size check is made in compile time assert */ + memcpy(get_version->version_buffer, KERNEL_SIDE_DDK_VERSION_STRING, sizeof(KERNEL_SIDE_DDK_VERSION_STRING)); + get_version->version_string_size = sizeof(KERNEL_SIDE_DDK_VERSION_STRING); + break; + } + + case KBASE_FUNC_STREAM_CREATE: + { +#ifdef CONFIG_SYNC + struct kbase_uk_stream_create *screate = (struct kbase_uk_stream_create *)args; + + if (sizeof(*screate) != args_size) + goto bad_size; + + if 
(strnlen(screate->name, sizeof(screate->name)) >= sizeof(screate->name)) { + /* not NULL terminated */ + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + break; + } + + if (kbase_stream_create(screate->name, &screate->fd) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + else + ukh->ret = MALI_ERROR_NONE; +#else /* CONFIG_SYNC */ + ukh->ret = MALI_ERROR_FUNCTION_FAILED; +#endif /* CONFIG_SYNC */ + break; + } + case KBASE_FUNC_FENCE_VALIDATE: + { +#ifdef CONFIG_SYNC + struct kbase_uk_fence_validate *fence_validate = (struct kbase_uk_fence_validate *)args; + + if (sizeof(*fence_validate) != args_size) + goto bad_size; + + if (kbase_fence_validate(fence_validate->fd) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + else + ukh->ret = MALI_ERROR_NONE; +#endif /* CONFIG_SYNC */ + break; + } + + case KBASE_FUNC_EXT_BUFFER_LOCK: + { +#ifdef CONFIG_KDS + switch (kbase_external_buffer_lock(kctx, + (struct kbase_uk_ext_buff_kds_data *)args, + args_size)) { + case 0: + ukh->ret = MALI_ERROR_NONE; + break; + case -ENOMEM: + ukh->ret = MALI_ERROR_OUT_OF_MEMORY; + break; + default: + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } +#endif /* CONFIG_KDS */ + break; + } + + case KBASE_FUNC_SET_TEST_DATA: + { +#if MALI_UNIT_TEST + struct kbase_uk_set_test_data *set_data = args; + + shared_kernel_test_data = set_data->test_data; + shared_kernel_test_data.kctx.value = (void __user *)kctx; + shared_kernel_test_data.mm.value = (void __user *)current->mm; + ukh->ret = MALI_ERROR_NONE; +#endif /* MALI_UNIT_TEST */ + break; + } + + case KBASE_FUNC_INJECT_ERROR: + { +#ifdef CONFIG_MALI_ERROR_INJECT + unsigned long flags; + struct kbase_error_params params = ((struct kbase_uk_error_params *)args)->params; + + /*mutex lock */ + spin_lock_irqsave(&kbdev->reg_op_lock, flags); + if (job_atom_inject_error(¶ms) != 0) + ukh->ret = MALI_ERROR_OUT_OF_MEMORY; + else + ukh->ret = MALI_ERROR_NONE; + spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); + /*mutex unlock */ +#endif /* CONFIG_MALI_ERROR_INJECT */ + break; + } + + case KBASE_FUNC_MODEL_CONTROL: + { +#ifdef CONFIG_MALI_NO_MALI + unsigned long flags; + struct kbase_model_control_params params = + ((struct kbase_uk_model_control_params *)args)->params; + + /*mutex lock */ + spin_lock_irqsave(&kbdev->reg_op_lock, flags); + if (gpu_model_control(kbdev->model, ¶ms) != 0) + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + else + ukh->ret = MALI_ERROR_NONE; + spin_unlock_irqrestore(&kbdev->reg_op_lock, flags); + /*mutex unlock */ +#endif /* CONFIG_MALI_NO_MALI */ + break; + } + + case KBASE_FUNC_GET_PROFILING_CONTROLS: + { + struct kbase_uk_profiling_controls *controls = + (struct kbase_uk_profiling_controls *)args; + u32 i; + + if (sizeof(*controls) != args_size) + goto bad_size; + + for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++) + controls->profiling_controls[i] = kbase_get_profiling_control(kbdev, i); + + break; + } + + /* used only for testing purposes; these controls are to be set by gator through gator API */ + case KBASE_FUNC_SET_PROFILING_CONTROLS: + { + struct kbase_uk_profiling_controls *controls = + (struct kbase_uk_profiling_controls *)args; + u32 i; + + if (sizeof(*controls) != args_size) + goto bad_size; + + for (i = FBDUMP_CONTROL_MIN; i < FBDUMP_CONTROL_MAX; i++) + _mali_profiling_control(i, controls->profiling_controls[i]); + + break; + } + + case KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD: + { + struct kbase_uk_debugfs_mem_profile_add *add_data = + (struct kbase_uk_debugfs_mem_profile_add *)args; + char *buf; + char __user *user_buf; + + if (sizeof(*add_data) != args_size) + 
goto bad_size; + + if (add_data->len > KBASE_MEM_PROFILE_MAX_BUF_SIZE) { + dev_err(kbdev->dev, "buffer too big"); + goto out_bad; + } + +#ifdef CONFIG_COMPAT + if (kctx->is_compat) + user_buf = compat_ptr(add_data->buf.compat_value); + else +#endif + user_buf = add_data->buf.value; + + buf = kmalloc(add_data->len, GFP_KERNEL); + if (!buf) + goto out_bad; + + if (0 != copy_from_user(buf, user_buf, add_data->len)) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + kfree(buf); + goto out_bad; + } + kbasep_mem_profile_debugfs_insert(kctx, buf, + add_data->len); + + break; + } +#ifdef CONFIG_MALI_MIPE_ENABLED + case KBASE_FUNC_TLSTREAM_ACQUIRE: + { + struct kbase_uk_tlstream_acquire *tlstream_acquire = + args; + + if (sizeof(*tlstream_acquire) != args_size) + goto bad_size; + + if (0 != kbase_tlstream_acquire( + kctx, + &tlstream_acquire->fd)) { + ukh->ret = MALI_ERROR_FUNCTION_FAILED; + } else if (0 <= tlstream_acquire->fd) { + /* Summary stream was cleared during acquire. + * Create static timeline objects that will be + * read by client. */ + kbase_create_timeline_objects(kctx); + } + break; + } + case KBASE_FUNC_TLSTREAM_FLUSH: + { + struct kbase_uk_tlstream_flush *tlstream_flush = + args; + + if (sizeof(*tlstream_flush) != args_size) + goto bad_size; + + kbase_tlstream_flush_streams(); + break; + } +#if MALI_UNIT_TEST + case KBASE_FUNC_TLSTREAM_TEST: + { + struct kbase_uk_tlstream_test *tlstream_test = args; + + if (sizeof(*tlstream_test) != args_size) + goto bad_size; + + kbase_tlstream_test( + tlstream_test->tpw_count, + tlstream_test->msg_delay, + tlstream_test->msg_count, + tlstream_test->aux_msg); + break; + } + case KBASE_FUNC_TLSTREAM_STATS: + { + struct kbase_uk_tlstream_stats *tlstream_stats = args; + + if (sizeof(*tlstream_stats) != args_size) + goto bad_size; + + kbase_tlstream_stats( + &tlstream_stats->bytes_collected, + &tlstream_stats->bytes_generated); + break; + } +#endif /* MALI_UNIT_TEST */ +#endif /* CONFIG_MALI_MIPE_ENABLED */ + /* used to signal the job core dump on fault has terminated and release the + * refcount of the context to let it be removed. It requires at least + * BASE_UK_VERSION_MAJOR to be 8 and BASE_UK_VERSION_MINOR to be 1 in the + * UK interface. + */ + case KBASE_FUNC_DUMP_FAULT_TERM: + { +#if 2 == MALI_INSTRUMENTATION_LEVEL + if (atomic_read(&kctx->jctx.sched_info.ctx.fault_count) > 0 && + kctx->jctx.sched_info.ctx.is_scheduled) + + kbasep_js_dump_fault_term(kbdev, kctx); + + break; +#endif /* 2 == MALI_INSTRUMENTATION_LEVEL */ + + /* This IOCTL should only be called when instr=2 at compile time. 
*/ + goto out_bad; + } + + case KBASE_FUNC_GET_CONTEXT_ID: + { + struct kbase_uk_context_id *info = args; + + info->id = kctx->id; + break; + } + + default: + dev_err(kbdev->dev, "unknown ioctl %u", id); + goto out_bad; + } + + return 0; + + bad_size: + dev_err(kbdev->dev, "Wrong syscall size (%d) for %08x\n", args_size, id); + out_bad: + return -EINVAL; +} + +static struct kbase_device *to_kbase_device(struct device *dev) +{ + return dev_get_drvdata(dev); +} + +/* + * API to acquire device list mutex and + * return pointer to the device list head + */ +const struct list_head *kbase_dev_list_get(void) +{ + mutex_lock(&kbase_dev_list_lock); + return &kbase_dev_list; +} +KBASE_EXPORT_TEST_API(kbase_dev_list_get); + +/* API to release the device list mutex */ +void kbase_dev_list_put(const struct list_head *dev_list) +{ + mutex_unlock(&kbase_dev_list_lock); +} +KBASE_EXPORT_TEST_API(kbase_dev_list_put); + +/* Find a particular kbase device (as specified by minor number), or find the "first" device if -1 is specified */ +struct kbase_device *kbase_find_device(int minor) +{ + struct kbase_device *kbdev = NULL; + struct list_head *entry; + const struct list_head *dev_list = kbase_dev_list_get(); + + list_for_each(entry, dev_list) { + struct kbase_device *tmp; + + tmp = list_entry(entry, struct kbase_device, entry); + if (tmp->mdev.minor == minor || minor == -1) { + kbdev = tmp; + get_device(kbdev->dev); + break; + } + } + kbase_dev_list_put(dev_list); + + return kbdev; +} +EXPORT_SYMBOL(kbase_find_device); + +void kbase_release_device(struct kbase_device *kbdev) +{ + put_device(kbdev->dev); +} +EXPORT_SYMBOL(kbase_release_device); + +static int kbase_open(struct inode *inode, struct file *filp) +{ + struct kbase_device *kbdev = NULL; + struct kbase_context *kctx; + int ret = 0; +#ifdef CONFIG_DEBUG_FS + char kctx_name[64]; +#endif + + kbdev = kbase_find_device(iminor(inode)); + + if (!kbdev) + return -ENODEV; + + kctx = kbase_create_context(kbdev, is_compat_task()); + if (!kctx) { + ret = -ENOMEM; + goto out; + } + + init_waitqueue_head(&kctx->event_queue); + filp->private_data = kctx; + + kctx->infinite_cache_active = kbdev->infinite_cache_active_default; + +#ifdef CONFIG_DEBUG_FS + snprintf(kctx_name, 64, "%d_%d", kctx->tgid, kctx->id); + + kctx->kctx_dentry = debugfs_create_dir(kctx_name, + kbdev->debugfs_ctx_directory); + + if (IS_ERR_OR_NULL(kctx->kctx_dentry)) { + ret = -ENOMEM; + goto out; + } + +#ifdef CONFIG_MALI_CACHE_COHERENT + /* if cache is completely coherent at hardware level, then remove the + * infinite cache control support from debugfs. 
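
kbase_dev_list_get()/kbase_dev_list_put() above implement a simple held-lock iteration contract: the getter returns the list head with the mutex taken, and every caller must balance it with a put. A standalone sketch of that pattern, including the minor == -1 "first device" convention of kbase_find_device() (all names invented for the demo):

#include <pthread.h>
#include <stdio.h>

struct node { int minor; struct node *next; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node dev1 = { 1, NULL };
static struct node dev0 = { 0, &dev1 };
static struct node *dev_list = &dev0;

/* Returns the list head with the lock held; caller must call put(). */
static struct node *dev_list_get(void)
{
	pthread_mutex_lock(&list_lock);
	return dev_list;
}

static void dev_list_put(void)
{
	pthread_mutex_unlock(&list_lock);
}

/* Mirrors kbase_find_device(): minor == -1 means "first device". */
static struct node *find_device(int minor)
{
	struct node *n, *found = NULL;

	for (n = dev_list_get(); n; n = n->next) {
		if (minor == -1 || n->minor == minor) {
			found = n;
			break;
		}
	}
	dev_list_put();
	return found;
}

int main(void)
{
	struct node *n = find_device(-1);

	printf("found minor %d\n", n ? n->minor : -1);
	return 0;
}

In the driver the found device is additionally pinned with get_device() before the lock drops, so it cannot vanish between kbase_find_device() and kbase_release_device().
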
+ */ +#else + debugfs_create_bool("infinite_cache", 0644, kctx->kctx_dentry, + &kctx->infinite_cache_active); +#endif /* CONFIG_MALI_CACHE_COHERENT */ + kbasep_mem_profile_debugfs_add(kctx); + + kbasep_jd_debugfs_ctx_add(kctx); + kbase_debug_mem_view_init(filp); +#endif + + dev_dbg(kbdev->dev, "created base context\n"); + + { + struct kbasep_kctx_list_element *element; + + element = kzalloc(sizeof(*element), GFP_KERNEL); + if (element) { + mutex_lock(&kbdev->kctx_list_lock); + element->kctx = kctx; + list_add(&element->link, &kbdev->kctx_list); +#ifdef CONFIG_MALI_MIPE_ENABLED + kbase_tlstream_tl_new_ctx( + element->kctx, + (u32)(element->kctx->id)); +#endif + mutex_unlock(&kbdev->kctx_list_lock); + } else { + /* we don't treat this as a fail - just warn about it */ + dev_warn(kbdev->dev, "couldn't add kctx to kctx_list\n"); + } + } + return 0; + + out: + kbase_release_device(kbdev); + return ret; +} + +static int kbase_release(struct inode *inode, struct file *filp) +{ + struct kbase_context *kctx = filp->private_data; + struct kbase_device *kbdev = kctx->kbdev; + struct kbasep_kctx_list_element *element, *tmp; + bool found_element = false; + +#ifdef CONFIG_MALI_MIPE_ENABLED + kbase_tlstream_tl_del_ctx(kctx); +#endif + +#ifdef CONFIG_DEBUG_FS + debugfs_remove_recursive(kctx->kctx_dentry); + kbasep_mem_profile_debugfs_remove(kctx); +#endif + + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry_safe(element, tmp, &kbdev->kctx_list, link) { + if (element->kctx == kctx) { + list_del(&element->link); + kfree(element); + found_element = true; + } + } + mutex_unlock(&kbdev->kctx_list_lock); + if (!found_element) + dev_warn(kbdev->dev, "kctx not in kctx_list\n"); + + filp->private_data = NULL; + + mutex_lock(&kctx->vinstr_cli_lock); + /* If this client was performing hwcnt dumping and did not explicitly + * detach itself, remove it from the vinstr core now */ + kbase_vinstr_detach_client(kctx->kbdev->vinstr_ctx, kctx->vinstr_cli); + mutex_unlock(&kctx->vinstr_cli_lock); + + kbase_destroy_context(kctx); + + dev_dbg(kbdev->dev, "deleted base context\n"); + kbase_release_device(kbdev); + return 0; +} + +#define CALL_MAX_SIZE 536 + +static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + u64 msg[(CALL_MAX_SIZE + 7) >> 3] = { 0xdeadbeefdeadbeefull }; /* alignment fixup */ + u32 size = _IOC_SIZE(cmd); + struct kbase_context *kctx = filp->private_data; + + if (size > CALL_MAX_SIZE) + return -ENOTTY; + + if (0 != copy_from_user(&msg, (void __user *)arg, size)) { + dev_err(kctx->kbdev->dev, "failed to copy ioctl argument into kernel space\n"); + return -EFAULT; + } + + if (kbase_dispatch(kctx, &msg, size) != 0) + return -EFAULT; + + if (0 != copy_to_user((void __user *)arg, &msg, size)) { + dev_err(kctx->kbdev->dev, "failed to copy results of UK call back to user space\n"); + return -EFAULT; + } + return 0; +} + +static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) +{ + struct kbase_context *kctx = filp->private_data; + struct base_jd_event_v2 uevent; + int out_count = 0; + + if (count < sizeof(uevent)) + return -ENOBUFS; + + do { + while (kbase_event_dequeue(kctx, &uevent)) { + if (out_count > 0) + goto out; + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(kctx->event_queue, kbase_event_pending(kctx))) + return -ERESTARTSYS; + } + if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { + if (out_count == 0) + return -EPIPE; + goto out; + } + + if (copy_to_user(buf, &uevent, sizeof(uevent))) + 
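+			/* Note: a fault here discards any events already
+			 * copied by this call and reports -EFAULT instead. */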
return -EFAULT; + + buf += sizeof(uevent); + out_count++; + count -= sizeof(uevent); + } while (count >= sizeof(uevent)); + + out: + return out_count * sizeof(uevent); +} + +static unsigned int kbase_poll(struct file *filp, poll_table *wait) +{ + struct kbase_context *kctx = filp->private_data; + + poll_wait(filp, &kctx->event_queue, wait); + if (kbase_event_pending(kctx)) + return POLLIN | POLLRDNORM; + + return 0; +} + +void kbase_event_wakeup(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + + wake_up_interruptible(&kctx->event_queue); +} + +KBASE_EXPORT_TEST_API(kbase_event_wakeup); + +static int kbase_check_flags(int flags) +{ + /* Enforce that the driver keeps the O_CLOEXEC flag so that execve() always + * closes the file descriptor in a child process. + */ + if (0 == (flags & O_CLOEXEC)) + return -EINVAL; + + return 0; +} + +static unsigned long kbase_get_unmapped_area(struct file *filp, + const unsigned long addr, const unsigned long len, + const unsigned long pgoff, const unsigned long flags) +{ +#ifdef CONFIG_64BIT + /* based on get_unmapped_area, but simplified slightly due to that some + * values are known in advance */ + struct kbase_context *kctx = filp->private_data; + + if (!kctx->is_compat && !addr && + kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA)) { + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long low_limit, high_limit, gap_start, gap_end; + + /* Hardware has smaller VA than userspace, ensure the page + * comes from a VA which can be used on the GPU */ + + gap_end = (1UL<<33); + if (gap_end < len) + return -ENOMEM; + high_limit = gap_end - len; + low_limit = PAGE_SIZE + len; + + gap_start = mm->highest_vm_end; + if (gap_start <= high_limit) + goto found_highest; + + if (RB_EMPTY_ROOT(&mm->mm_rb)) + return -ENOMEM; + vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb); + if (vma->rb_subtree_gap < len) + return -ENOMEM; + + while (true) { + gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; + if (gap_start <= high_limit && vma->vm_rb.rb_right) { + struct vm_area_struct *right = + rb_entry(vma->vm_rb.rb_right, + struct vm_area_struct, vm_rb); + if (right->rb_subtree_gap >= len) { + vma = right; + continue; + } + } +check_current: + gap_end = vma->vm_start; + if (gap_end < low_limit) + return -ENOMEM; + if (gap_start <= high_limit && + gap_end - gap_start >= len) + goto found; + + if (vma->vm_rb.rb_left) { + struct vm_area_struct *left = + rb_entry(vma->vm_rb.rb_left, + struct vm_area_struct, vm_rb); + + if (left->rb_subtree_gap >= len) { + vma = left; + continue; + } + } + while (true) { + struct rb_node *prev = &vma->vm_rb; + + if (!rb_parent(prev)) + return -ENOMEM; + vma = rb_entry(rb_parent(prev), + struct vm_area_struct, vm_rb); + if (prev == vma->vm_rb.rb_right) { + gap_start = vma->vm_prev ? 
+ vma->vm_prev->vm_end : 0; + goto check_current; + } + } + } + +found: + if (gap_end > (1UL<<33)) + gap_end = (1UL<<33); + +found_highest: + gap_end -= len; + + VM_BUG_ON(gap_end < PAGE_SIZE); + VM_BUG_ON(gap_end < gap_start); + return gap_end; + } +#endif + /* No special requirements - fallback to the default version */ + return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); +} + +static const struct file_operations kbase_fops = { + .owner = THIS_MODULE, + .open = kbase_open, + .release = kbase_release, + .read = kbase_read, + .poll = kbase_poll, + .unlocked_ioctl = kbase_ioctl, + .compat_ioctl = kbase_ioctl, + .mmap = kbase_mmap, + .check_flags = kbase_check_flags, + .get_unmapped_area = kbase_get_unmapped_area, +}; + +#ifndef CONFIG_MALI_NO_MALI +void kbase_os_reg_write(struct kbase_device *kbdev, u16 offset, u32 value) +{ + writel(value, kbdev->reg + offset); +} + +u32 kbase_os_reg_read(struct kbase_device *kbdev, u16 offset) +{ + return readl(kbdev->reg + offset); +} +#endif /* !CONFIG_MALI_NO_MALI */ + + +/** Show callback for the @c power_policy sysfs file. + * + * This function is called to get the contents of the @c power_policy sysfs + * file. This is a list of the available policies with the currently active one + * surrounded by square brackets. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf) +{ + struct kbase_device *kbdev; + const struct kbase_pm_policy *current_policy; + const struct kbase_pm_policy *const *policy_list; + int policy_count; + int i; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + current_policy = kbase_pm_get_policy(kbdev); + + policy_count = kbase_pm_list_policies(&policy_list); + + for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { + if (policy_list[i] == current_policy) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); + else + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); + } + + if (ret < PAGE_SIZE - 1) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + } else { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** Store callback for the @c power_policy sysfs file. + * + * This function is called when the @c power_policy sysfs file is written to. + * It matches the requested policy against the available policies and if a + * matching policy is found calls @ref kbase_pm_set_policy to change the + * policy. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. 
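+ *
+ * Usage sketch (the policy name and device path are illustrative; the
+ * accepted names are whatever kbase_pm_list_policies() reports):
+ *   echo demand > /sys/class/misc/mali0/device/power_policy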
+ */ +static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + const struct kbase_pm_policy *new_policy = NULL; + const struct kbase_pm_policy *const *policy_list; + int policy_count; + int i; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + policy_count = kbase_pm_list_policies(&policy_list); + + for (i = 0; i < policy_count; i++) { + if (sysfs_streq(policy_list[i]->name, buf)) { + new_policy = policy_list[i]; + break; + } + } + + if (!new_policy) { + dev_err(dev, "power_policy: policy not found\n"); + return -EINVAL; + } + + kbase_pm_set_policy(kbdev, new_policy); + + return count; +} + +/** The sysfs file @c power_policy. + * + * This is used for obtaining information about the available policies, + * determining which policy is currently active, and changing the active + * policy. + */ +static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy); + +/** Show callback for the @c core_availability_policy sysfs file. + * + * This function is called to get the contents of the @c core_availability_policy + * sysfs file. This is a list of the available policies with the currently + * active one surrounded by square brackets. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_ca_policy(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + const struct kbase_pm_ca_policy *current_policy; + const struct kbase_pm_ca_policy *const *policy_list; + int policy_count; + int i; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + current_policy = kbase_pm_ca_get_policy(kbdev); + + policy_count = kbase_pm_ca_list_policies(&policy_list); + + for (i = 0; i < policy_count && ret < PAGE_SIZE; i++) { + if (policy_list[i] == current_policy) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s] ", policy_list[i]->name); + else + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", policy_list[i]->name); + } + + if (ret < PAGE_SIZE - 1) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + } else { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** Store callback for the @c core_availability_policy sysfs file. + * + * This function is called when the @c core_availability_policy sysfs file is + * written to. It matches the requested policy against the available policies + * and if a matching policy is found calls @ref kbase_pm_set_policy to change + * the policy. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. 
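+ *
+ * Usage sketch (illustrative policy name and device path):
+ *   echo fixed > /sys/class/misc/mali0/device/core_availability_policy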
+ */ +static ssize_t set_ca_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + const struct kbase_pm_ca_policy *new_policy = NULL; + const struct kbase_pm_ca_policy *const *policy_list; + int policy_count; + int i; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + policy_count = kbase_pm_ca_list_policies(&policy_list); + + for (i = 0; i < policy_count; i++) { + if (sysfs_streq(policy_list[i]->name, buf)) { + new_policy = policy_list[i]; + break; + } + } + + if (!new_policy) { + dev_err(dev, "core_availability_policy: policy not found\n"); + return -EINVAL; + } + + kbase_pm_ca_set_policy(kbdev, new_policy); + + return count; +} + +/** The sysfs file @c core_availability_policy + * + * This is used for obtaining information about the available policies, + * determining which policy is currently active, and changing the active + * policy. + */ +static DEVICE_ATTR(core_availability_policy, S_IRUGO | S_IWUSR, show_ca_policy, set_ca_policy); + +/** Show callback for the @c core_mask sysfs file. + * + * This function is called to get the contents of the @c core_mask sysfs + * file. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Current core mask : 0x%llX\n", kbdev->pm.debug_core_mask); + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "Available core mask : 0x%llX\n", kbdev->shader_present_bitmap); + + return ret; +} + +/** Store callback for the @c core_mask sysfs file. + * + * This function is called when the @c core_mask sysfs file is written to. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. + */ +static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + u64 new_core_mask; + int rc; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + rc = kstrtoull(buf, 16, &new_core_mask); + if (rc) + return rc; + + if ((new_core_mask & kbdev->shader_present_bitmap) != new_core_mask || + !(new_core_mask & kbdev->gpu_props.props.coherency_info.group[0].core_mask)) { + dev_err(dev, "power_policy: invalid core specification\n"); + return -EINVAL; + } + + if (kbdev->pm.debug_core_mask != new_core_mask) { + unsigned long flags; + + spin_lock_irqsave(&kbdev->pm.power_change_lock, flags); + + kbase_pm_set_debug_core_mask(kbdev, new_core_mask); + + spin_unlock_irqrestore(&kbdev->pm.power_change_lock, flags); + } + + return count; +} + +/** The sysfs file @c core_mask. + * + * This is used to restrict shader core availability for debugging purposes. + * Reading it will show the current core mask and the mask of cores available. + * Writing to it will set the current core mask. 
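+ *
+ * The written value is parsed as hexadecimal (kstrtoull() with base 16),
+ * so e.g. writing 3 selects shader cores 0 and 1.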
+ */ +static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS +/** + * struct sc_split_config + * @tag: Short name + * @human_readable: Long name + * @js0_mask: Mask for job slot 0 + * @js1_mask: Mask for job slot 1 + * @js2_mask: Mask for job slot 2 + * + * Structure containing a single shader affinity split configuration. + */ +struct sc_split_config { + char const *tag; + char const *human_readable; + u64 js0_mask; + u64 js1_mask; + u64 js2_mask; +}; + +/* + * Array of available shader affinity split configurations. + */ +static struct sc_split_config const sc_split_configs[] = { + /* All must be the first config (default). */ + { + "all", "All cores", + 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL + }, + { + "mp1", "MP1 shader core", + 0x1, 0x1, 0x1 + }, + { + "mp2", "MP2 shader core", + 0x3, 0x3, 0x3 + }, + { + "mp4", "MP4 shader core", + 0xF, 0xF, 0xF + }, + { + "mp1_vf", "MP1 vertex + MP1 fragment shader core", + 0x2, 0x1, 0xFFFFFFFFFFFFFFFFULL + }, + { + "mp2_vf", "MP2 vertex + MP2 fragment shader core", + 0xA, 0x5, 0xFFFFFFFFFFFFFFFFULL + }, + /* This must be the last config. */ + { + NULL, NULL, + 0x0, 0x0, 0x0 + }, +}; + +/* Pointer to the currently active shader split configuration. */ +static struct sc_split_config const *current_sc_split_config = &sc_split_configs[0]; + +/** Show callback for the @c sc_split sysfs file + * + * Returns the current shader core affinity policy. + */ +static ssize_t show_split(struct device *dev, struct device_attribute *attr, char * const buf) +{ + ssize_t ret; + /* We know we are given a buffer which is PAGE_SIZE long. Our strings are all guaranteed + * to be shorter than that at this time so no length check needed. */ + ret = scnprintf(buf, PAGE_SIZE, "Current sc_split: '%s'\n", current_sc_split_config->tag); + return ret; +} + +/** Store callback for the @c sc_split sysfs file. + * + * This function is called when the @c sc_split sysfs file is written to + * It modifies the system shader core affinity configuration to allow + * system profiling with different hardware configurations. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. + */ +static ssize_t set_split(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct sc_split_config const *config = &sc_split_configs[0]; + + /* Try to match: loop until we hit the last "NULL" entry */ + while (config->tag) { + if (sysfs_streq(config->tag, buf)) { + current_sc_split_config = config; + mali_js0_affinity_mask = config->js0_mask; + mali_js1_affinity_mask = config->js1_mask; + mali_js2_affinity_mask = config->js2_mask; + dev_dbg(dev, "Setting sc_split: '%s'\n", config->tag); + return count; + } + config++; + } + + /* No match found in config list */ + dev_err(dev, "sc_split: invalid value\n"); + dev_err(dev, " Possible settings: mp[1|2|4], mp[1|2]_vf\n"); + return -ENOENT; +} + +/** The sysfs file @c sc_split + * + * This is used for configuring/querying the current shader core work affinity + * configuration. + */ +static DEVICE_ATTR(sc_split, S_IRUGO|S_IWUSR, show_split, set_split); +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ + + +/** Store callback for the @c js_timeouts sysfs file. 
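+ *
+ * Example write (values are illustrative; per the parser below, 0 leaves
+ * a timeout unchanged and -1 restores its default, assumed device path):
+ *   echo "500 500 1500 1500 6000 3000 3000 30000" > /sys/class/misc/mali0/device/js_timeouts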
+ *
+ * This function is called when the @c js_timeouts sysfs file is written
+ * to. The file contains eight values separated by whitespace. The values
+ * are basically the same as the JS_SOFT_STOP_TICKS, JS_SOFT_STOP_TICKS_CL,
+ * JS_HARD_STOP_TICKS_SS, JS_HARD_STOP_TICKS_CL, JS_HARD_STOP_TICKS_DUMPING,
+ * JS_RESET_TICKS_SS, JS_RESET_TICKS_CL, JS_RESET_TICKS_DUMPING
+ * configuration values (in that order), with the difference that the
+ * js_timeouts values are expressed in MILLISECONDS.
+ *
+ * The js_timeouts sysfs file allows the current values in use by the job
+ * scheduler to be overridden. Note that a value needs to be other than 0
+ * for it to override the current job scheduler value.
+ *
+ * @param dev	The device this sysfs file is for
+ * @param attr	The attributes of the sysfs file
+ * @param buf	The value written to the sysfs file
+ * @param count	The number of bytes written to the sysfs file
+ *
+ * @return @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	long js_soft_stop_ms;
+	long js_soft_stop_ms_cl;
+	long js_hard_stop_ms_ss;
+	long js_hard_stop_ms_cl;
+	long js_hard_stop_ms_dumping;
+	long js_reset_ms_ss;
+	long js_reset_ms_cl;
+	long js_reset_ms_dumping;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%ld %ld %ld %ld %ld %ld %ld %ld",
+			&js_soft_stop_ms, &js_soft_stop_ms_cl,
+			&js_hard_stop_ms_ss, &js_hard_stop_ms_cl,
+			&js_hard_stop_ms_dumping, &js_reset_ms_ss,
+			&js_reset_ms_cl, &js_reset_ms_dumping);
+
+	if (items == 8) {
+		u64 ticks;
+
+		if (js_soft_stop_ms >= 0) {
+			ticks = js_soft_stop_ms * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_soft_stop_ticks = ticks;
+		} else {
+			kbdev->js_soft_stop_ticks = -1;
+		}
+
+		if (js_soft_stop_ms_cl >= 0) {
+			ticks = js_soft_stop_ms_cl * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_soft_stop_ticks_cl = ticks;
+		} else {
+			kbdev->js_soft_stop_ticks_cl = -1;
+		}
+
+		if (js_hard_stop_ms_ss >= 0) {
+			ticks = js_hard_stop_ms_ss * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_hard_stop_ticks_ss = ticks;
+		} else {
+			kbdev->js_hard_stop_ticks_ss = -1;
+		}
+
+		if (js_hard_stop_ms_cl >= 0) {
+			ticks = js_hard_stop_ms_cl * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_hard_stop_ticks_cl = ticks;
+		} else {
+			kbdev->js_hard_stop_ticks_cl = -1;
+		}
+
+		if (js_hard_stop_ms_dumping >= 0) {
+			ticks = js_hard_stop_ms_dumping * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_hard_stop_ticks_dumping = ticks;
+		} else {
+			kbdev->js_hard_stop_ticks_dumping = -1;
+		}
+
+		if (js_reset_ms_ss >= 0) {
+			ticks = js_reset_ms_ss * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_reset_ticks_ss = ticks;
+		} else {
+			kbdev->js_reset_ticks_ss = -1;
+		}
+
+		if (js_reset_ms_cl >= 0) {
+			ticks = js_reset_ms_cl * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_reset_ticks_cl = ticks;
+		} else {
+			kbdev->js_reset_ticks_cl = -1;
+		}
+
+		if (js_reset_ms_dumping >= 0) {
+			ticks = js_reset_ms_dumping * 1000000ULL;
+			do_div(ticks, kbdev->js_data.scheduling_period_ns);
+			kbdev->js_reset_ticks_dumping = ticks;
+		} else {
+			kbdev->js_reset_ticks_dumping = -1;
+		}
+
+		kbdev->js_timeouts_updated = true;
+
+		dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS with %lu ticks (%lu ms)\n",
+				(unsigned
long)kbdev->js_soft_stop_ticks, + js_soft_stop_ms); + dev_dbg(kbdev->dev, "Overriding JS_SOFT_STOP_TICKS_CL with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_soft_stop_ticks_cl, + js_soft_stop_ms_cl); + dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_SS with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_hard_stop_ticks_ss, + js_hard_stop_ms_ss); + dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_CL with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_hard_stop_ticks_cl, + js_hard_stop_ms_cl); + dev_dbg(kbdev->dev, "Overriding JS_HARD_STOP_TICKS_DUMPING with %lu ticks (%lu ms)\n", + (unsigned long) + kbdev->js_hard_stop_ticks_dumping, + js_hard_stop_ms_dumping); + dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_SS with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_reset_ticks_ss, + js_reset_ms_ss); + dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_CL with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_reset_ticks_cl, + js_reset_ms_cl); + dev_dbg(kbdev->dev, "Overriding JS_RESET_TICKS_DUMPING with %lu ticks (%lu ms)\n", + (unsigned long)kbdev->js_reset_ticks_dumping, + js_reset_ms_dumping); + + return count; + } + + dev_err(kbdev->dev, "Couldn't process js_timeouts write operation.\n" + "Use format <soft_stop_ms> <soft_stop_ms_cl> <hard_stop_ms_ss> <hard_stop_ms_cl> <hard_stop_ms_dumping> <reset_ms_ss> <reset_ms_cl> <reset_ms_dumping>\n" + "Write 0 for no change, -1 to restore default timeout\n"); + return -EINVAL; +} + +/** Show callback for the @c js_timeouts sysfs file. + * + * This function is called to get the contents of the @c js_timeouts sysfs + * file. It returns the last set values written to the js_timeouts sysfs file. + * If the file didn't get written yet, the values will be current setting in + * use. + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + u64 ms; + unsigned long js_soft_stop_ms; + unsigned long js_soft_stop_ms_cl; + unsigned long js_hard_stop_ms_ss; + unsigned long js_hard_stop_ms_cl; + unsigned long js_hard_stop_ms_dumping; + unsigned long js_reset_ms_ss; + unsigned long js_reset_ms_cl; + unsigned long js_reset_ms_dumping; + unsigned long ticks; + u32 scheduling_period_ns; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + /* If no contexts have been scheduled since js_timeouts was last written + * to, the new timeouts might not have been latched yet. So check if an + * update is pending and use the new values if necessary. 
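+ * The values reported below are derived as
+ *   ms = ticks * scheduling_period_ns / 1000000,
+ * the inverse of the conversion performed in set_js_timeouts().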
*/
+	if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0)
+		scheduling_period_ns = kbdev->js_scheduling_period_ns;
+	else
+		scheduling_period_ns = kbdev->js_data.scheduling_period_ns;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0)
+		ticks = kbdev->js_soft_stop_ticks;
+	else
+		ticks = kbdev->js_data.soft_stop_ticks;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_soft_stop_ms = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0)
+		ticks = kbdev->js_soft_stop_ticks_cl;
+	else
+		ticks = kbdev->js_data.soft_stop_ticks_cl;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_soft_stop_ms_cl = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0)
+		ticks = kbdev->js_hard_stop_ticks_ss;
+	else
+		ticks = kbdev->js_data.hard_stop_ticks_ss;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_hard_stop_ms_ss = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0)
+		ticks = kbdev->js_hard_stop_ticks_cl;
+	else
+		ticks = kbdev->js_data.hard_stop_ticks_cl;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_hard_stop_ms_cl = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0)
+		ticks = kbdev->js_hard_stop_ticks_dumping;
+	else
+		ticks = kbdev->js_data.hard_stop_ticks_dumping;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_hard_stop_ms_dumping = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0)
+		ticks = kbdev->js_reset_ticks_ss;
+	else
+		ticks = kbdev->js_data.gpu_reset_ticks_ss;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_reset_ms_ss = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0)
+		ticks = kbdev->js_reset_ticks_cl;
+	else
+		ticks = kbdev->js_data.gpu_reset_ticks_cl;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_reset_ms_cl = (unsigned long)ms;
+
+	if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0)
+		ticks = kbdev->js_reset_ticks_dumping;
+	else
+		ticks = kbdev->js_data.gpu_reset_ticks_dumping;
+	ms = (u64)ticks * scheduling_period_ns;
+	do_div(ms, 1000000UL);
+	js_reset_ms_dumping = (unsigned long)ms;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%lu %lu %lu %lu %lu %lu %lu %lu\n",
+			js_soft_stop_ms, js_soft_stop_ms_cl,
+			js_hard_stop_ms_ss, js_hard_stop_ms_cl,
+			js_hard_stop_ms_dumping, js_reset_ms_ss,
+			js_reset_ms_cl, js_reset_ms_dumping);
+
+	if (ret >= PAGE_SIZE) {
+		buf[PAGE_SIZE - 2] = '\n';
+		buf[PAGE_SIZE - 1] = '\0';
+		ret = PAGE_SIZE - 1;
+	}
+
+	return ret;
+}
+
+/** The sysfs file @c js_timeouts.
+ *
+ * This is used to override the current job scheduler values for
+ * JS_SOFT_STOP_TICKS
+ * JS_SOFT_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_SS
+ * JS_HARD_STOP_TICKS_CL
+ * JS_HARD_STOP_TICKS_DUMPING
+ * JS_RESET_TICKS_SS
+ * JS_RESET_TICKS_CL
+ * JS_RESET_TICKS_DUMPING.
+ */
+static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+
+/**
+ * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ *                            file
+ * @dev:   The device the sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the js_scheduling_period sysfs file is written
+ * to.
It checks the data written, and if valid updates the js_scheduling_period + * value + * + * Return: @c count if the function succeeded. An error code on failure. + */ +static ssize_t set_js_scheduling_period(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + unsigned int js_scheduling_period; + u32 new_scheduling_period_ns; + u32 old_period; + u64 ticks; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtouint(buf, 0, &js_scheduling_period); + if (ret || !js_scheduling_period) { + dev_err(kbdev->dev, "Couldn't process js_scheduling_period write operation.\n" + "Use format <js_scheduling_period_ms>\n"); + return -EINVAL; + } + + new_scheduling_period_ns = js_scheduling_period * 1000000; + + /* Update scheduling timeouts */ + mutex_lock(&kbdev->js_data.runpool_mutex); + + /* If no contexts have been scheduled since js_timeouts was last written + * to, the new timeouts might not have been latched yet. So check if an + * update is pending and use the new values if necessary. */ + + /* Use previous 'new' scheduling period as a base if present. */ + if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns) + old_period = kbdev->js_scheduling_period_ns; + else + old_period = kbdev->js_data.scheduling_period_ns; + + if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks > 0) + ticks = (u64)kbdev->js_soft_stop_ticks * old_period; + else + ticks = (u64)kbdev->js_data.soft_stop_ticks * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_soft_stop_ticks = ticks ? ticks : 1; + + if (kbdev->js_timeouts_updated && kbdev->js_soft_stop_ticks_cl > 0) + ticks = (u64)kbdev->js_soft_stop_ticks_cl * old_period; + else + ticks = (u64)kbdev->js_data.soft_stop_ticks_cl * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_soft_stop_ticks_cl = ticks ? ticks : 1; + + if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_ss > 0) + ticks = (u64)kbdev->js_hard_stop_ticks_ss * old_period; + else + ticks = (u64)kbdev->js_data.hard_stop_ticks_ss * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_hard_stop_ticks_ss = ticks ? ticks : 1; + + if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_cl > 0) + ticks = (u64)kbdev->js_hard_stop_ticks_cl * old_period; + else + ticks = (u64)kbdev->js_data.hard_stop_ticks_cl * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_hard_stop_ticks_cl = ticks ? ticks : 1; + + if (kbdev->js_timeouts_updated && kbdev->js_hard_stop_ticks_dumping > 0) + ticks = (u64)kbdev->js_hard_stop_ticks_dumping * old_period; + else + ticks = (u64)kbdev->js_data.hard_stop_ticks_dumping * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_hard_stop_ticks_dumping = ticks ? ticks : 1; + + if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_ss > 0) + ticks = (u64)kbdev->js_reset_ticks_ss * old_period; + else + ticks = (u64)kbdev->js_data.gpu_reset_ticks_ss * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_reset_ticks_ss = ticks ? 
ticks : 1; + + if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_cl > 0) + ticks = (u64)kbdev->js_reset_ticks_cl * old_period; + else + ticks = (u64)kbdev->js_data.gpu_reset_ticks_cl * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_reset_ticks_cl = ticks ? ticks : 1; + + if (kbdev->js_timeouts_updated && kbdev->js_reset_ticks_dumping > 0) + ticks = (u64)kbdev->js_reset_ticks_dumping * old_period; + else + ticks = (u64)kbdev->js_data.gpu_reset_ticks_dumping * + kbdev->js_data.scheduling_period_ns; + do_div(ticks, new_scheduling_period_ns); + kbdev->js_reset_ticks_dumping = ticks ? ticks : 1; + + kbdev->js_scheduling_period_ns = new_scheduling_period_ns; + kbdev->js_timeouts_updated = true; + + mutex_unlock(&kbdev->js_data.runpool_mutex); + + dev_dbg(kbdev->dev, "JS scheduling period: %dms\n", + js_scheduling_period); + + return count; +} + +/** + * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs + * entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current period used for the JS scheduling + * period. + * + * Return: The number of bytes output to buf. + */ +static ssize_t show_js_scheduling_period(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + u32 period; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if (kbdev->js_timeouts_updated && kbdev->js_scheduling_period_ns > 0) + period = kbdev->js_scheduling_period_ns; + else + period = kbdev->js_data.scheduling_period_ns; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", + period / 1000000); + + return ret; +} + +static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR, + show_js_scheduling_period, set_js_scheduling_period); + +#if !MALI_CUSTOMER_RELEASE +/** Store callback for the @c force_replay sysfs file. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. 
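+ *
+ * Accepted writes, per the parser below:
+ *   limit=<n>, random_limit, norandom_limit, core_req=<hex>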
+ */ +static ssize_t set_force_replay(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if (!strncmp("limit=", buf, MIN(6, count))) { + int force_replay_limit; + int items = sscanf(buf, "limit=%u", &force_replay_limit); + + if (items == 1) { + kbdev->force_replay_random = false; + kbdev->force_replay_limit = force_replay_limit; + kbdev->force_replay_count = 0; + + return count; + } + } else if (!strncmp("random_limit", buf, MIN(12, count))) { + kbdev->force_replay_random = true; + kbdev->force_replay_count = 0; + + return count; + } else if (!strncmp("norandom_limit", buf, MIN(14, count))) { + kbdev->force_replay_random = false; + kbdev->force_replay_limit = KBASEP_FORCE_REPLAY_DISABLED; + kbdev->force_replay_count = 0; + + return count; + } else if (!strncmp("core_req=", buf, MIN(9, count))) { + unsigned int core_req; + int items = sscanf(buf, "core_req=%x", &core_req); + + if (items == 1) { + kbdev->force_replay_core_req = (base_jd_core_req)core_req; + + return count; + } + } + dev_err(kbdev->dev, "Couldn't process force_replay write operation.\nPossible settings: limit=<limit>, random_limit, norandom_limit, core_req=<core_req>\n"); + return -EINVAL; +} + +/** Show callback for the @c force_replay sysfs file. + * + * This function is called to get the contents of the @c force_replay sysfs + * file. It returns the last set value written to the force_replay sysfs file. + * If the file didn't get written yet, the values will be 0. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_force_replay(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if (kbdev->force_replay_random) + ret = scnprintf(buf, PAGE_SIZE, + "limit=0\nrandom_limit\ncore_req=%x\n", + kbdev->force_replay_core_req); + else + ret = scnprintf(buf, PAGE_SIZE, + "limit=%u\nnorandom_limit\ncore_req=%x\n", + kbdev->force_replay_limit, + kbdev->force_replay_core_req); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** The sysfs file @c force_replay. + * + */ +static DEVICE_ATTR(force_replay, S_IRUGO | S_IWUSR, show_force_replay, + set_force_replay); +#endif /* !MALI_CUSTOMER_RELEASE */ + +#ifdef CONFIG_MALI_DEBUG +static ssize_t set_js_softstop_always(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + int softstop_always; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtoint(buf, 0, &softstop_always); + if (ret || ((softstop_always != 0) && (softstop_always != 1))) { + dev_err(kbdev->dev, "Couldn't process js_softstop_always write operation.\n" + "Use format <soft_stop_always>\n"); + return -EINVAL; + } + + kbdev->js_data.softstop_always = (bool) softstop_always; + dev_dbg(kbdev->dev, "Support for softstop on a single context: %s\n", + (kbdev->js_data.softstop_always) ? 
+ "Enabled" : "Disabled"); + return count; +} + +static ssize_t show_js_softstop_always(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->js_data.softstop_always); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/* + * By default, soft-stops are disabled when only a single context is present. The ability to + * enable soft-stop when only a single context is present can be used for debug and unit-testing purposes. + * (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.) + */ +static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always); +#endif /* CONFIG_MALI_DEBUG */ + +#ifdef CONFIG_MALI_DEBUG +typedef void (kbasep_debug_command_func) (struct kbase_device *); + +enum kbasep_debug_command_code { + KBASEP_DEBUG_COMMAND_DUMPTRACE, + + /* This must be the last enum */ + KBASEP_DEBUG_COMMAND_COUNT +}; + +struct kbasep_debug_command { + char *str; + kbasep_debug_command_func *func; +}; + +/** Debug commands supported by the driver */ +static const struct kbasep_debug_command debug_commands[] = { + { + .str = "dumptrace", + .func = &kbasep_trace_dump, + } +}; + +/** Show callback for the @c debug_command sysfs file. + * + * This function is called to get the contents of the @c debug_command sysfs + * file. This is a list of the available debug commands, separated by newlines. + * + * @param dev The device this sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The output buffer for the sysfs file contents + * + * @return The number of bytes output to @c buf. + */ +static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + int i; + ssize_t ret = 0; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT && ret < PAGE_SIZE; i++) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s\n", debug_commands[i].str); + + if (ret >= PAGE_SIZE) { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** Store callback for the @c debug_command sysfs file. + * + * This function is called when the @c debug_command sysfs file is written to. + * It matches the requested command against the available commands, and if + * a matching command is found calls the associated function from + * @ref debug_commands to issue the command. + * + * @param dev The device with sysfs file is for + * @param attr The attributes of the sysfs file + * @param buf The value written to the sysfs file + * @param count The number of bytes written to the sysfs file + * + * @return @c count if the function succeeded. An error code on failure. + */ +static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int i; + + kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + for (i = 0; i < KBASEP_DEBUG_COMMAND_COUNT; i++) { + if (sysfs_streq(debug_commands[i].str, buf)) { + debug_commands[i].func(kbdev); + return count; + } + } + + /* Debug Command not found */ + dev_err(dev, "debug_command: command not known\n"); + return -EINVAL; +} + +/** The sysfs file @c debug_command. 
+ * + * This is used to issue general debug commands to the device driver. + * Reading it will produce a list of debug commands, separated by newlines. + * Writing to it with one of those commands will issue said command. + */ +static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug); +#endif /* CONFIG_MALI_DEBUG */ + +/** + * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get a description of the present Mali + * GPU via the gpuinfo sysfs entry. This includes the GPU family, the + * number of cores, the hardware version and the raw product id. For + * example: + * + * Mali-T60x MP4 r0p0 0x6956 + * + * Return: The number of bytes output to buf. + */ +static ssize_t kbase_show_gpuinfo(struct device *dev, + struct device_attribute *attr, char *buf) +{ + static const struct gpu_product_id_name { + unsigned id; + char *name; + } gpu_product_id_names[] = { + { .id = GPU_ID_PI_T60X, .name = "Mali-T60x" }, + { .id = GPU_ID_PI_T62X, .name = "Mali-T62x" }, + { .id = GPU_ID_PI_T72X, .name = "Mali-T72x" }, + { .id = GPU_ID_PI_T76X, .name = "Mali-T76x" }, + { .id = GPU_ID_PI_T82X, .name = "Mali-T82x" }, + { .id = GPU_ID_PI_T83X, .name = "Mali-T83x" }, + { .id = GPU_ID_PI_T86X, .name = "Mali-T86x" }, + { .id = GPU_ID_PI_TFRX, .name = "Mali-T88x" }, +#ifdef MALI_INCLUDE_TMIX + { .id = GPU_ID_PI_TMIX, .name = "Mali-TMIx" }, +#endif /* MALI_INCLUDE_TMIX */ + }; + const char *product_name = "(Unknown Mali GPU)"; + struct kbase_device *kbdev; + u32 gpu_id; + unsigned product_id; + unsigned i; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) { + if (gpu_product_id_names[i].id == product_id) { + product_name = gpu_product_id_names[i].name; + break; + } + } + + return scnprintf(buf, PAGE_SIZE, "%s MP%d r%dp%d 0x%04X\n", + product_name, kbdev->gpu_props.num_cores, + (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, + (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, + product_id); +} +static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL); + +/** + * set_dvfs_period - Store callback for the dvfs_period sysfs file. + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the dvfs_period sysfs file is written to. It + * checks the data written, and if valid updates the DVFS period variable, + * + * Return: @c count if the function succeeded. An error code on failure. + */ +static ssize_t set_dvfs_period(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + int dvfs_period; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtoint(buf, 0, &dvfs_period); + if (ret || dvfs_period <= 0) { + dev_err(kbdev->dev, "Couldn't process dvfs_period write operation.\n" + "Use format <dvfs_period_ms>\n"); + return -EINVAL; + } + + kbdev->pm.dvfs_period = dvfs_period; + dev_dbg(kbdev->dev, "DVFS period: %dms\n", dvfs_period); + + return count; +} + +/** + * show_dvfs_period - Show callback for the dvfs_period sysfs entry. 
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer for the sysfs file contents.
+ *
+ * This function is called to get the current period used for the DVFS sample
+ * timer.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_dvfs_period(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->pm.dvfs_period);
+
+	return ret;
+}
+
+static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
+		set_dvfs_period);
+
+/**
+ * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * @dev:   The device this sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the pm_poweroff sysfs file is written to.
+ *
+ * This file contains three values separated by whitespace. The values
+ * are gpu_poweroff_time (the period of the poweroff timer, in ns),
+ * poweroff_shader_ticks (the number of poweroff timer ticks before an idle
+ * shader is powered off), and poweroff_gpu_ticks (the number of poweroff timer
+ * ticks before the GPU is powered off), in that order.
+ *
+ * Return: @c count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct kbase_device *kbdev;
+	int items;
+	u64 gpu_poweroff_time;
+	unsigned int poweroff_shader_ticks, poweroff_gpu_ticks;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	items = sscanf(buf, "%llu %u %u", &gpu_poweroff_time,
+			&poweroff_shader_ticks,
+			&poweroff_gpu_ticks);
+	if (items != 3) {
+		dev_err(kbdev->dev, "Couldn't process pm_poweroff write operation.\n"
+				"Use format <gpu_poweroff_time_ns> <poweroff_shader_ticks> <poweroff_gpu_ticks>\n");
+		return -EINVAL;
+	}
+
+	kbdev->pm.gpu_poweroff_time = HR_TIMER_DELAY_NSEC(gpu_poweroff_time);
+	kbdev->pm.poweroff_shader_ticks = poweroff_shader_ticks;
+	kbdev->pm.poweroff_gpu_ticks = poweroff_gpu_ticks;
+
+	return count;
+}
+
+/**
+ * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * @dev:  The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf:  The output buffer for the sysfs file contents.
+ *
+ * This function is called to get the current poweroff timer period and the
+ * shader/GPU poweroff tick counts.
+ *
+ * Return: The number of bytes output to buf.
+ */
+static ssize_t show_pm_poweroff(struct device *dev,
+		struct device_attribute *attr, char * const buf)
+{
+	struct kbase_device *kbdev;
+	ssize_t ret;
+
+	kbdev = to_kbase_device(dev);
+	if (!kbdev)
+		return -ENODEV;
+
+	ret = scnprintf(buf, PAGE_SIZE, "%llu %u %u\n",
+			ktime_to_ns(kbdev->pm.gpu_poweroff_time),
+			kbdev->pm.poweroff_shader_ticks,
+			kbdev->pm.poweroff_gpu_ticks);
+
+	return ret;
+}
+
+static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
+		set_pm_poweroff);
+
+/**
+ * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * @dev:   The device this sysfs file is for
+ * @attr:  The attributes of the sysfs file
+ * @buf:   The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the reset_timeout sysfs file is written to.
It + * checks the data written, and if valid updates the reset timeout. + * + * Return: @c count if the function succeeded. An error code on failure. + */ +static ssize_t set_reset_timeout(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct kbase_device *kbdev; + int ret; + int reset_timeout; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = kstrtoint(buf, 0, &reset_timeout); + if (ret || reset_timeout <= 0) { + dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" + "Use format <reset_timeout_ms>\n"); + return -EINVAL; + } + + kbdev->reset_timeout_ms = reset_timeout; + dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); + + return count; +} + +/** + * show_reset_timeout - Show callback for the reset_timeout sysfs entry. + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current reset timeout. + * + * Return: The number of bytes output to buf. + */ +static ssize_t show_reset_timeout(struct device *dev, + struct device_attribute *attr, char * const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + ret = scnprintf(buf, PAGE_SIZE, "%d\n", kbdev->reset_timeout_ms); + + return ret; +} + +static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout, + set_reset_timeout); + +#ifdef CONFIG_MALI_NO_MALI +static int kbase_common_reg_map(struct kbase_device *kbdev) +{ + return 0; +} +static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +{ +} +#else /* CONFIG_MALI_NO_MALI */ +static int kbase_common_reg_map(struct kbase_device *kbdev) +{ + int err = -ENOMEM; + + if (!request_mem_region(kbdev->reg_start, kbdev->reg_size, dev_name(kbdev->dev))) { + dev_err(kbdev->dev, "Register window unavailable\n"); + err = -EIO; + goto out_region; + } + + kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size); + if (!kbdev->reg) { + dev_err(kbdev->dev, "Can't remap register window\n"); + err = -EINVAL; + goto out_ioremap; + } + + return 0; + + out_ioremap: + release_mem_region(kbdev->reg_start, kbdev->reg_size); + out_region: + return err; +} + +static void kbase_common_reg_unmap(struct kbase_device * const kbdev) +{ + iounmap(kbdev->reg); + release_mem_region(kbdev->reg_start, kbdev->reg_size); +} +#endif /* CONFIG_MALI_NO_MALI */ + + +#ifdef CONFIG_DEBUG_FS + +#if KBASE_GPU_RESET_EN +#include <mali_kbase_hwaccess_jm.h> + +static void trigger_quirks_reload(struct kbase_device *kbdev) +{ + kbase_pm_context_active(kbdev); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + kbase_pm_context_idle(kbdev); +} + +#define MAKE_QUIRK_ACCESSORS(type) \ +static int type##_quirks_set(void *data, u64 val) \ +{ \ + struct kbase_device *kbdev; \ + kbdev = (struct kbase_device *)data; \ + kbdev->hw_quirks_##type = (u32)val; \ + trigger_quirks_reload(kbdev); \ + return 0;\ +} \ +\ +static int type##_quirks_get(void *data, u64 *val) \ +{ \ + struct kbase_device *kbdev;\ + kbdev = (struct kbase_device *)data;\ + *val = kbdev->hw_quirks_##type;\ + return 0;\ +} \ +DEFINE_SIMPLE_ATTRIBUTE(fops_##type##_quirks, type##_quirks_get,\ + type##_quirks_set, "%llu\n") + +MAKE_QUIRK_ACCESSORS(sc); +MAKE_QUIRK_ACCESSORS(tiler); +MAKE_QUIRK_ACCESSORS(mmu); + +#endif /* KBASE_GPU_RESET_EN */ + +static int kbase_device_debugfs_init(struct kbase_device *kbdev) +{ + struct dentry 
*debugfs_ctx_defaults_directory; + int err; + + kbdev->mali_debugfs_directory = debugfs_create_dir(kbdev->devname, + NULL); + if (!kbdev->mali_debugfs_directory) { + dev_err(kbdev->dev, "Couldn't create mali debugfs directory\n"); + err = -ENOMEM; + goto out; + } + + kbdev->debugfs_ctx_directory = debugfs_create_dir("ctx", + kbdev->mali_debugfs_directory); + if (!kbdev->debugfs_ctx_directory) { + dev_err(kbdev->dev, "Couldn't create mali debugfs ctx directory\n"); + err = -ENOMEM; + goto out; + } + + debugfs_ctx_defaults_directory = debugfs_create_dir("defaults", + kbdev->debugfs_ctx_directory); + if (!debugfs_ctx_defaults_directory) { + dev_err(kbdev->dev, "Couldn't create mali debugfs ctx defaults directory\n"); + err = -ENOMEM; + goto out; + } + + kbasep_gpu_memory_debugfs_init(kbdev); +#if KBASE_GPU_RESET_EN + debugfs_create_file("quirks_sc", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_sc_quirks); + debugfs_create_file("quirks_tiler", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_tiler_quirks); + debugfs_create_file("quirks_mmu", 0644, + kbdev->mali_debugfs_directory, kbdev, + &fops_mmu_quirks); +#endif /* KBASE_GPU_RESET_EN */ + +#ifndef CONFIG_MALI_COH_USER + debugfs_create_bool("infinite_cache", 0644, + debugfs_ctx_defaults_directory, + &kbdev->infinite_cache_active_default); +#endif /* CONFIG_MALI_COH_USER */ + +#if KBASE_TRACE_ENABLE + kbasep_trace_debugfs_init(kbdev); +#endif /* KBASE_TRACE_ENABLE */ + +#ifdef CONFIG_MALI_TRACE_TIMELINE + kbasep_trace_timeline_debugfs_init(kbdev); +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + + return 0; + +out: + debugfs_remove_recursive(kbdev->mali_debugfs_directory); + return err; +} + +static void kbase_device_debugfs_term(struct kbase_device *kbdev) +{ + debugfs_remove_recursive(kbdev->mali_debugfs_directory); +} + +#else /* CONFIG_DEBUG_FS */ +static inline int kbase_device_debugfs_init(struct kbase_device *kbdev) +{ + return 0; +} + +static inline void kbase_device_debugfs_term(struct kbase_device *kbdev) { } +#endif /* CONFIG_DEBUG_FS */ + + +static int kbase_common_device_init(struct kbase_device *kbdev) +{ + int err; + struct mali_base_gpu_core_props *core_props; + enum { + inited_mem = (1u << 0), + inited_js = (1u << 1), + inited_debug = (1u << 2), + inited_js_softstop = (1u << 3), + inited_js_timeouts = (1u << 4), +#if !MALI_CUSTOMER_RELEASE + inited_force_replay = (1u << 5), +#endif /* !MALI_CUSTOMER_RELEASE */ + inited_pm_runtime_init = (1u << 6), +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS + inited_sc_split = (1u << 7), +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ +#ifdef CONFIG_MALI_TRACE_TIMELINE + inited_timeline = (1u << 8), +#endif /* CONFIG_MALI_TRACE_TIMELINE */ +#ifdef CONFIG_MALI_DEVFREQ + inited_devfreq = (1u << 9), +#endif /* CONFIG_MALI_DEVFREQ */ +#ifdef CONFIG_MALI_MIPE_ENABLED + inited_tlstream = (1u << 10), +#endif /* CONFIG_MALI_MIPE_ENABLED */ + inited_backend_early = (1u << 11), + inited_backend_late = (1u << 12), + inited_device = (1u << 13), + inited_gpuinfo = (1u << 14), + inited_dvfs_period = (1u << 15), + inited_pm_poweroff = (1u << 16), + inited_reset_timeout = (1u << 17), + inited_js_scheduling_period = (1u << 18), + inited_vinstr = (1u << 19) + }; + + int inited = 0; +#if defined(CONFIG_MALI_PLATFORM_VEXPRESS) + u32 ve_logic_tile = 0; +#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */ + + dev_set_drvdata(kbdev->dev, kbdev); + + err = kbase_backend_early_init(kbdev); + if (err) + goto out_partial; + inited |= inited_backend_early; + + scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name, 
+		kbase_dev_nr++);
+
+	kbase_disjoint_init(kbdev);
+
+	/* obtain min/max configured gpu frequencies */
+	core_props = &(kbdev->gpu_props.props.core_props);
+
+	/* For versatile express platforms, the min and max values of GPU
+	 * frequency depend on the type of the logic tile; these values may not
+	 * be known at build time, so in some cases a platform config file with
+	 * wrong GPU frequency values may be included; to ensure that the
+	 * correct min and max GPU frequencies are obtained, the type of the
+	 * logic tile is read from the corresponding register on the platform
+	 * and the frequency values are assigned accordingly. */
+#if defined(CONFIG_MALI_PLATFORM_VEXPRESS)
+	ve_logic_tile = kbase_get_platform_logic_tile_type();
+
+	switch (ve_logic_tile) {
+	case 0x217:
+		/* Virtex 6, HBI0217 */
+		core_props->gpu_freq_khz_min = VE_VIRTEX6_GPU_FREQ_MIN;
+		core_props->gpu_freq_khz_max = VE_VIRTEX6_GPU_FREQ_MAX;
+		break;
+	case 0x247:
+		/* Virtex 7, HBI0247 */
+		core_props->gpu_freq_khz_min = VE_VIRTEX7_GPU_FREQ_MIN;
+		core_props->gpu_freq_khz_max = VE_VIRTEX7_GPU_FREQ_MAX;
+		break;
+	default:
+		/* all other logic tiles, i.e., Virtex 5 HBI0192
+		 * or unsuccessful reading from the platform -
+		 * fall back to the config_platform default */
+		core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+		core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+		break;
+	}
+#else
+	core_props->gpu_freq_khz_min = GPU_FREQ_KHZ_MIN;
+	core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
+#endif /* CONFIG_MALI_PLATFORM_VEXPRESS */
+
+	kbdev->gpu_props.irq_throttle_time_us = DEFAULT_IRQ_THROTTLE_TIME_US;
+
+	err = kbase_device_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Can't initialize device (%d)\n", err);
+		goto out_partial;
+	}
+
+	inited |= inited_device;
+
+	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
+	if (!kbdev->vinstr_ctx) {
+		dev_err(kbdev->dev, "Can't initialize virtual instrumentation core\n");
+		err = -EINVAL;
+		goto out_partial;
+	}
+
+	inited |= inited_vinstr;
+
+	if (kbdev->pm.callback_power_runtime_init) {
+		err = kbdev->pm.callback_power_runtime_init(kbdev);
+		if (err)
+			goto out_partial;
+
+		inited |= inited_pm_runtime_init;
+	}
+
+	err = kbase_mem_init(kbdev);
+	if (err)
+		goto out_partial;
+
+	inited |= inited_mem;
+
+	kbdev->system_coherency = COHERENCY_NONE;
+
+	err = kbasep_js_devdata_init(kbdev);
+	if (err)
+		goto out_partial;
+
+	inited |= inited_js;
+
+#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS
+	err = device_create_file(kbdev->dev, &dev_attr_sc_split);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create sc_split sysfs file\n");
+		goto out_partial;
+	}
+
+	inited |= inited_sc_split;
+#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */
+
+#ifdef CONFIG_MALI_DEBUG
+
+	err = device_create_file(kbdev->dev, &dev_attr_debug_command);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create debug_command sysfs file\n");
+		goto out_partial;
+	}
+	inited |= inited_debug;
+
+	err = device_create_file(kbdev->dev, &dev_attr_js_softstop_always);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create js_softstop_always sysfs file\n");
+		goto out_partial;
+	}
+	inited |= inited_js_softstop;
+#endif /* CONFIG_MALI_DEBUG */
+
+	err = device_create_file(kbdev->dev, &dev_attr_js_timeouts);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create js_timeouts sysfs file\n");
+		goto out_partial;
+	}
+	inited |= inited_js_timeouts;
+
+#if !MALI_CUSTOMER_RELEASE
+	err = device_create_file(kbdev->dev, &dev_attr_force_replay);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create force_replay sysfs file\n");
+		goto out_partial;
+	}
+	inited |= inited_force_replay;
+#endif /* !MALI_CUSTOMER_RELEASE */
+
+	err = device_create_file(kbdev->dev, &dev_attr_gpuinfo);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create gpuinfo sysfs file\n");
+		goto out_partial;
+	}
+	inited |= inited_gpuinfo;
+
+	err = device_create_file(kbdev->dev, &dev_attr_dvfs_period);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create dvfs_period sysfs file\n");
+		goto out_partial;
+	}
+	inited |= inited_dvfs_period;
+
+	err = device_create_file(kbdev->dev, &dev_attr_pm_poweroff);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create pm_poweroff sysfs file\n");
+		goto out_partial;
+	}
+	inited |= inited_pm_poweroff;
+
+	err = device_create_file(kbdev->dev, &dev_attr_reset_timeout);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create reset_timeout sysfs file\n");
+		goto out_partial;
+	}
+	inited |= inited_reset_timeout;
+
+	err = device_create_file(kbdev->dev, &dev_attr_js_scheduling_period);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create js_scheduling_period sysfs file\n");
+		goto out_partial;
+	}
+	inited |= inited_js_scheduling_period;
+
+#ifdef CONFIG_MALI_MIPE_ENABLED
+	err = kbase_tlstream_init();
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't initialize timeline stream\n");
+		goto out_partial;
+	}
+	inited |= inited_tlstream;
+#endif /* CONFIG_MALI_MIPE_ENABLED */
+
+	err = kbase_backend_late_init(kbdev);
+	if (err)
+		goto out_partial;
+	inited |= inited_backend_late;
+
+#ifdef CONFIG_MALI_DEVFREQ
+	err = kbase_devfreq_init(kbdev);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't initialize devfreq\n");
+		goto out_partial;
+	}
+	inited |= inited_devfreq;
+#endif /* CONFIG_MALI_DEVFREQ */
+
+#ifdef SECURE_CALLBACKS
+	kbdev->secure_ops = SECURE_CALLBACKS;
+#endif
+
+	err = kbase_device_debugfs_init(kbdev);
+	if (err)
+		goto out_partial;
+
+	/* initialise the kctx list */
+	mutex_init(&kbdev->kctx_list_lock);
+	INIT_LIST_HEAD(&kbdev->kctx_list);
+
+	kbdev->mdev.minor = MISC_DYNAMIC_MINOR;
+	kbdev->mdev.name = kbdev->devname;
+	kbdev->mdev.fops = &kbase_fops;
+	kbdev->mdev.parent = get_device(kbdev->dev);
+
+	err = misc_register(&kbdev->mdev);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't register misc dev %s\n",
+			kbdev->devname);
+		goto out_misc;
+	}
+
+	err = device_create_file(kbdev->dev, &dev_attr_power_policy);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create power_policy sysfs file\n");
+		goto out_file;
+	}
+
+	err = device_create_file(kbdev->dev,
+			&dev_attr_core_availability_policy);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create core_availability_policy sysfs file\n");
+		goto out_file_core_availability_policy;
+	}
+
+	err = device_create_file(kbdev->dev, &dev_attr_core_mask);
+	if (err) {
+		dev_err(kbdev->dev, "Couldn't create core_mask sysfs file\n");
+		goto out_file_core_mask;
+	}
+
+	{
+		const struct list_head *dev_list = kbase_dev_list_get();
+
+		list_add(&kbdev->entry, &kbase_dev_list);
+		kbase_dev_list_put(dev_list);
+	}
+
+	dev_info(kbdev->dev, "Probed as %s\n",
+		dev_name(kbdev->mdev.this_device));
+
+	return 0;
+
+out_file_core_mask:
+	device_remove_file(kbdev->dev, &dev_attr_core_availability_policy);
+out_file_core_availability_policy:
+	device_remove_file(kbdev->dev, &dev_attr_power_policy);
+out_file:
+	misc_deregister(&kbdev->mdev);
+out_misc:
+	put_device(kbdev->dev);
+	kbase_device_debugfs_term(kbdev);
+out_partial:
+	if (inited & inited_vinstr)
+		kbase_vinstr_term(kbdev->vinstr_ctx);
+#ifdef CONFIG_MALI_DEVFREQ
+	if (inited & inited_devfreq)
+		kbase_devfreq_term(kbdev);
+#endif /* CONFIG_MALI_DEVFREQ */
+	if (inited &
inited_backend_late) + kbase_backend_late_term(kbdev); +#ifdef CONFIG_MALI_MIPE_ENABLED + if (inited & inited_tlstream) + kbase_tlstream_term(); +#endif /* CONFIG_MALI_MIPE_ENABLED */ + + if (inited & inited_js_scheduling_period) + device_remove_file(kbdev->dev, &dev_attr_js_scheduling_period); + if (inited & inited_reset_timeout) + device_remove_file(kbdev->dev, &dev_attr_reset_timeout); + if (inited & inited_pm_poweroff) + device_remove_file(kbdev->dev, &dev_attr_pm_poweroff); + if (inited & inited_dvfs_period) + device_remove_file(kbdev->dev, &dev_attr_dvfs_period); +#if !MALI_CUSTOMER_RELEASE + if (inited & inited_force_replay) + device_remove_file(kbdev->dev, &dev_attr_force_replay); +#endif /* !MALI_CUSTOMER_RELEASE */ + if (inited & inited_js_timeouts) + device_remove_file(kbdev->dev, &dev_attr_js_timeouts); +#ifdef CONFIG_MALI_DEBUG + if (inited & inited_js_softstop) + device_remove_file(kbdev->dev, &dev_attr_js_softstop_always); + + if (inited & inited_debug) + device_remove_file(kbdev->dev, &dev_attr_debug_command); + +#endif /* CONFIG_MALI_DEBUG */ + +#ifdef CONFIG_MALI_DEBUG_SHADER_SPLIT_FS + if (inited & inited_sc_split) + device_remove_file(kbdev->dev, &dev_attr_sc_split); +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ + + if (inited & inited_gpuinfo) + device_remove_file(kbdev->dev, &dev_attr_gpuinfo); + + if (inited & inited_js) + kbasep_js_devdata_halt(kbdev); + + if (inited & inited_mem) + kbase_mem_halt(kbdev); + + if (inited & inited_js) + kbasep_js_devdata_term(kbdev); + + if (inited & inited_mem) + kbase_mem_term(kbdev); + + if (inited & inited_pm_runtime_init) { + if (kbdev->pm.callback_power_runtime_term) + kbdev->pm.callback_power_runtime_term(kbdev); + } + + if (inited & inited_device) + kbase_device_term(kbdev); + + if (inited & inited_backend_early) + kbase_backend_early_term(kbdev); + + return err; +} + + +static int kbase_platform_device_probe(struct platform_device *pdev) +{ + struct kbase_device *kbdev; + struct resource *reg_res; + int err; + int i; + +#ifdef CONFIG_OF + err = kbase_platform_early_init(); + if (err) { + dev_err(&pdev->dev, "Early platform initialization failed\n"); + return err; + } +#endif + + kbdev = kbase_device_alloc(); + if (!kbdev) { + dev_err(&pdev->dev, "Can't allocate device\n"); + err = -ENOMEM; + goto out; + } +#ifdef CONFIG_MALI_NO_MALI + err = gpu_device_create(kbdev); + if (err) { + dev_err(&pdev->dev, "Can't initialize dummy model\n"); + goto out_midg; + } +#endif /* CONFIG_MALI_NO_MALI */ + + kbdev->dev = &pdev->dev; + + /* 3 IRQ resources */ + for (i = 0; i < 3; i++) { + struct resource *irq_res; + int irqtag; + + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); + if (!irq_res) { + dev_err(kbdev->dev, "No IRQ resource at index %d\n", i); + err = -ENOENT; + goto out_platform_irq; + } + +#ifdef CONFIG_OF + if (!strcmp(irq_res->name, "JOB")) { + irqtag = JOB_IRQ_TAG; + } else if (!strcmp(irq_res->name, "MMU")) { + irqtag = MMU_IRQ_TAG; + } else if (!strcmp(irq_res->name, "GPU")) { + irqtag = GPU_IRQ_TAG; + } else { + dev_err(&pdev->dev, "Invalid irq res name: '%s'\n", + irq_res->name); + err = -EINVAL; + goto out_irq_name; + } +#else + irqtag = i; +#endif /* CONFIG_OF */ + kbdev->irqs[irqtag].irq = irq_res->start; + kbdev->irqs[irqtag].flags = (irq_res->flags & IRQF_TRIGGER_MASK); + } + + /* the first memory resource is the physical address of the GPU registers */ + reg_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!reg_res) { + dev_err(kbdev->dev, "Invalid register resource\n"); + err = -ENOENT; + goto 
out_platform_mem; + } + + kbdev->reg_start = reg_res->start; + kbdev->reg_size = resource_size(reg_res); + + err = kbase_common_reg_map(kbdev); + if (err) + goto out_reg_map; + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + kbdev->regulator = regulator_get_optional(kbdev->dev, "mali"); + if (IS_ERR_OR_NULL(kbdev->regulator)) { + dev_info(kbdev->dev, "Continuing without Mali regulator control\n"); + kbdev->regulator = NULL; + /* Allow probe to continue without regulator */ + } +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ + +#ifdef CONFIG_MALI_PLATFORM_DEVICETREE + pm_runtime_enable(kbdev->dev); +#endif + kbdev->clock = clk_get(kbdev->dev, "clk_mali"); + if (IS_ERR_OR_NULL(kbdev->clock)) { + dev_info(kbdev->dev, "Continuing without Mali clock control\n"); + kbdev->clock = NULL; + /* Allow probe to continue without clock. */ + } else { + err = clk_prepare_enable(kbdev->clock); + if (err) { + dev_err(kbdev->dev, + "Failed to prepare and enable clock (%d)\n", err); + goto out_clock_prepare; + } + } + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_PM_OPP) + /* Register the OPPs if they are available in device tree */ + if (of_init_opp_table(kbdev->dev) < 0) + dev_dbg(kbdev->dev, "OPP table not found\n"); +#endif + + + err = kbase_common_device_init(kbdev); + if (err) { + dev_err(kbdev->dev, "Failed kbase_common_device_init\n"); + goto out_common_init; + } + return 0; + +out_common_init: + clk_disable_unprepare(kbdev->clock); +out_clock_prepare: + clk_put(kbdev->clock); +#ifdef CONFIG_MALI_PLATFORM_DEVICETREE + pm_runtime_disable(kbdev->dev); +#endif +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + regulator_put(kbdev->regulator); +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ + kbase_common_reg_unmap(kbdev); +out_reg_map: +out_platform_mem: +#ifdef CONFIG_OF +out_irq_name: +#endif +out_platform_irq: +#ifdef CONFIG_MALI_NO_MALI + gpu_device_destroy(kbdev); +out_midg: +#endif /* CONFIG_MALI_NO_MALI */ + kbase_device_free(kbdev); +out: + return err; +} + +static int kbase_common_device_remove(struct kbase_device *kbdev) +{ + kbase_vinstr_term(kbdev->vinstr_ctx); +#ifdef CONFIG_DEBUG_FS + debugfs_remove_recursive(kbdev->mali_debugfs_directory); +#endif +#ifdef CONFIG_MALI_DEVFREQ + kbase_devfreq_term(kbdev); +#endif + + kbase_backend_late_term(kbdev); + + if (kbdev->pm.callback_power_runtime_term) + kbdev->pm.callback_power_runtime_term(kbdev); +#ifdef CONFIG_MALI_PLATFORM_DEVICETREE + pm_runtime_disable(kbdev->dev); +#endif + device_remove_file(kbdev->dev, &dev_attr_js_scheduling_period); + device_remove_file(kbdev->dev, &dev_attr_reset_timeout); + device_remove_file(kbdev->dev, &dev_attr_pm_poweroff); + device_remove_file(kbdev->dev, &dev_attr_dvfs_period); + device_remove_file(kbdev->dev, &dev_attr_power_policy); + device_remove_file(kbdev->dev, &dev_attr_core_availability_policy); + device_remove_file(kbdev->dev, &dev_attr_core_mask); + +#ifdef CONFIG_MALI_MIPE_ENABLED + kbase_tlstream_term(); +#endif /* CONFIG_MALI_MIPE_ENABLED */ + +#ifdef CONFIG_MALI_DEBUG + device_remove_file(kbdev->dev, &dev_attr_js_softstop_always); + device_remove_file(kbdev->dev, &dev_attr_debug_command); +#endif /* CONFIG_MALI_DEBUG */ + device_remove_file(kbdev->dev, &dev_attr_js_timeouts); +#if !MALI_CUSTOMER_RELEASE + device_remove_file(kbdev->dev, &dev_attr_force_replay); +#endif /* !MALI_CUSTOMER_RELEASE */ + +#ifdef 
CONFIG_MALI_DEBUG_SHADER_SPLIT_FS + device_remove_file(kbdev->dev, &dev_attr_sc_split); +#endif /* CONFIG_MALI_DEBUG_SHADER_SPLIT_FS */ + device_remove_file(kbdev->dev, &dev_attr_gpuinfo); + + kbasep_js_devdata_halt(kbdev); + kbase_mem_halt(kbdev); + + kbasep_js_devdata_term(kbdev); + kbase_mem_term(kbdev); + kbase_backend_early_term(kbdev); + + { + const struct list_head *dev_list = kbase_dev_list_get(); + + list_del(&kbdev->entry); + kbase_dev_list_put(dev_list); + } + misc_deregister(&kbdev->mdev); + put_device(kbdev->dev); + kbase_common_reg_unmap(kbdev); + kbase_device_term(kbdev); + if (kbdev->clock) { + clk_disable_unprepare(kbdev->clock); + clk_put(kbdev->clock); + kbdev->clock = NULL; + } +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) && defined(CONFIG_OF) \ + && defined(CONFIG_REGULATOR) + regulator_put(kbdev->regulator); +#endif /* LINUX_VERSION_CODE >= 3, 12, 0 */ +#ifdef CONFIG_MALI_NO_MALI + gpu_device_destroy(kbdev); +#endif /* CONFIG_MALI_NO_MALI */ + kbase_device_free(kbdev); + + return 0; +} + +static int kbase_platform_device_remove(struct platform_device *pdev) +{ + struct kbase_device *kbdev = to_kbase_device(&pdev->dev); + + if (!kbdev) + return -ENODEV; + + return kbase_common_device_remove(kbdev); +} + +/** Suspend callback from the OS. + * + * This is called by Linux when the device should suspend. + * + * @param dev The device to suspend + * + * @return A standard Linux error code + */ +static int kbase_device_suspend(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + +#if defined(CONFIG_PM_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + devfreq_suspend_device(kbdev->devfreq); +#endif + + kbase_pm_suspend(kbdev); + return 0; +} + +/** Resume callback from the OS. + * + * This is called by Linux when the device should resume from suspension. + * + * @param dev The device to resume + * + * @return A standard Linux error code + */ +static int kbase_device_resume(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + + kbase_pm_resume(kbdev); + +#if defined(CONFIG_PM_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + devfreq_resume_device(kbdev->devfreq); +#endif + return 0; +} + +/** Runtime suspend callback from the OS. + * + * This is called by Linux when the device should prepare for a condition in which it will + * not be able to communicate with the CPU(s) and RAM due to power management. + * + * @param dev The device to suspend + * + * @return A standard Linux error code + */ +#ifdef CONFIG_PM_RUNTIME +static int kbase_device_runtime_suspend(struct device *dev) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + + if (!kbdev) + return -ENODEV; + +#if defined(CONFIG_PM_DEVFREQ) && \ + (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + devfreq_suspend_device(kbdev->devfreq); +#endif + + if (kbdev->pm.backend.callback_power_runtime_off) { + kbdev->pm.backend.callback_power_runtime_off(kbdev); + dev_dbg(dev, "runtime suspend\n"); + } + return 0; +} +#endif /* CONFIG_PM_RUNTIME */ + +/** Runtime resume callback from the OS. + * + * This is called by Linux when the device should go into a fully active state. 
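+ *
+ * Illustrative only (not from the original sources): this callback is not
+ * called directly but is invoked by the runtime PM core, e.g. a
+ * hypothetical caller would do:
+ *
+ *   pm_runtime_enable(kbdev->dev);    (once, during probe)
+ *   pm_runtime_get_sync(kbdev->dev);  (triggers .runtime_resume)
+ *   pm_runtime_put(kbdev->dev);       (may lead to .runtime_idle/_suspend)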
+ *
+ * @param dev The device to resume
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef CONFIG_PM_RUNTIME
+int kbase_device_runtime_resume(struct device *dev)
+{
+	int ret = 0;
+	struct kbase_device *kbdev = to_kbase_device(dev);
+
+	if (!kbdev)
+		return -ENODEV;
+
+	if (kbdev->pm.backend.callback_power_runtime_on) {
+		ret = kbdev->pm.backend.callback_power_runtime_on(kbdev);
+		dev_dbg(dev, "runtime resume\n");
+	}
+
+#if defined(CONFIG_PM_DEVFREQ) && \
+		(LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0))
+	devfreq_resume_device(kbdev->devfreq);
+#endif
+
+	return ret;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** Runtime idle callback from the OS.
+ *
+ * This is called by Linux when the device appears to be inactive and it
+ * might be placed into a low power state.
+ *
+ * @param dev The device to check
+ *
+ * @return A standard Linux error code
+ */
+
+#ifdef CONFIG_PM_RUNTIME
+static int kbase_device_runtime_idle(struct device *dev)
+{
+	/* Avoid pm_runtime_suspend being called */
+	return 1;
+}
+#endif /* CONFIG_PM_RUNTIME */
+
+/** The power management operations for the platform driver.
+ */
+static const struct dev_pm_ops kbase_pm_ops = {
+	.suspend = kbase_device_suspend,
+	.resume = kbase_device_resume,
+#ifdef CONFIG_PM_RUNTIME
+	.runtime_suspend = kbase_device_runtime_suspend,
+	.runtime_resume = kbase_device_runtime_resume,
+	.runtime_idle = kbase_device_runtime_idle,
+#endif /* CONFIG_PM_RUNTIME */
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id kbase_dt_ids[] = {
+	{ .compatible = "arm,malit6xx" },
+	{ .compatible = "arm,mali-midgard" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, kbase_dt_ids);
+#endif
+
+static struct platform_driver kbase_platform_driver = {
+	.probe = kbase_platform_device_probe,
+	.remove = kbase_platform_device_remove,
+	.driver = {
+		.name = kbase_drv_name,
+		.owner = THIS_MODULE,
+		.pm = &kbase_pm_ops,
+		.of_match_table = of_match_ptr(kbase_dt_ids),
+	},
+};
+
+/*
+ * The driver will not provide a shortcut to create the Mali platform device
+ * anymore when using Device Tree.
+ */
+#ifdef CONFIG_OF
+module_platform_driver(kbase_platform_driver);
+#else
+
+static int __init kbase_driver_init(void)
+{
+	int ret;
+
+	ret = kbase_platform_early_init();
+	if (ret)
+		return ret;
+
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	ret = kbase_platform_fake_register();
+	if (ret)
+		return ret;
+#endif
+#endif
+	ret = platform_driver_register(&kbase_platform_driver);
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	if (ret)
+		kbase_platform_fake_unregister();
+#endif
+#endif
+	return ret;
+}
+
+static void __exit kbase_driver_exit(void)
+{
+	platform_driver_unregister(&kbase_platform_driver);
+#ifndef CONFIG_MACH_MANTA
+#ifdef CONFIG_MALI_PLATFORM_FAKE
+	kbase_platform_fake_unregister();
+#endif
+#endif
+}
+
+module_init(kbase_driver_init);
+module_exit(kbase_driver_exit);
+
+#endif /* CONFIG_OF */
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
+	__stringify(BASE_UK_VERSION_MAJOR) "."
\ + __stringify(BASE_UK_VERSION_MINOR) ")"); + +#if defined(CONFIG_MALI_GATOR_SUPPORT) || defined(CONFIG_MALI_SYSTEM_TRACE) +#define CREATE_TRACE_POINTS +#endif + +#ifdef CONFIG_MALI_GATOR_SUPPORT +/* Create the trace points (otherwise we just get code to call a tracepoint) */ +#include "mali_linux_trace.h" + +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_job_slots_event); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_status); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_on); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_pm_power_off); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_page_fault_insert_pages); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_in_use); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_mmu_as_released); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_total_alloc_pages_change); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_sw_counter); + +void kbase_trace_mali_pm_status(u32 event, u64 value) +{ + trace_mali_pm_status(event, value); +} + +void kbase_trace_mali_pm_power_off(u32 event, u64 value) +{ + trace_mali_pm_power_off(event, value); +} + +void kbase_trace_mali_pm_power_on(u32 event, u64 value) +{ + trace_mali_pm_power_on(event, value); +} + +void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id) +{ + trace_mali_job_slots_event(event, (kctx != NULL ? kctx->tgid : 0), (kctx != NULL ? kctx->pid : 0), atom_id); +} + +void kbase_trace_mali_page_fault_insert_pages(int event, u32 value) +{ + trace_mali_page_fault_insert_pages(event, value); +} + +void kbase_trace_mali_mmu_as_in_use(int event) +{ + trace_mali_mmu_as_in_use(event); +} + +void kbase_trace_mali_mmu_as_released(int event) +{ + trace_mali_mmu_as_released(event); +} + +void kbase_trace_mali_total_alloc_pages_change(long long int event) +{ + trace_mali_total_alloc_pages_change(event); +} +#endif /* CONFIG_MALI_GATOR_SUPPORT */ +#ifdef CONFIG_MALI_SYSTEM_TRACE +#include "mali_linux_kbase_trace.h" +#endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c new file mode 100644 index 00000000000..5b62539dfba --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.c @@ -0,0 +1,125 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+/*
+ * Base kernel property query APIs
+ */
+
+#include "mali_kbase.h"
+#ifdef BASE_LEGACY_UK7_SUPPORT
+
+#include "mali_kbase_cpuprops.h"
+#include "mali_kbase_uku.h"
+#include <mali_kbase_config.h>
+#include <mali_kbase_config_defaults.h>
+#include <linux/cache.h>
+#include <linux/cpufreq.h>
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+#include <asm/cputype.h>
+#endif
+
+#define KBASE_DEFAULT_CPU_NUM 0
+
+#define L1_DCACHE_LINE_SIZE_LOG2 L1_CACHE_SHIFT
+
+/*
+ * Macros used to extract cpu id info
+ * see documentation for the Main ID register
+ */
+#define KBASE_CPUPROPS_ID_GET_REV(cpuid)     ((cpuid) & 0x0F)          /* [3:0] Revision */
+#define KBASE_CPUPROPS_ID_GET_PART_NR(cpuid) (((cpuid) >> 4) & 0xFFF)  /* [15:4] Part number */
+#define KBASE_CPUPROPS_ID_GET_ARCH(cpuid)    (((cpuid) >> 16) & 0x0F)  /* [19:16] Architecture */
+#define KBASE_CPUPROPS_ID_GET_VARIANT(cpuid) (((cpuid) >> 20) & 0x0F)  /* [23:20] Variant */
+#define KBASE_CPUPROPS_ID_GET_CODE(cpuid)    (((cpuid) >> 24) & 0xFF)  /* [31:24] ASCII code of implementer trademark */
+
+/* Below value sourced from OSK */
+#define L1_DCACHE_SIZE ((u32)0x00008000)
+
+/**
+ * kbasep_cpuprops_uk_get_cpu_id_info - Retrieves detailed CPU info from the
+ *                                      given cpu_val (ID reg)
+ * @kbase_props: CPU props to be filled in with cpu id info
+ *
+ */
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+static void kbasep_cpuprops_uk_get_cpu_id_info(struct kbase_uk_cpuprops * const kbase_props)
+{
+	kbase_props->props.cpu_id.id = read_cpuid_id();
+
+	kbase_props->props.cpu_id.valid = 1;
+	kbase_props->props.cpu_id.rev = KBASE_CPUPROPS_ID_GET_REV(kbase_props->props.cpu_id.id);
+	kbase_props->props.cpu_id.part = KBASE_CPUPROPS_ID_GET_PART_NR(kbase_props->props.cpu_id.id);
+	kbase_props->props.cpu_id.arch = KBASE_CPUPROPS_ID_GET_ARCH(kbase_props->props.cpu_id.id);
+	kbase_props->props.cpu_id.variant = KBASE_CPUPROPS_ID_GET_VARIANT(kbase_props->props.cpu_id.id);
+	kbase_props->props.cpu_id.implementer = KBASE_CPUPROPS_ID_GET_CODE(kbase_props->props.cpu_id.id);
+}
+#else
+static void kbasep_cpuprops_uk_get_cpu_id_info(struct kbase_uk_cpuprops * const kbase_props)
+{
+	kbase_props->props.cpu_id.id = 0;
+	kbase_props->props.cpu_id.valid = 0;
+	kbase_props->props.cpu_id.rev = 0;
+	kbase_props->props.cpu_id.part = 0;
+	kbase_props->props.cpu_id.arch = 0;
+	kbase_props->props.cpu_id.variant = 0;
+	kbase_props->props.cpu_id.implementer = 'N';
+}
+#endif
+
+/*
+ * This function (and file!) is kept for backward-compatibility reasons.
+ * It shall be removed as soon as the KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
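+ *
+ * Worked example for the ID macros above (illustrative, not from the
+ * original sources): a Main ID register value of 0x410FC0F1 (Cortex-A15
+ * r0p1) decodes as implementer 0x41 ('A'), variant 0x0, architecture 0xF,
+ * part number 0xC0F and revision 0x1.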
+ */
+
+int kbase_cpuprops_uk_get_props(struct kbase_context *kctx,
+		struct kbase_uk_cpuprops * const props)
+{
+	unsigned int max_cpu_freq;
+
+	props->props.cpu_l1_dcache_line_size_log2 = L1_DCACHE_LINE_SIZE_LOG2;
+	props->props.cpu_l1_dcache_size = L1_DCACHE_SIZE;
+	props->props.cpu_flags = BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN;
+
+	props->props.nr_cores = num_possible_cpus();
+	props->props.cpu_page_size_log2 = PAGE_SHIFT;
+	props->props.available_memory_size = totalram_pages << PAGE_SHIFT;
+
+	kbasep_cpuprops_uk_get_cpu_id_info(props);
+
+	/* check if kernel supports dynamic frequency scaling */
+	max_cpu_freq = cpufreq_quick_get_max(KBASE_DEFAULT_CPU_NUM);
+	if (max_cpu_freq != 0) {
+		/* convert from kHz to MHz */
+		props->props.max_cpu_clock_speed_mhz = max_cpu_freq / 1000;
+	} else {
+		/* fallback if CONFIG_CPU_FREQ is turned off */
+		int err;
+		kbase_cpu_clk_speed_func get_clock_speed;
+
+		get_clock_speed = (kbase_cpu_clk_speed_func) CPU_SPEED_FUNC;
+		err = get_clock_speed(&props->props.max_cpu_clock_speed_mhz);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+#endif /* BASE_LEGACY_UK7_SUPPORT */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h
new file mode 100644
index 00000000000..daa46e502e2
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_cpuprops.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#include "mali_kbase.h"
+#ifdef BASE_LEGACY_UK7_SUPPORT
+
+/**
+ * @file mali_kbase_cpuprops.h
+ * Base kernel property query APIs
+ */
+
+#ifndef _KBASE_CPUPROPS_H_
+#define _KBASE_CPUPROPS_H_
+
+/* Forward declarations */
+struct kbase_uk_cpuprops;
+
+/**
+ * This file is kept for backward-compatibility reasons.
+ * It shall be removed as soon as the KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+
+/**
+ * @brief Provides CPU properties data.
+ *
+ * Fill the struct kbase_uk_cpuprops with values from CPU configuration.
+ *
+ * @param kctx        The kbase context
+ * @param kbase_props A copy of the struct kbase_uk_cpuprops structure from userspace
+ *
+ * @return 0 on success. Any other value indicates failure.
+ */
+int kbase_cpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_cpuprops * const kbase_props);
+
+#endif /*_KBASE_CPUPROPS_H_*/
+#endif /* BASE_LEGACY_UK7_SUPPORT */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.c b/drivers/gpu/arm/midgard/mali_kbase_debug.c
new file mode 100644
index 00000000000..fb57ac2e31a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug.c
@@ -0,0 +1,39 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include <mali_kbase.h> + +static struct kbasep_debug_assert_cb kbasep_debug_assert_registered_cb = { + NULL, + NULL +}; + +void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param) +{ + kbasep_debug_assert_registered_cb.func = func; + kbasep_debug_assert_registered_cb.param = param; +} + +void kbasep_debug_assert_call_hook(void) +{ + if (kbasep_debug_assert_registered_cb.func != NULL) + kbasep_debug_assert_registered_cb.func(kbasep_debug_assert_registered_cb.param); +} +KBASE_EXPORT_SYMBOL(kbasep_debug_assert_call_hook); + diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug.h b/drivers/gpu/arm/midgard/mali_kbase_debug.h new file mode 100644 index 00000000000..5fff2892bb5 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_debug.h @@ -0,0 +1,164 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _KBASE_DEBUG_H +#define _KBASE_DEBUG_H + +#include <linux/bug.h> + +/** @brief If equals to 0, a trace containing the file, line, and function will be displayed before each message. */ +#define KBASE_DEBUG_SKIP_TRACE 0 + +/** @brief If different from 0, the trace will only contain the file and line. */ +#define KBASE_DEBUG_SKIP_FUNCTION_NAME 0 + +/** @brief Disable the asserts tests if set to 1. Default is to disable the asserts in release. */ +#ifndef KBASE_DEBUG_DISABLE_ASSERTS +#ifdef CONFIG_MALI_DEBUG +#define KBASE_DEBUG_DISABLE_ASSERTS 0 +#else +#define KBASE_DEBUG_DISABLE_ASSERTS 1 +#endif +#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + +/** Function type that is called on an KBASE_DEBUG_ASSERT() or KBASE_DEBUG_ASSERT_MSG() */ +typedef void (kbase_debug_assert_hook) (void *); + +struct kbasep_debug_assert_cb { + kbase_debug_assert_hook *func; + void *param; +}; + +/** + * @def KBASEP_DEBUG_PRINT_TRACE + * @brief Private macro containing the format of the trace to display before every message + * @sa KBASE_DEBUG_SKIP_TRACE, KBASE_DEBUG_SKIP_FUNCTION_NAME + */ +#if !KBASE_DEBUG_SKIP_TRACE +#define KBASEP_DEBUG_PRINT_TRACE \ + "In file: " __FILE__ " line: " CSTD_STR2(__LINE__) +#if !KBASE_DEBUG_SKIP_FUNCTION_NAME +#define KBASEP_DEBUG_PRINT_FUNCTION __func__ +#else +#define KBASEP_DEBUG_PRINT_FUNCTION "" +#endif +#else +#define KBASEP_DEBUG_PRINT_TRACE "" +#endif + +/** + * @def KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) + * @brief (Private) system printing function associated to the @see KBASE_DEBUG_ASSERT_MSG event. + * @param trace location in the code from where the message is printed + * @param function function from where the message is printed + * @param ... Format string followed by format arguments. 
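+ *
+ * Illustrative use (hypothetical values, not from the original sources):
+ *
+ *   KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE,
+ *                           KBASEP_DEBUG_PRINT_FUNCTION,
+ *                           "unexpected state %d", state);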
+ * @note function parameter cannot be concatenated with other strings + */ +/* Select the correct system output function*/ +#ifdef CONFIG_MALI_DEBUG +#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...)\ + do { \ + pr_err("Mali<ASSERT>: %s function:%s ", trace, function);\ + pr_err(__VA_ARGS__);\ + pr_err("\n");\ + } while (false) +#else +#define KBASEP_DEBUG_ASSERT_OUT(trace, function, ...) CSTD_NOP() +#endif + +#ifdef CONFIG_MALI_DEBUG +#define KBASE_CALL_ASSERT_HOOK() kbasep_debug_assert_call_hook() +#else +#define KBASE_CALL_ASSERT_HOOK() CSTD_NOP() +#endif + +/** + * @def KBASE_DEBUG_ASSERT(expr) + * @brief Calls @see KBASE_PRINT_ASSERT and prints the expression @a expr if @a expr is false + * + * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + * + * @param expr Boolean expression + */ +#define KBASE_DEBUG_ASSERT(expr) \ + KBASE_DEBUG_ASSERT_MSG(expr, #expr) + +#if KBASE_DEBUG_DISABLE_ASSERTS +#define KBASE_DEBUG_ASSERT_MSG(expr, ...) CSTD_NOP() +#else + /** + * @def KBASE_DEBUG_ASSERT_MSG(expr, ...) + * @brief Calls @see KBASEP_DEBUG_ASSERT_OUT and prints the given message if @a expr is false + * + * @note This macro does nothing if the flag @see KBASE_DEBUG_DISABLE_ASSERTS is set to 1 + * + * @param expr Boolean expression + * @param ... Message to display when @a expr is false, as a format string followed by format arguments. + */ +#define KBASE_DEBUG_ASSERT_MSG(expr, ...) \ + do { \ + if (!(expr)) { \ + KBASEP_DEBUG_ASSERT_OUT(KBASEP_DEBUG_PRINT_TRACE, KBASEP_DEBUG_PRINT_FUNCTION, __VA_ARGS__);\ + KBASE_CALL_ASSERT_HOOK();\ + BUG();\ + } \ + } while (false) +#endif /* KBASE_DEBUG_DISABLE_ASSERTS */ + +/** + * @def KBASE_DEBUG_CODE( X ) + * @brief Executes the code inside the macro only in debug mode + * + * @param X Code to compile only in debug mode. + */ +#ifdef CONFIG_MALI_DEBUG +#define KBASE_DEBUG_CODE(X) X +#else +#define KBASE_DEBUG_CODE(X) CSTD_NOP() +#endif /* CONFIG_MALI_DEBUG */ + +/** @} */ + +/** + * @brief Register a function to call on ASSERT + * + * Such functions will \b only be called during Debug mode, and for debugging + * features \b only. Do not rely on them to be called in general use. + * + * To disable the hook, supply NULL to \a func. + * + * @note This function is not thread-safe, and should only be used to + * register/deregister once in the module's lifetime. + * + * @param[in] func the function to call when an assert is triggered. + * @param[in] param the parameter to pass to \a func when calling it + */ +void kbase_debug_assert_register_hook(kbase_debug_assert_hook *func, void *param); + +/** + * @brief Call a debug assert hook previously registered with kbase_debug_assert_register_hook() + * + * @note This function is not thread-safe with respect to multiple threads + * registering functions and parameters with + * kbase_debug_assert_register_hook(). Otherwise, thread safety is the + * responsibility of the registered hook. + */ +void kbasep_debug_assert_call_hook(void); + +#endif /* _KBASE_DEBUG_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c new file mode 100644 index 00000000000..1a3198e5b53 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.c @@ -0,0 +1,251 @@ +/* + * + * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved. 
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/*
+ * Debugfs interface to dump the memory visible to the GPU
+ */
+
+#include "mali_kbase_debug_mem_view.h"
+#include "mali_kbase.h"
+
+#include <linux/list.h>
+#include <linux/file.h>
+#include <linux/vmalloc.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+struct debug_mem_mapping {
+	struct list_head node;
+
+	struct kbase_mem_phy_alloc *alloc;
+	unsigned long flags;
+
+	u64 start_pfn;
+	size_t nr_pages;
+};
+
+struct debug_mem_data {
+	struct list_head mapping_list;
+	struct kbase_context *kctx;
+};
+
+struct debug_mem_seq_off {
+	struct list_head *lh;
+	size_t offset;
+};
+
+static void *debug_mem_start(struct seq_file *m, loff_t *_pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data;
+	struct debug_mem_mapping *map;
+	loff_t pos = *_pos;
+
+	list_for_each_entry(map, &mem_data->mapping_list, node) {
+		if (pos >= map->nr_pages) {
+			pos -= map->nr_pages;
+		} else {
+			data = kmalloc(sizeof(*data), GFP_KERNEL);
+			if (!data)
+				return NULL;
+			data->lh = &map->node;
+			data->offset = pos;
+			return data;
+		}
+	}
+
+	/* Beyond the end */
+	return NULL;
+}
+
+static void debug_mem_stop(struct seq_file *m, void *v)
+{
+	kfree(v);
+}
+
+static void *debug_mem_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	if (data->offset < map->nr_pages - 1) {
+		data->offset++;
+		++*pos;
+		return data;
+	}
+
+	if (list_is_last(data->lh, &mem_data->mapping_list))
+		return NULL;
+
+	data->lh = data->lh->next;
+	data->offset = 0;
+	++*pos;
+
+	return data;
+}
+
+static int debug_mem_show(struct seq_file *m, void *v)
+{
+	struct debug_mem_data *mem_data = m->private;
+	struct debug_mem_seq_off *data = v;
+	struct debug_mem_mapping *map;
+	int i, j;
+	struct page *page;
+	uint32_t *mapping;
+	pgprot_t prot = PAGE_KERNEL;
+
+	map = list_entry(data->lh, struct debug_mem_mapping, node);
+
+	kbase_gpu_vm_lock(mem_data->kctx);
+
+	if (data->offset >= map->alloc->nents) {
+		seq_printf(m, "%016llx: Unbacked page\n\n", (map->start_pfn +
+				data->offset) << PAGE_SHIFT);
+		goto out;
+	}
+
+	if (!(map->flags & KBASE_REG_CPU_CACHED))
+		prot = pgprot_writecombine(prot);
+
+	page = pfn_to_page(PFN_DOWN(map->alloc->pages[data->offset]));
+	mapping = vmap(&page, 1, VM_MAP, prot);
+	if (!mapping)
+		goto out;
+
+	for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
+		seq_printf(m, "%016llx:", i + ((map->start_pfn +
+				data->offset) << PAGE_SHIFT));
+
+		for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
+			seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
+		seq_putc(m, '\n');
+	}
+
+	vunmap(mapping);
+
+	seq_putc(m, '\n');
+
+out:
+	kbase_gpu_vm_unlock(mem_data->kctx);
+	return 0;
+}
+
+static const struct seq_operations ops = {
+	.start = debug_mem_start,
+	.next = debug_mem_next,
+	.stop = debug_mem_stop,
+	.show = debug_mem_show,
+};
+
+static int debug_mem_open(struct inode *i, struct file *file)
+{
+	struct file *kctx_file = i->i_private;
+	struct kbase_context *kctx = kctx_file->private_data;
+	struct debug_mem_mapping *mapping;
+	struct rb_node *p;
+	struct debug_mem_data *mem_data;
+	int ret;
+
+	ret = seq_open(file, &ops);
+	if (ret)
+		return ret;
+
+	mem_data = kmalloc(sizeof(*mem_data), GFP_KERNEL);
+	if (!mem_data) {
+		ret = -ENOMEM;
+		goto out_release;
+	}
+	mem_data->kctx = kctx;
+
+	INIT_LIST_HEAD(&mem_data->mapping_list);
+
+	get_file(kctx_file);
+
+	kbase_gpu_vm_lock(kctx);
+
+	for (p = rb_first(&kctx->reg_rbtree); p; p = rb_next(p)) {
+		struct kbase_va_region *reg;
+
+		reg = rb_entry(p, struct kbase_va_region, rblink);
+
+		if (reg->gpu_alloc == NULL)
+			/* Empty region - ignore */
+			continue;
+
+		mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
+		if (!mapping) {
+			ret = -ENOMEM;
+			kbase_gpu_vm_unlock(kctx);
+			goto out_free;
+		}
+
+		mapping->alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc);
+		mapping->start_pfn = reg->start_pfn;
+		mapping->nr_pages = reg->nr_pages;
+		mapping->flags = reg->flags;
+		list_add_tail(&mapping->node, &mem_data->mapping_list);
+	}
+
+	kbase_gpu_vm_unlock(kctx);
+
+	((struct seq_file *)file->private_data)->private = mem_data;
+
+	return 0;
+
+out_free:
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+	fput(kctx_file);
+	kfree(mem_data);
+out_release:
+	seq_release(i, file);
+	return ret;
+}
+
+static int debug_mem_release(struct inode *inode, struct file *file)
+{
+	struct file *kctx_file = inode->i_private;
+	struct seq_file *sfile = file->private_data;
+	struct debug_mem_data *mem_data = sfile->private;
+	struct debug_mem_mapping *mapping;
+
+	seq_release(inode, file);
+
+	while (!list_empty(&mem_data->mapping_list)) {
+		mapping = list_first_entry(&mem_data->mapping_list,
+				struct debug_mem_mapping, node);
+		kbase_mem_phy_alloc_put(mapping->alloc);
+		list_del(&mapping->node);
+		kfree(mapping);
+	}
+
+	kfree(mem_data);
+
+	fput(kctx_file);
+
+	return 0;
+}
+
+static const struct file_operations kbase_debug_mem_view_fops = {
+	.open = debug_mem_open,
+	.release = debug_mem_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
+/**
+ * kbase_debug_mem_view_init - Initialise the mem_view debugfs file
+ * @kctx_file: The /dev/mali0 file instance for the context
+ *
+ * This function creates a "mem_view" file which can be used to get a view of
+ * the context's memory as the GPU sees it (i.e. using the GPU's page tables).
+ *
+ * The file is cleaned up by a call to debugfs_remove_recursive() deleting the
+ * parent directory.
+ */
+void kbase_debug_mem_view_init(struct file *kctx_file)
+{
+	struct kbase_context *kctx = kctx_file->private_data;
+
+	debugfs_create_file("mem_view", S_IRUGO, kctx->kctx_dentry, kctx_file,
+			&kbase_debug_mem_view_fops);
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
new file mode 100644
index 00000000000..20ab51a776c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_debug_mem_view.h
@@ -0,0 +1,25 @@
+/*
+ *
+ * (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_DEBUG_MEM_VIEW_H
+#define _KBASE_DEBUG_MEM_VIEW_H
+
+#include <mali_kbase.h>
+
+void kbase_debug_mem_view_init(struct file *kctx_file);
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_defs.h b/drivers/gpu/arm/midgard/mali_kbase_defs.h
new file mode 100644
index 00000000000..8b107f20cc4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_defs.h
@@ -0,0 +1,1126 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_defs.h
+ *
+ * Definitions (types, defines, etc.) common to Kbase. They are placed here
+ * to allow the hierarchy of header files to work.
+ */
+
+#ifndef _KBASE_DEFS_H_
+#define _KBASE_DEFS_H_
+
+#include <mali_kbase_config.h>
+#include <mali_base_hwconfig_features.h>
+#include <mali_base_hwconfig_issues.h>
+#include <mali_kbase_mem_lowlevel.h>
+#include <mali_kbase_mem_alloc.h>
+#include <mali_kbase_mmu_hw.h>
+#include <mali_kbase_mmu_mode.h>
+#include <mali_kbase_instr.h>
+
+#include <linux/atomic.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+
+#ifdef CONFIG_KDS
+#include <linux/kds.h>
+#endif /* CONFIG_KDS */
+
+#ifdef CONFIG_SYNC
+#include "sync.h"
+#endif /* CONFIG_SYNC */
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif /* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_PM_DEVFREQ
+#include <linux/devfreq.h>
+#endif /* CONFIG_PM_DEVFREQ */
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+/** Enable SW tracing when set */
+#ifdef CONFIG_MALI_MIDGARD_ENABLE_TRACE
+#define KBASE_TRACE_ENABLE 1
+#endif
+
+#ifndef KBASE_TRACE_ENABLE
+#ifdef CONFIG_MALI_DEBUG
+#define KBASE_TRACE_ENABLE 1
+#else
+#define KBASE_TRACE_ENABLE 0
+#endif /* CONFIG_MALI_DEBUG */
+#endif /* KBASE_TRACE_ENABLE */
+
+/** Dump Job slot trace on error (only active if KBASE_TRACE_ENABLE != 0) */
+#define KBASE_TRACE_DUMP_ON_JOB_SLOT_ERROR 1
+
+/**
+ * Number of milliseconds before resetting the GPU when a job cannot be "zapped" from the hardware.
+ * Note that the time is actually ZAP_TIMEOUT+SOFT_STOP_RESET_TIMEOUT between the context zap starting and the GPU
+ * actually being reset to give other contexts time for their jobs to be soft-stopped and removed from the hardware
+ * before resetting.
+ */
+#define ZAP_TIMEOUT 1000
+
+/** Number of milliseconds before we time out on a GPU soft/hard reset */
+#define RESET_TIMEOUT 500
+
+/**
+ * Prevent soft-stops from occurring in scheduling situations
+ *
+ * This is not due to HW issues, but is used when scheduling is desired to be
+ * more predictable. Note that soft stop may still be disabled separately due
+ * to HW issues.
+ *
+ * @note Soft stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_SOFT_STOPS 0
+/**
+ * Prevent hard-stops from occurring in scheduling situations
+ *
+ * This is not due to HW issues, but is used when scheduling is desired to be
+ * more predictable.
+ *
+ * @note Hard stop will still be used for non-scheduling purposes e.g. when terminating a context.
+ *
+ * @note if not in use, define this value to 0 instead of \#undef'ing it
+ */
+#define KBASE_DISABLE_SCHEDULING_HARD_STOPS 0
+
+/**
+ * The maximum number of Job Slots to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of job slots.
+ */
+#define BASE_JM_MAX_NR_SLOTS 3
+
+/**
+ * The maximum number of Address Spaces to support in the Hardware.
+ *
+ * You can optimize this down if your target devices will only ever support a
+ * small number of Address Spaces
+ */
+#define BASE_MAX_NR_AS 16
+
+/* mmu */
+#define MIDGARD_MMU_VA_BITS 48
+
+#if MIDGARD_MMU_VA_BITS > 39
+#define MIDGARD_MMU_TOPLEVEL 0
+#else
+#define MIDGARD_MMU_TOPLEVEL 1
+#endif
+
+#define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
+
+/** setting in kbase_context::as_nr that indicates it's invalid */
+#define KBASEP_AS_NR_INVALID (-1)
+
+#define KBASE_LOCK_REGION_MAX_SIZE (63)
+#define KBASE_LOCK_REGION_MIN_SIZE (11)
+
+#define KBASE_TRACE_SIZE_LOG2 8	/* 256 entries */
+#define KBASE_TRACE_SIZE (1 << KBASE_TRACE_SIZE_LOG2)
+#define KBASE_TRACE_MASK ((1 << KBASE_TRACE_SIZE_LOG2)-1)
+
+#include "mali_kbase_js_defs.h"
+#include "mali_kbase_hwaccess_defs.h"
+
+#define KBASEP_FORCE_REPLAY_DISABLED 0
+
+/* Maximum force replay limit when randomization is enabled */
+#define KBASEP_FORCE_REPLAY_RANDOM_LIMIT 16
+
+/** Atom has been previously soft-stopped */
+#define KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED (1<<1)
+/** Atom has previously been retried for execution */
+#define KBASE_KATOM_FLAGS_RERUN (1<<2)
+#define KBASE_KATOM_FLAGS_JOBCHAIN (1<<3)
+/** Atom has been previously hard-stopped. */
+#define KBASE_KATOM_FLAG_BEEN_HARD_STOPPED (1<<4)
+/** Atom has caused us to enter disjoint state */
+#define KBASE_KATOM_FLAG_IN_DISJOINT (1<<5)
+/* Atom has a failed dependency on a same-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_PREV (1<<6)
+/* Atom blocked on cross-slot dependency */
+#define KBASE_KATOM_FLAG_X_DEP_BLOCKED (1<<7)
+/* Atom has a failed dependency on a cross-slot dependency */
+#define KBASE_KATOM_FLAG_FAIL_BLOCKER (1<<8)
+/* Atom has been submitted to JSCTX ringbuffers */
+#define KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED (1<<9)
+/* Atom is currently holding a context reference */
+#define KBASE_KATOM_FLAG_HOLDING_CTX_REF (1<<10)
+/* Atom requires GPU to be in secure mode */
+#define KBASE_KATOM_FLAG_SECURE (1<<11)
+
+/* SW related flags about types of JS_COMMAND action
+ * NOTE: These must be masked off by JS_COMMAND_MASK */
+
+/** This command causes a disjoint event */
+#define JS_COMMAND_SW_CAUSES_DISJOINT 0x100
+
+/** Bitmask of all SW related flags */
+#define JS_COMMAND_SW_BITS (JS_COMMAND_SW_CAUSES_DISJOINT)
+
+#if (JS_COMMAND_SW_BITS & JS_COMMAND_MASK)
+#error JS_COMMAND_SW_BITS not masked off by JS_COMMAND_MASK. Must update JS_COMMAND_SW_<..> bitmasks
+#endif
+
+/** Soft-stop command that causes a Disjoint event. This of course isn't
+ * entirely masked off by JS_COMMAND_MASK */
+#define JS_COMMAND_SOFT_STOP_WITH_SW_DISJOINT \
+		(JS_COMMAND_SW_CAUSES_DISJOINT | JS_COMMAND_SOFT_STOP)
+
+#define KBASEP_ATOM_ID_INVALID BASE_JD_ATOM_COUNT
+
+struct kbase_jd_atom_dependency {
+	struct kbase_jd_atom *atom;
+	u8 dep_type;
+};
+
+/**
+ * @brief The function retrieves a read-only reference to the atom field from
+ * the kbase_jd_atom_dependency structure
+ *
+ * @param[in] dep kbase jd atom dependency.
+ *
+ * @return readonly reference to dependent ATOM.
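+ *
+ * Illustrative use (hypothetical caller, not from the original sources):
+ *
+ *   const struct kbase_jd_atom *blocker =
+ *                   kbase_jd_katom_dep_atom(&katom->dep[0]);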
+ */ +static inline const struct kbase_jd_atom *const kbase_jd_katom_dep_atom(const struct kbase_jd_atom_dependency *dep) +{ + LOCAL_ASSERT(dep != NULL); + + return (const struct kbase_jd_atom * const)(dep->atom); +} + +/** + * @brief The function retrieves a read-only reference to the dependency type field from + * the kbase_jd_atom_dependency structure + * + * @param[in] dep kbase jd atom dependency. + * + * @return A dependency type value. + */ +static inline const u8 kbase_jd_katom_dep_type(const struct kbase_jd_atom_dependency *dep) +{ + LOCAL_ASSERT(dep != NULL); + + return dep->dep_type; +} + +/** + * @brief Setter macro for dep_atom array entry in kbase_jd_atom + * + * @param[in] dep The kbase jd atom dependency. + * @param[in] a The ATOM to be set as a dependency. + * @param type The ATOM dependency type to be set. + * + */ +static inline void kbase_jd_katom_dep_set(const struct kbase_jd_atom_dependency *const_dep, + struct kbase_jd_atom *a, u8 type) +{ + struct kbase_jd_atom_dependency *dep; + + LOCAL_ASSERT(const_dep != NULL); + + dep = (struct kbase_jd_atom_dependency *)const_dep; + + dep->atom = a; + dep->dep_type = type; +} + +/** + * @brief Setter macro for dep_atom array entry in kbase_jd_atom + * + * @param[in] dep The kbase jd atom dependency to be cleared. + * + */ +static inline void kbase_jd_katom_dep_clear(const struct kbase_jd_atom_dependency *const_dep) +{ + struct kbase_jd_atom_dependency *dep; + + LOCAL_ASSERT(const_dep != NULL); + + dep = (struct kbase_jd_atom_dependency *)const_dep; + + dep->atom = NULL; + dep->dep_type = BASE_JD_DEP_TYPE_INVALID; +} + +enum kbase_atom_gpu_rb_state { + /* Atom is not currently present in slot ringbuffer */ + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB, + /* Atom is in slot ringbuffer but is blocked on a previous atom */ + KBASE_ATOM_GPU_RB_WAITING_BLOCKED, + /* Atom is in slot ringbuffer but is waiting for cores to become + * available */ + KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE, + /* Atom is in slot ringbuffer but is blocked on affinity */ + KBASE_ATOM_GPU_RB_WAITING_AFFINITY, + /* Atom is in slot ringbuffer but is waiting for secure mode switch */ + KBASE_ATOM_GPU_RB_WAITING_SECURE_MODE, + /* Atom is in slot ringbuffer and ready to run */ + KBASE_ATOM_GPU_RB_READY, + /* Atom is in slot ringbuffer and has been submitted to the GPU */ + KBASE_ATOM_GPU_RB_SUBMITTED, + /* Atom must be returned to JS as soon as it reaches the head of the + * ringbuffer due to a previous failure */ + KBASE_ATOM_GPU_RB_RETURN_TO_JS +}; + +struct kbase_ext_res { + u64 gpu_address; + struct kbase_mem_phy_alloc *alloc; +}; + +struct kbase_jd_atom { + struct work_struct work; + ktime_t start_timestamp; + u64 time_spent_us; /**< Total time spent on the GPU in microseconds */ + + struct base_jd_udata udata; + struct kbase_context *kctx; + + struct list_head dep_head[2]; + struct list_head dep_item[2]; + const struct kbase_jd_atom_dependency dep[2]; + + u16 nr_extres; + struct kbase_ext_res *extres; + + u32 device_nr; + u64 affinity; + u64 jc; + enum kbase_atom_coreref_state coreref_state; +#ifdef CONFIG_KDS + struct list_head node; + struct kds_resource_set *kds_rset; + bool kds_dep_satisfied; +#endif /* CONFIG_KDS */ +#ifdef CONFIG_SYNC + struct sync_fence *fence; + struct sync_fence_waiter sync_waiter; +#endif /* CONFIG_SYNC */ + + /* Note: refer to kbasep_js_atom_retained_state, which will take a copy of some of the following members */ + enum base_jd_event_code event_code; + base_jd_core_req core_req; /**< core requirements */ + /** Job Slot to retry 
submitting to if submission from IRQ handler failed
+	 *
+	 * NOTE: see if this can be unified into another member e.g. the event */
+	int retry_submit_on_slot;
+
+	union kbasep_js_policy_job_info sched_info;
+	/* JS atom priority with respect to other atoms on its kctx. */
+	int sched_priority;
+
+	int poking;		/* BASE_HW_ISSUE_8316 */
+
+	wait_queue_head_t completed;
+	enum kbase_jd_atom_state status;
+#ifdef CONFIG_GPU_TRACEPOINTS
+	int work_id;
+#endif
+	/* Assigned after atom is completed. Used to check whether PRLAM-10676 workaround should be applied */
+	int slot_nr;
+
+	u32 atom_flags;
+
+	/* Number of times this atom has been retried. Used by replay soft job.
+	 */
+	int retry_count;
+
+	enum kbase_atom_gpu_rb_state gpu_rb_state;
+
+	u64 need_cache_flush_cores_retained;
+
+	atomic_t blocked;
+
+	/* Pointer to atom that this atom has cross-slot dependency on */
+	struct kbase_jd_atom *x_pre_dep;
+	/* Pointer to atom that has cross-slot dependency on this atom */
+	struct kbase_jd_atom *x_post_dep;
+
+
+	struct kbase_jd_atom_backend backend;
+};
+
+static inline bool kbase_jd_katom_is_secure(const struct kbase_jd_atom *katom)
+{
+	return (bool)(katom->atom_flags & KBASE_KATOM_FLAG_SECURE);
+}
+
+/*
+ * Theory of operations:
+ *
+ * Atom objects are statically allocated within the context structure.
+ *
+ * Each atom is the head of two lists, one for the "left" set of dependencies, one for the "right" set.
+ */
+
+#define KBASE_JD_DEP_QUEUE_SIZE 256
+
+struct kbase_jd_context {
+	struct mutex lock;
+	struct kbasep_js_kctx_info sched_info;
+	struct kbase_jd_atom atoms[BASE_JD_ATOM_COUNT];
+
+	/** Tracks all job-dispatch jobs. This includes those not tracked by
+	 * the scheduler: 'not ready to run' and 'dependency-only' jobs. */
+	u32 job_nr;
+
+	/** Waitq that reflects whether there are no jobs (including SW-only
+	 * dependency jobs). This is set when no jobs are present on the ctx,
+	 * and clear when there are jobs.
+	 *
+	 * @note: the Job Dispatcher knows about more jobs than the Job
+	 * Scheduler: the Job Scheduler is unaware of jobs that are blocked on
+	 * dependencies, and of SW-only dependency jobs.
+	 *
+	 * This waitq can be waited upon to find out when the context jobs are all
+	 * done/cancelled (including those that might've been blocked on
+	 * dependencies) - and so, whether it can be terminated. However, it should
+	 * only be terminated once it is neither present in the policy-queue (see
+	 * kbasep_js_policy_try_evict_ctx() ) nor the run-pool (see
+	 * kbasep_js_kctx_info::ctx::is_scheduled).
+	 *
+	 * Since the waitq is only set under kbase_jd_context::lock, the waiter
+	 * should also briefly obtain and drop kbase_jd_context::lock to
+	 * guarantee that the setter has completed its work on the
+	 * kbase_context
+	 *
+	 * This must be updated atomically with:
+	 * - kbase_jd_context::job_nr */
+	wait_queue_head_t zero_jobs_wait;
+
+	/** Job Done workqueue.
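+	 *
+	 * (Clarifying note, an assumption rather than original text:) atoms
+	 * that complete in interrupt context are expected to be queued onto
+	 * this workqueue so that the bulk of job-done processing can run in
+	 * process context.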
	 */
+	struct workqueue_struct *job_done_wq;
+
+	spinlock_t tb_lock;
+	u32 *tb;
+	size_t tb_wrap_offset;
+
+#ifdef CONFIG_KDS
+	struct kds_callback kds_cb;
+#endif /* CONFIG_KDS */
+#ifdef CONFIG_GPU_TRACEPOINTS
+	atomic_t work_id;
+#endif
+};
+
+struct kbase_device_info {
+	u32 features;
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+enum {
+	KBASE_AS_POKE_STATE_IN_FLIGHT = 1<<0,
+	KBASE_AS_POKE_STATE_KILLING_POKE = 1<<1
+};
+
+/** Poking state for BASE_HW_ISSUE_8316  */
+typedef u32 kbase_as_poke_state;
+
+struct kbase_mmu_setup {
+	u64	transtab;
+	u64	memattr;
+};
+
+/**
+ * Important: Our code makes assumptions that a struct kbase_as structure is always at
+ * kbase_device->as[number]. This is used to recover the containing
+ * struct kbase_device from a struct kbase_as structure.
+ *
+ * Therefore, struct kbase_as structures must not be allocated anywhere else.
+ */
+struct kbase_as {
+	int number;
+
+	struct workqueue_struct *pf_wq;
+	struct work_struct work_pagefault;
+	struct work_struct work_busfault;
+	enum kbase_mmu_fault_type fault_type;
+	u32 fault_status;
+	u64 fault_addr;
+	struct mutex transaction_mutex;
+
+	struct kbase_mmu_setup current_setup;
+
+	/* BASE_HW_ISSUE_8316  */
+	struct workqueue_struct *poke_wq;
+	struct work_struct poke_work;
+	/** Protected by kbasep_js_device_data::runpool_irq::lock */
+	int poke_refcount;
+	/** Protected by kbasep_js_device_data::runpool_irq::lock */
+	kbase_as_poke_state poke_state;
+	struct hrtimer poke_timer;
+};
+
+static inline int kbase_as_has_bus_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_BUS;
+}
+
+static inline int kbase_as_has_page_fault(struct kbase_as *as)
+{
+	return as->fault_type == KBASE_MMU_FAULT_TYPE_PAGE;
+}
+
+struct kbasep_mem_device {
+	atomic_t used_pages;	/* Tracks usage of OS shared memory. Updated
+				   when OS memory is allocated/freed. */
+
+};
+
+#define KBASE_TRACE_CODE(X) KBASE_TRACE_CODE_ ## X
+
+enum kbase_trace_code {
+	/* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE
+	 * THIS MUST BE USED AT THE START OF THE ENUM */
+#define KBASE_TRACE_CODE_MAKE_CODE(X) KBASE_TRACE_CODE(X)
+#include "mali_kbase_trace_defs.h"
+#undef  KBASE_TRACE_CODE_MAKE_CODE
+	/* Comma on its own, to extend the list */
+	,
+	/* Must be the last in the enum */
+	KBASE_TRACE_CODE_COUNT
+};
+
+#define KBASE_TRACE_FLAG_REFCOUNT (((u8)1) << 0)
+#define KBASE_TRACE_FLAG_JOBSLOT  (((u8)1) << 1)
+
+struct kbase_trace {
+	struct timespec timestamp;
+	u32 thread_id;
+	u32 cpu;
+	void *ctx;
+	bool katom;
+	int atom_number;
+	u64 atom_udata[2];
+	u64 gpu_addr;
+	unsigned long info_val;
+	u8 code;
+	u8 jobslot;
+	u8 refcount;
+	u8 flags;
+};
+
+/** Event IDs for the power management framework.
+ *
+ * Any of these events might be missed, so they should not be relied upon to
+ * find the precise state of the GPU at a particular time in the
+ * trace. Overall, we should get a high percentage of these events for
+ * statistical purposes, and so a few missing should not be a problem */
+enum kbase_timeline_pm_event {
+	/* helper for tests */
+	KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** Event reserved for backwards compatibility with 'init' events */
+	KBASE_TIMELINE_PM_EVENT_RESERVED_0 = KBASEP_TIMELINE_PM_EVENT_FIRST,
+
+	/** The power state of the device has changed.
+	 *
+	 * Specifically, the device has reached a desired or available state.
+	 */
+	KBASE_TIMELINE_PM_EVENT_GPU_STATE_CHANGED,
+
+	/** The GPU is becoming active.
+	 *
+	 * This event is sent when the first context is about to use the GPU.
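+	 *
+	 * (Clarifying note, not original text:) a matching
+	 * KBASE_TIMELINE_PM_EVENT_GPU_IDLE below is expected once the last
+	 * context has finished using the GPU.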
+ */ + KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE, + + /** The GPU is becoming idle. + * + * This event is sent when the last context has finished using the GPU. + */ + KBASE_TIMELINE_PM_EVENT_GPU_IDLE, + + /** Event reserved for backwards compatibility with 'policy_change' + * events */ + KBASE_TIMELINE_PM_EVENT_RESERVED_4, + + /** Event reserved for backwards compatibility with 'system_suspend' + * events */ + KBASE_TIMELINE_PM_EVENT_RESERVED_5, + + /** Event reserved for backwards compatibility with 'system_resume' + * events */ + KBASE_TIMELINE_PM_EVENT_RESERVED_6, + + /** The job scheduler is requesting to power up/down cores. + * + * This event is sent when: + * - powered down cores are needed to complete a job + * - powered up cores are not needed anymore + */ + KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE, + + KBASEP_TIMELINE_PM_EVENT_LAST = KBASE_TIMELINE_PM_EVENT_CHANGE_GPU_STATE, +}; + +#ifdef CONFIG_MALI_TRACE_TIMELINE +struct kbase_trace_kctx_timeline { + atomic_t jd_atoms_in_flight; + u32 owner_tgid; +}; + +struct kbase_trace_kbdev_timeline { + /* Note: strictly speaking, not needed, because it's in sync with + * kbase_device::jm_slots[]::submitted_nr + * + * But it's kept as an example of how to add global timeline tracking + * information + * + * The caller must hold kbasep_js_device_data::runpool_irq::lock when + * accessing this */ + u8 slot_atoms_submitted[BASE_JM_MAX_NR_SLOTS]; + + /* Last UID for each PM event */ + atomic_t pm_event_uid[KBASEP_TIMELINE_PM_EVENT_LAST+1]; + /* Counter for generating PM event UIDs */ + atomic_t pm_event_uid_counter; + /* + * L2 transition state - true indicates that the transition is ongoing + * Expected to be protected by pm.power_change_lock */ + bool l2_transitioning; +}; +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + + +struct kbasep_kctx_list_element { + struct list_head link; + struct kbase_context *kctx; +}; + +/** + * Data stored per device for power management. + * + * This structure contains data for the power management framework. There is one + * instance of this structure per device in the system. + */ +struct kbase_pm_device_data { + /** + * The lock protecting Power Management structures accessed outside of + * IRQ. + * + * This lock must also be held whenever the GPU is being powered on or + * off. + */ + struct mutex lock; + + /** The reference count of active contexts on this device. */ + int active_count; + /** Flag indicating suspending/suspended */ + bool suspending; + /* Wait queue set when active_count == 0 */ + wait_queue_head_t zero_active_count_wait; + + /** + * A bit mask identifying the available shader cores that are specified + * via sysfs + */ + u64 debug_core_mask; + + /** + * Lock protecting the power state of the device. + * + * This lock must be held when accessing the shader_available_bitmap, + * tiler_available_bitmap, l2_available_bitmap, shader_inuse_bitmap and + * tiler_inuse_bitmap fields of kbase_device, and the ca_in_transition + * and shader_poweroff_pending fields of kbase_pm_device_data. It is + * also held when the hardware power registers are being written to, to + * ensure that two threads do not conflict over the power transitions + * that the hardware should make. + */ + spinlock_t power_change_lock; + + /** + * Callback for initializing the runtime power management. + * + * @param kbdev The kbase device + * + * @return 0 on success, else error code + */ + int (*callback_power_runtime_init)(struct kbase_device *kbdev); + + /** + * Callback for terminating the runtime power management. 
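+	 *
+	 * (Illustrative note, assumed integration, not original text:) a
+	 * platform would typically point this and callback_power_runtime_init
+	 * above at thin wrappers around the kernel's pm_runtime API.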
+ * + * @param kbdev The kbase device + */ + void (*callback_power_runtime_term)(struct kbase_device *kbdev); + + /* Time in milliseconds between each dvfs sample */ + u32 dvfs_period; + + /* Period of GPU poweroff timer */ + ktime_t gpu_poweroff_time; + + /* Number of ticks of GPU poweroff timer before shader is powered off */ + int poweroff_shader_ticks; + + /* Number of ticks of GPU poweroff timer before GPU is powered off */ + int poweroff_gpu_ticks; + + struct kbase_pm_backend_data backend; +}; + +/** + * struct kbase_secure_ops - Platform specific functions for GPU secure mode + * operations + * @secure_mode_enable: Callback to enable secure mode on the GPU + * @secure_mode_disable: Callback to disable secure mode on the GPU + */ +struct kbase_secure_ops { + /** + * secure_mode_enable() - Enable secure mode on the GPU + * @kbdev: The kbase device + * + * Return: 0 on success, non-zero on error + */ + int (*secure_mode_enable)(struct kbase_device *kbdev); + + /** + * secure_mode_disable() - Disable secure mode on the GPU + * @kbdev: The kbase device + * + * Return: 0 on success, non-zero on error + */ + int (*secure_mode_disable)(struct kbase_device *kbdev); +}; + + +#define DEVNAME_SIZE 16 + +struct kbase_device { + s8 slot_submit_count_irq[BASE_JM_MAX_NR_SLOTS]; + + u32 hw_quirks_sc; + u32 hw_quirks_tiler; + u32 hw_quirks_mmu; + + struct list_head entry; + struct device *dev; + struct miscdevice mdev; + u64 reg_start; + size_t reg_size; + void __iomem *reg; + struct { + int irq; + int flags; + } irqs[3]; +#ifdef CONFIG_HAVE_CLK + struct clk *clock; +#endif +#ifdef CONFIG_REGULATOR + struct regulator *regulator; +#endif + char devname[DEVNAME_SIZE]; + +#ifdef CONFIG_MALI_NO_MALI + void *model; + struct kmem_cache *irq_slab; + struct workqueue_struct *irq_workq; + atomic_t serving_job_irq; + atomic_t serving_gpu_irq; + atomic_t serving_mmu_irq; + spinlock_t reg_op_lock; +#endif /* CONFIG_MALI_NO_MALI */ + + struct kbase_pm_device_data pm; + struct kbasep_js_device_data js_data; + struct kbasep_mem_device memdev; + struct kbase_mmu_mode const *mmu_mode; + + struct kbase_as as[BASE_MAX_NR_AS]; + + spinlock_t mmu_mask_change; + + struct kbase_gpu_props gpu_props; + + /** List of SW workarounds for HW issues */ + unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + /** List of features available */ + unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG]; + + /* Cached present bitmaps - these are the same as the corresponding hardware registers */ + u64 shader_present_bitmap; + u64 tiler_present_bitmap; + u64 l2_present_bitmap; + + /* Bitmaps of cores that are currently in use (running jobs). + * These should be kept up to date by the job scheduler. + * + * pm.power_change_lock should be held when accessing these members. + * + * kbase_pm_check_transitions_nolock() should be called when bits are + * cleared to update the power management system and allow transitions to + * occur. */ + u64 shader_inuse_bitmap; + + /* Refcount for cores in use */ + u32 shader_inuse_cnt[64]; + + /* Bitmaps of cores the JS needs for jobs ready to run */ + u64 shader_needed_bitmap; + + /* Refcount for cores needed */ + u32 shader_needed_cnt[64]; + + u32 tiler_inuse_cnt; + + u32 tiler_needed_cnt; + + /* struct for keeping track of the disjoint information + * + * The state is > 0 if the GPU is in a disjoint state. 
Otherwise 0 + * The count is the number of disjoint events that have occurred on the GPU + */ + struct { + atomic_t count; + atomic_t state; + } disjoint_event; + + /* Refcount for tracking users of the l2 cache, e.g. when using hardware counter instrumentation. */ + u32 l2_users_count; + + /* Bitmaps of cores that are currently available (powered up and the power policy is happy for jobs to be + * submitted to these cores. These are updated by the power management code. The job scheduler should avoid + * submitting new jobs to any cores that are not marked as available. + * + * pm.power_change_lock should be held when accessing these members. + */ + u64 shader_available_bitmap; + u64 tiler_available_bitmap; + u64 l2_available_bitmap; + + u64 shader_ready_bitmap; + u64 shader_transitioning_bitmap; + + s8 nr_hw_address_spaces; /**< Number of address spaces in the GPU (constant after driver initialisation) */ + s8 nr_user_address_spaces; /**< Number of address spaces available to user contexts */ + + /* Structure used for instrumentation and HW counters dumping */ + struct { + /* The lock should be used when accessing any of the following members */ + spinlock_t lock; + + struct kbase_context *kctx; + u64 addr; + + struct kbase_context *suspended_kctx; + struct kbase_uk_hwcnt_setup suspended_state; + + struct kbase_instr_backend backend; + } hwcnt; + + struct kbase_vinstr_context *vinstr_ctx; + + /*value to be written to the irq_throttle register each time an irq is served */ + atomic_t irq_throttle_cycles; + +#if KBASE_TRACE_ENABLE + spinlock_t trace_lock; + u16 trace_first_out; + u16 trace_next_in; + struct kbase_trace *trace_rbuf; +#endif + + /* This is used to override the current job scheduler values for + * JS_SCHEDULING_PERIOD_NS + * JS_SOFT_STOP_TICKS + * JS_SOFT_STOP_TICKS_CL + * JS_HARD_STOP_TICKS_SS + * JS_HARD_STOP_TICKS_CL + * JS_HARD_STOP_TICKS_DUMPING + * JS_RESET_TICKS_SS + * JS_RESET_TICKS_CL + * JS_RESET_TICKS_DUMPING. + * + * These values are set via the js_timeouts sysfs file. + */ + u32 js_scheduling_period_ns; + int js_soft_stop_ticks; + int js_soft_stop_ticks_cl; + int js_hard_stop_ticks_ss; + int js_hard_stop_ticks_cl; + int js_hard_stop_ticks_dumping; + int js_reset_ticks_ss; + int js_reset_ticks_cl; + int js_reset_ticks_dumping; + bool js_timeouts_updated; + + u32 reset_timeout_ms; + + struct mutex cacheclean_lock; + + /* Platform specific private data to be accessed by mali_kbase_config_xxx.c only */ + void *platform_context; + + /* List of kbase_contexts created */ + struct list_head kctx_list; + struct mutex kctx_list_lock; + +#ifdef CONFIG_MALI_MIDGARD_RT_PM + struct delayed_work runtime_pm_workqueue; +#endif + +#ifdef CONFIG_PM_DEVFREQ + struct devfreq_dev_profile devfreq_profile; + struct devfreq *devfreq; + unsigned long current_freq; + unsigned long current_voltage; +#ifdef CONFIG_DEVFREQ_THERMAL + struct devfreq_cooling_device *devfreq_cooling; +#endif +#endif + +#ifdef CONFIG_MALI_TRACE_TIMELINE + struct kbase_trace_kbdev_timeline timeline; +#endif + +#ifdef CONFIG_DEBUG_FS + /* directory for debugfs entries */ + struct dentry *mali_debugfs_directory; + /* Root directory for per context entry */ + struct dentry *debugfs_ctx_directory; +#endif /* CONFIG_DEBUG_FS */ + + /* fbdump profiling controls set by gator */ + u32 kbase_profiling_controls[FBDUMP_CONTROL_MAX]; + + +#if MALI_CUSTOMER_RELEASE == 0 + /* Number of jobs that are run before a job is forced to fail and + * replay. May be KBASEP_FORCE_REPLAY_DISABLED, to disable forced + * failures. 
*/ + int force_replay_limit; + /* Count of jobs between forced failures. Incremented on each job. A + * job is forced to fail once this is greater than or equal to + * force_replay_limit. */ + int force_replay_count; + /* Core requirement for jobs to be failed and replayed. May be zero. */ + base_jd_core_req force_replay_core_req; + /* true if force_replay_limit should be randomized. The random + * value will be in the range 1 to KBASEP_FORCE_REPLAY_RANDOM_LIMIT. + */ + bool force_replay_random; +#endif + + + /* Total number of created contexts */ + atomic_t ctx_num; + + struct kbase_hwaccess_data hwaccess; + + /* Count of page/bus faults waiting for workqueues to process */ + atomic_t faults_pending; + + /* true if GPU is powered off or power off operation is in progress */ + bool poweroff_pending; + + + /* defaults for new context created for this device */ + u32 infinite_cache_active_default; + + /* system coherency mode */ + u32 system_coherency; + + /* Secure operations */ + struct kbase_secure_ops *secure_ops; +}; + +/* JSCTX ringbuffer size must always be a power of 2 */ +#define JSCTX_RB_SIZE 256 +#define JSCTX_RB_MASK (JSCTX_RB_SIZE-1) + +/** + * struct jsctx_rb_entry - Entry in &struct jsctx_rb ring buffer + * @atom_id: Atom ID + */ +struct jsctx_rb_entry { + u16 atom_id; +}; + +/** + * struct jsctx_rb - JS context atom ring buffer + * @entries: Array of size %JSCTX_RB_SIZE which holds the &struct + * kbase_jd_atom pointers which make up the contents of the ring + * buffer. + * @read_idx: Index into @entries. Indicates the next entry in @entries to + * read, and is incremented when pulling an atom, and decremented + * when unpulling. + * HW access lock must be held when accessing. + * @write_idx: Index into @entries. Indicates the next entry to use when + * adding atoms into the ring buffer, and is incremented when + * adding a new atom. + * jctx->lock must be held when accessing. + * @running_idx: Index into @entries. Indicates the last valid entry, and is + * incremented when removing atoms from the ring buffer. + * HW access lock must be held when accessing. + * + * &struct jsctx_rb is a ring buffer of &struct kbase_jd_atom. + */ +struct jsctx_rb { + struct jsctx_rb_entry entries[JSCTX_RB_SIZE]; + + u16 read_idx; /* HW access lock must be held when accessing */ + u16 write_idx; /* jctx->lock must be held when accessing */ + u16 running_idx; /* HW access lock must be held when accessing */ +}; + +#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \ + (((minor) & 0xFFF) << 8) | \ + ((0 & 0xFF) << 0)) + +struct kbase_context { + struct kbase_device *kbdev; + int id; /* System wide unique id */ + unsigned long api_version; + phys_addr_t pgd; + struct list_head event_list; + struct mutex event_mutex; + bool event_closed; + struct workqueue_struct *event_workq; + + bool is_compat; + + atomic_t setup_complete; + atomic_t setup_in_progress; + + u64 *mmu_teardown_pages; + + phys_addr_t aliasing_sink_page; + + struct mutex reg_lock; /* To be converted to a rwlock?
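JSCTX_RB_SIZE being a power of two is what makes JSCTX_RB_MASK work: wrap-around becomes a cheap bitwise AND instead of a modulo, and the u16 indices can free-run so that full and empty states stay distinguishable. A minimal single-threaded model of this indexing (it omits the driver's running_idx and unpull semantics):

#include <stdint.h>
#include <stdio.h>

#define RB_SIZE 256u              /* must be a power of two */
#define RB_MASK (RB_SIZE - 1u)

struct rb {
        uint16_t entries[RB_SIZE];
        uint16_t read_idx;        /* free-running; masked only on access */
        uint16_t write_idx;
};

static int rb_push(struct rb *rb, uint16_t v)
{
        if ((uint16_t)(rb->write_idx - rb->read_idx) == RB_SIZE)
                return -1;                              /* full */
        rb->entries[rb->write_idx++ & RB_MASK] = v;
        return 0;
}

static int rb_pull(struct rb *rb, uint16_t *v)
{
        if (rb->read_idx == rb->write_idx)
                return -1;                              /* empty */
        *v = rb->entries[rb->read_idx++ & RB_MASK];
        return 0;
}

int main(void)
{
        struct rb rb = { { 0 } };
        uint16_t v;

        rb_push(&rb, 42);
        if (!rb_pull(&rb, &v))
                printf("pulled %u\n", v);
        return 0;
}

Because the indices free-run, write_idx - read_idx is the exact occupancy even across wrap, which is why the size must divide the index type's range.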
*/ + struct rb_root reg_rbtree; /* Red-Black tree of GPU regions (live regions) */ + + unsigned long cookies; + struct kbase_va_region *pending_regions[BITS_PER_LONG]; + + wait_queue_head_t event_queue; + pid_t tgid; + pid_t pid; + + struct kbase_jd_context jctx; + atomic_t used_pages; + atomic_t nonmapped_pages; + + struct kbase_mem_allocator osalloc; + struct kbase_mem_allocator *pgd_allocator; + + struct list_head waiting_soft_jobs; +#ifdef CONFIG_KDS + struct list_head waiting_kds_resource; +#endif + /** This is effectively part of the Run Pool, because it only has a valid + * setting (!=KBASEP_AS_NR_INVALID) whilst the context is scheduled in + * + * The kbasep_js_device_data::runpool_irq::lock must be held whilst accessing + * this. + * + * If the context relating to this as_nr is required, you must use + * kbasep_js_runpool_retain_ctx() to ensure that the context doesn't disappear + * whilst you're using it. Alternatively, just hold the kbasep_js_device_data::runpool_irq::lock + * to ensure the context doesn't disappear (but this has restrictions on what other locks + * you can take whilst doing this) */ + int as_nr; + + /* NOTE: + * + * Flags are in jctx.sched_info.ctx.flags + * Mutable flags *must* be accessed under jctx.sched_info.ctx.jsctx_mutex + * + * All other flags must be added there */ + spinlock_t mm_update_lock; + struct mm_struct *process_mm; + +#ifdef CONFIG_MALI_TRACE_TIMELINE + struct kbase_trace_kctx_timeline timeline; +#endif +#ifdef CONFIG_DEBUG_FS + /* Content of mem_profile file */ + char *mem_profile_data; + /* Size of @c mem_profile_data */ + size_t mem_profile_size; + /* Spinlock guarding data */ + spinlock_t mem_profile_lock; + struct dentry *kctx_dentry; +#endif /* CONFIG_DEBUG_FS */ + + struct jsctx_rb jsctx_rb + [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS]; + + /* Number of atoms currently pulled from this context */ + atomic_t atoms_pulled; + /* Number of atoms currently pulled from this context, per slot */ + atomic_t atoms_pulled_slot[BASE_JM_MAX_NR_SLOTS]; + /* true if last kick() caused atoms to be pulled from this context */ + bool pulled; + /* true if infinite cache is to be enabled for new allocations. Existing + * allocations will not change. 
bool stored as a u32 per Linux API */ + u32 infinite_cache_active; + /* Bitmask of slots that can be pulled from */ + u32 slots_pullable; + + /* true if address space assignment is pending */ + bool as_pending; + + /* Backend specific data */ + struct kbase_context_backend backend; + + /* Work structure used for deferred ASID assignment */ + struct work_struct work; + + /* Only one userspace vinstr client per kbase context */ + struct kbase_vinstr_client *vinstr_cli; + struct mutex vinstr_cli_lock; +}; + +enum kbase_reg_access_type { + REG_READ, + REG_WRITE +}; + +enum kbase_share_attr_bits { + /* (1ULL << 8) bit is reserved */ + SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */ + SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */ +}; + +/* Conversion helpers for setting up high resolution timers */ +#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime((x)*1000000U)) +#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x)) + +/* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */ +#define KBASE_CLEAN_CACHE_MAX_LOOPS 100000 +/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */ +#define KBASE_AS_INACTIVE_MAX_LOOPS 100000 + +/* Maximum number of times a job can be replayed */ +#define BASEP_JD_REPLAY_LIMIT 15 + +#endif /* _KBASE_DEFS_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_device.c b/drivers/gpu/arm/midgard/mali_kbase_device.c new file mode 100644 index 00000000000..01e41462f17 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_device.c @@ -0,0 +1,659 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Base kernel device APIs + */ + +#include <linux/debugfs.h> +#include <linux/dma-mapping.h> +#include <linux/seq_file.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of_platform.h> + +#include <mali_kbase.h> +#include <mali_kbase_defs.h> +#include <mali_kbase_hw.h> +#include <mali_kbase_config_defaults.h> + +#include <mali_kbase_profiling_gator_api.h> + +/* NOTE: Magic - 0x45435254 (TRCE in ASCII). + * Supports tracing feature provided in the base module. + * Please keep it in sync with the value of base module. 
+ */ +#define TRACE_BUFFER_HEADER_SPECIAL 0x45435254 + +#if KBASE_TRACE_ENABLE +static const char *kbasep_trace_code_string[] = { + /* IMPORTANT: USE OF SPECIAL #INCLUDE OF NON-STANDARD HEADER FILE + * THIS MUST BE USED AT THE START OF THE ARRAY */ +#define KBASE_TRACE_CODE_MAKE_CODE(X) # X +#include "mali_kbase_trace_defs.h" +#undef KBASE_TRACE_CODE_MAKE_CODE +}; +#endif + +#define DEBUG_MESSAGE_SIZE 256 + +static int kbasep_trace_init(struct kbase_device *kbdev); +static void kbasep_trace_term(struct kbase_device *kbdev); +static void kbasep_trace_hook_wrapper(void *param); + +struct kbase_device *kbase_device_alloc(void) +{ + return kzalloc(sizeof(struct kbase_device), GFP_KERNEL); +} + +static int kbase_device_as_init(struct kbase_device *kbdev, int i) +{ + const char format[] = "mali_mmu%d"; + char name[sizeof(format)]; + const char poke_format[] = "mali_mmu%d_poker"; + char poke_name[sizeof(poke_format)]; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) + snprintf(poke_name, sizeof(poke_name), poke_format, i); + + snprintf(name, sizeof(name), format, i); + + kbdev->as[i].number = i; + kbdev->as[i].fault_addr = 0ULL; + + kbdev->as[i].pf_wq = alloc_workqueue(name, 0, 1); + if (!kbdev->as[i].pf_wq) + return -EINVAL; + + mutex_init(&kbdev->as[i].transaction_mutex); + INIT_WORK(&kbdev->as[i].work_pagefault, page_fault_worker); + INIT_WORK(&kbdev->as[i].work_busfault, bus_fault_worker); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) { + struct hrtimer *poke_timer = &kbdev->as[i].poke_timer; + struct work_struct *poke_work = &kbdev->as[i].poke_work; + + kbdev->as[i].poke_wq = alloc_workqueue(poke_name, 0, 1); + if (!kbdev->as[i].poke_wq) { + destroy_workqueue(kbdev->as[i].pf_wq); + return -EINVAL; + } + KBASE_DEBUG_ASSERT(!object_is_on_stack(poke_work)); + INIT_WORK(poke_work, kbasep_as_do_poke); + + hrtimer_init(poke_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + + poke_timer->function = kbasep_as_poke_timer_callback; + + kbdev->as[i].poke_refcount = 0; + kbdev->as[i].poke_state = 0u; + } + + return 0; +} + +static void kbase_device_as_term(struct kbase_device *kbdev, int i) +{ + destroy_workqueue(kbdev->as[i].pf_wq); + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) + destroy_workqueue(kbdev->as[i].poke_wq); +} + +static int kbase_device_all_as_init(struct kbase_device *kbdev) +{ + int i, err; + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + err = kbase_device_as_init(kbdev, i); + if (err) + goto free_workqs; + } + + return 0; + +free_workqs: + for (; i > 0; i--) + kbase_device_as_term(kbdev, i); + + return err; +} + +static void kbase_device_all_as_term(struct kbase_device *kbdev) +{ + int i; + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) + kbase_device_as_term(kbdev, i); +} + +int kbase_device_init(struct kbase_device * const kbdev) +{ + int i, err; + + spin_lock_init(&kbdev->mmu_mask_change); + /* Get the list of workarounds for issues on the current HW + * (identified by the GPU_ID register) + */ + err = kbase_hw_set_issues_mask(kbdev); + if (err) + goto fail; + /* Set the list of features available on the current HW + * (identified by the GPU_ID register) + */ + kbase_hw_set_features_mask(kbdev); + +#if defined(CONFIG_ARM64) + set_dma_ops(kbdev->dev, &noncoherent_swiotlb_dma_ops); +#endif /* CONFIG_ARM64 */ + + /* Workaround a pre-3.13 Linux issue, where dma_mask is NULL when our + * device structure was created by device-tree + */ + if (!kbdev->dev->dma_mask) + kbdev->dev->dma_mask = &kbdev->dev->coherent_dma_mask; + + err = dma_set_mask(kbdev->dev, + 
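The kbasep_trace_code_string[] table above and the kbase_trace_code enum earlier are generated from the same #include of mali_kbase_trace_defs.h, redefining KBASE_TRACE_CODE_MAKE_CODE each time - the classic X-macro pattern, which keeps the enum and its string table from drifting apart. A self-contained illustration (the list is inlined here rather than living in a separate header):

#include <stdio.h>

/* In the driver this list lives in mali_kbase_trace_defs.h and is pulled in
 * twice via #include; here it is a macro for a compact, runnable demo. */
#define TRACE_LIST(X) X(JOB_START) X(JOB_DONE) X(RESET)

enum trace_code {
#define MAKE_ENUM(name) TRACE_##name,
        TRACE_LIST(MAKE_ENUM)
#undef MAKE_ENUM
        TRACE_COUNT
};

static const char *trace_string[] = {
#define MAKE_STRING(name) #name,
        TRACE_LIST(MAKE_STRING)
#undef MAKE_STRING
};

int main(void)
{
        /* The enum and the string table are generated from one list, so
         * they can never get out of sync. */
        printf("%d -> %s\n", TRACE_JOB_DONE, trace_string[TRACE_JOB_DONE]);
        return 0;
}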
DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); + if (err) + goto dma_set_mask_failed; + + err = dma_set_coherent_mask(kbdev->dev, + DMA_BIT_MASK(kbdev->gpu_props.mmu.pa_bits)); + if (err) + goto dma_set_mask_failed; + + err = kbase_mem_lowlevel_init(kbdev); + if (err) + goto mem_lowlevel_init_failed; + + kbdev->nr_hw_address_spaces = kbdev->gpu_props.num_address_spaces; + + err = kbase_device_all_as_init(kbdev); + if (err) + goto term_lowlevel_mem; + + spin_lock_init(&kbdev->hwcnt.lock); + + err = kbasep_trace_init(kbdev); + if (err) + goto term_as; + + mutex_init(&kbdev->cacheclean_lock); + +#ifdef CONFIG_MALI_TRACE_TIMELINE + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) + kbdev->timeline.slot_atoms_submitted[i] = 0; + + for (i = 0; i <= KBASEP_TIMELINE_PM_EVENT_LAST; ++i) + atomic_set(&kbdev->timeline.pm_event_uid[i], 0); +#endif /* CONFIG_MALI_TRACE_TIMELINE */ + + /* fbdump profiling controls set to 0 - fbdump not enabled until changed by gator */ + for (i = 0; i < FBDUMP_CONTROL_MAX; i++) + kbdev->kbase_profiling_controls[i] = 0; + + kbase_debug_assert_register_hook(&kbasep_trace_hook_wrapper, kbdev); + + atomic_set(&kbdev->ctx_num, 0); + + err = kbase_instr_backend_init(kbdev); + if (err) + goto term_trace; + + kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD; + + kbdev->reset_timeout_ms = DEFAULT_RESET_TIMEOUT_MS; + + kbdev->mmu_mode = kbase_mmu_mode_get_lpae(); + + return 0; +term_trace: + kbasep_trace_term(kbdev); +term_as: + kbase_device_all_as_term(kbdev); +term_lowlevel_mem: + kbase_mem_lowlevel_term(kbdev); +mem_lowlevel_init_failed: +dma_set_mask_failed: +fail: + return err; +} + +void kbase_device_term(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + +#if KBASE_TRACE_ENABLE + kbase_debug_assert_register_hook(NULL, NULL); +#endif + + kbase_instr_backend_term(kbdev); + + kbasep_trace_term(kbdev); + + kbase_device_all_as_term(kbdev); + + kbase_mem_lowlevel_term(kbdev); +} + +void kbase_device_free(struct kbase_device *kbdev) +{ + kfree(kbdev); +} + +void kbase_device_trace_buffer_install(struct kbase_context *kctx, u32 *tb, size_t size) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(tb); + + /* set up the header */ + /* magic number in the first 4 bytes */ + tb[0] = TRACE_BUFFER_HEADER_SPECIAL; + /* Store (write offset = 0, wrap counter = 0, transaction active = no) + * write offset 0 means never written. 
+ * Offsets 1 to (wrap_offset - 1) used to store values when trace started + */ + tb[1] = 0; + + /* install trace buffer */ + spin_lock_irqsave(&kctx->jctx.tb_lock, flags); + kctx->jctx.tb_wrap_offset = size / 8; + kctx->jctx.tb = tb; + spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); +} + +void kbase_device_trace_buffer_uninstall(struct kbase_context *kctx) +{ + unsigned long flags; + + KBASE_DEBUG_ASSERT(kctx); + spin_lock_irqsave(&kctx->jctx.tb_lock, flags); + kctx->jctx.tb = NULL; + kctx->jctx.tb_wrap_offset = 0; + spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); +} + +void kbase_device_trace_register_access(struct kbase_context *kctx, enum kbase_reg_access_type type, u16 reg_offset, u32 reg_value) +{ + unsigned long flags; + + spin_lock_irqsave(&kctx->jctx.tb_lock, flags); + if (kctx->jctx.tb) { + u16 wrap_count; + u16 write_offset; + u32 *tb = kctx->jctx.tb; + u32 header_word; + + header_word = tb[1]; + KBASE_DEBUG_ASSERT(0 == (header_word & 0x1)); + + wrap_count = (header_word >> 1) & 0x7FFF; + write_offset = (header_word >> 16) & 0xFFFF; + + /* mark as transaction in progress */ + tb[1] |= 0x1; + mb(); + + /* calculate new offset */ + write_offset++; + if (write_offset == kctx->jctx.tb_wrap_offset) { + /* wrap */ + write_offset = 1; + wrap_count++; + wrap_count &= 0x7FFF; /* 15bit wrap counter */ + } + + /* store the trace entry at the selected offset */ + tb[write_offset * 2 + 0] = (reg_offset & ~0x3) | ((type == REG_WRITE) ? 0x1 : 0x0); + tb[write_offset * 2 + 1] = reg_value; + mb(); + + /* new header word */ + header_word = (write_offset << 16) | (wrap_count << 1) | 0x0; /* transaction complete */ + tb[1] = header_word; + } + spin_unlock_irqrestore(&kctx->jctx.tb_lock, flags); +} + +/* + * Device trace functions + */ +#if KBASE_TRACE_ENABLE + +static int kbasep_trace_init(struct kbase_device *kbdev) +{ + struct kbase_trace *rbuf; + + rbuf = kmalloc_array(KBASE_TRACE_SIZE, sizeof(*rbuf), GFP_KERNEL); + + if (!rbuf) + return -EINVAL; + + kbdev->trace_rbuf = rbuf; + spin_lock_init(&kbdev->trace_lock); + return 0; +} + +static void kbasep_trace_term(struct kbase_device *kbdev) +{ + kfree(kbdev->trace_rbuf); +} + +static void kbasep_trace_format_msg(struct kbase_trace *trace_msg, char *buffer, int len) +{ + s32 written = 0; + + /* Initial part of message */ + written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d.%.6d,%d,%d,%s,%p,", (int)trace_msg->timestamp.tv_sec, (int)(trace_msg->timestamp.tv_nsec / 1000), trace_msg->thread_id, trace_msg->cpu, kbasep_trace_code_string[trace_msg->code], trace_msg->ctx), 0); + + if (trace_msg->katom) + written += MAX(snprintf(buffer + written, MAX(len - written, 0), "atom %d (ud: 0x%llx 0x%llx)", trace_msg->atom_number, trace_msg->atom_udata[0], trace_msg->atom_udata[1]), 0); + + written += MAX(snprintf(buffer + written, MAX(len - written, 0), ",%.8llx,", trace_msg->gpu_addr), 0); + + /* NOTE: Could add function callbacks to handle different message types */ + /* Jobslot present */ + if (trace_msg->flags & KBASE_TRACE_FLAG_JOBSLOT) + written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->jobslot), 0); + + written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); + + /* Refcount present */ + if (trace_msg->flags & KBASE_TRACE_FLAG_REFCOUNT) + written += MAX(snprintf(buffer + written, MAX(len - written, 0), "%d", trace_msg->refcount), 0); + + written += MAX(snprintf(buffer + written, MAX(len - written, 0), ","), 0); + + /* Rest of message */ + written += MAX(snprintf(buffer + written, 
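kbase_device_trace_register_access() packs three fields into the tb[1] header word: bit 0 flags a transaction in progress, bits 1-15 hold the 15-bit wrap counter, and bits 16-31 hold the write offset. A small encode/decode model of that layout (helper names are illustrative):

#include <stdint.h>
#include <stdio.h>

/* Header word layout of the register-access trace buffer:
 *   bit 0       transaction in progress
 *   bits 1-15   wrap counter (15 bits)
 *   bits 16-31  write offset
 */
static uint32_t header_pack(uint16_t offset, uint16_t wraps, int busy)
{
        return ((uint32_t)offset << 16) |
               (((uint32_t)wraps & 0x7FFF) << 1) |
               (busy ? 1u : 0u);
}

static void header_unpack(uint32_t h, uint16_t *offset, uint16_t *wraps,
                          int *busy)
{
        *busy = h & 1u;
        *wraps = (h >> 1) & 0x7FFF;
        *offset = (h >> 16) & 0xFFFF;
}

int main(void)
{
        uint16_t off, wr;
        int busy;

        header_unpack(header_pack(17, 3, 0), &off, &wr, &busy);
        printf("offset=%u wraps=%u busy=%d\n", off, wr, busy); /* 17 3 0 */
        return 0;
}

Setting the busy bit before the entry is written, with memory barriers on both sides as in the driver, lets a reader detect that it raced a writer and discard the snapshot.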
MAX(len - written, 0), "0x%.8lx", trace_msg->info_val), 0); +} + +static void kbasep_trace_dump_msg(struct kbase_device *kbdev, struct kbase_trace *trace_msg) +{ + char buffer[DEBUG_MESSAGE_SIZE]; + + kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); + dev_dbg(kbdev->dev, "%s", buffer); +} + +void kbasep_trace_add(struct kbase_device *kbdev, enum kbase_trace_code code, void *ctx, struct kbase_jd_atom *katom, u64 gpu_addr, u8 flags, int refcount, int jobslot, unsigned long info_val) +{ + unsigned long irqflags; + struct kbase_trace *trace_msg; + + spin_lock_irqsave(&kbdev->trace_lock, irqflags); + + trace_msg = &kbdev->trace_rbuf[kbdev->trace_next_in]; + + /* Fill the message */ + trace_msg->thread_id = task_pid_nr(current); + trace_msg->cpu = task_cpu(current); + + getnstimeofday(&trace_msg->timestamp); + + trace_msg->code = code; + trace_msg->ctx = ctx; + + if (NULL == katom) { + trace_msg->katom = false; + } else { + trace_msg->katom = true; + trace_msg->atom_number = kbase_jd_atom_id(katom->kctx, katom); + trace_msg->atom_udata[0] = katom->udata.blob[0]; + trace_msg->atom_udata[1] = katom->udata.blob[1]; + } + + trace_msg->gpu_addr = gpu_addr; + trace_msg->jobslot = jobslot; + trace_msg->refcount = MIN((unsigned int)refcount, 0xFF); + trace_msg->info_val = info_val; + trace_msg->flags = flags; + + /* Update the ringbuffer indices */ + kbdev->trace_next_in = (kbdev->trace_next_in + 1) & KBASE_TRACE_MASK; + if (kbdev->trace_next_in == kbdev->trace_first_out) + kbdev->trace_first_out = (kbdev->trace_first_out + 1) & KBASE_TRACE_MASK; + + /* Done */ + + spin_unlock_irqrestore(&kbdev->trace_lock, irqflags); +} + +void kbasep_trace_clear(struct kbase_device *kbdev) +{ + unsigned long flags; + + spin_lock_irqsave(&kbdev->trace_lock, flags); + kbdev->trace_first_out = kbdev->trace_next_in; + spin_unlock_irqrestore(&kbdev->trace_lock, flags); +} + +void kbasep_trace_dump(struct kbase_device *kbdev) +{ + unsigned long flags; + u32 start; + u32 end; + + dev_dbg(kbdev->dev, "Dumping trace:\nsecs,nthread,cpu,code,ctx,katom,gpu_addr,jobslot,refcount,info_val"); + spin_lock_irqsave(&kbdev->trace_lock, flags); + start = kbdev->trace_first_out; + end = kbdev->trace_next_in; + + while (start != end) { + struct kbase_trace *trace_msg = &kbdev->trace_rbuf[start]; + + kbasep_trace_dump_msg(kbdev, trace_msg); + + start = (start + 1) & KBASE_TRACE_MASK; + } + dev_dbg(kbdev->dev, "TRACE_END"); + + spin_unlock_irqrestore(&kbdev->trace_lock, flags); + + KBASE_TRACE_CLEAR(kbdev); +} + +static void kbasep_trace_hook_wrapper(void *param) +{ + struct kbase_device *kbdev = (struct kbase_device *)param; + + kbasep_trace_dump(kbdev); +} + +#ifdef CONFIG_DEBUG_FS +struct trace_seq_state { + struct kbase_trace trace_buf[KBASE_TRACE_SIZE]; + u32 start; + u32 end; +}; + +static void *kbasep_trace_seq_start(struct seq_file *s, loff_t *pos) +{ + struct trace_seq_state *state = s->private; + int i; + + if (*pos > KBASE_TRACE_SIZE) + return NULL; + i = state->start + *pos; + if ((state->end >= state->start && i >= state->end) || + i >= state->end + KBASE_TRACE_SIZE) + return NULL; + + i &= KBASE_TRACE_MASK; + + return &state->trace_buf[i]; +} + +static void kbasep_trace_seq_stop(struct seq_file *s, void *data) +{ +} + +static void *kbasep_trace_seq_next(struct seq_file *s, void *data, loff_t *pos) +{ + struct trace_seq_state *state = s->private; + int i; + + (*pos)++; + + i = (state->start + *pos) & KBASE_TRACE_MASK; + if (i == state->end) + return NULL; + + return &state->trace_buf[i]; +} + +static int 
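kbasep_trace_add() implements an overwrite-oldest ring buffer: trace_next_in always advances, and when it catches up with trace_first_out the oldest record is silently dropped. A compact model of those index updates (sized 8 instead of KBASE_TRACE_SIZE):

#include <stdint.h>
#include <stdio.h>

#define TRACE_SIZE 8u             /* power of two, like KBASE_TRACE_SIZE */
#define TRACE_MASK (TRACE_SIZE - 1u)

struct trace_rb {
        int buf[TRACE_SIZE];
        uint16_t first_out;       /* oldest entry */
        uint16_t next_in;         /* next slot to write */
};

/* Add an entry; when the buffer is full the oldest entry is overwritten. */
static void trace_add(struct trace_rb *t, int msg)
{
        t->buf[t->next_in] = msg;
        t->next_in = (t->next_in + 1) & TRACE_MASK;
        if (t->next_in == t->first_out)
                t->first_out = (t->first_out + 1) & TRACE_MASK;
}

int main(void)
{
        struct trace_rb t = { { 0 }, 0, 0 };
        uint16_t i;
        int n;

        for (n = 0; n < 10; n++)          /* wraps: entries 0, 1, 2 are lost */
                trace_add(&t, n);

        for (i = t.first_out; i != t.next_in; i = (i + 1) & TRACE_MASK)
                printf("%d ", t.buf[i]);  /* prints 3 4 5 6 7 8 9 */
        printf("\n");
        return 0;
}

Dumping is the same walk from first_out to next_in that kbasep_trace_dump() performs under the trace lock.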
kbasep_trace_seq_show(struct seq_file *s, void *data) +{ + struct kbase_trace *trace_msg = data; + char buffer[DEBUG_MESSAGE_SIZE]; + + kbasep_trace_format_msg(trace_msg, buffer, DEBUG_MESSAGE_SIZE); + seq_printf(s, "%s\n", buffer); + return 0; +} + +static const struct seq_operations kbasep_trace_seq_ops = { + .start = kbasep_trace_seq_start, + .next = kbasep_trace_seq_next, + .stop = kbasep_trace_seq_stop, + .show = kbasep_trace_seq_show, +}; + +static int kbasep_trace_debugfs_open(struct inode *inode, struct file *file) +{ + struct kbase_device *kbdev = inode->i_private; + unsigned long flags; + + struct trace_seq_state *state; + + state = __seq_open_private(file, &kbasep_trace_seq_ops, sizeof(*state)); + if (!state) + return -ENOMEM; + + spin_lock_irqsave(&kbdev->trace_lock, flags); + state->start = kbdev->trace_first_out; + state->end = kbdev->trace_next_in; + memcpy(state->trace_buf, kbdev->trace_rbuf, sizeof(state->trace_buf)); + spin_unlock_irqrestore(&kbdev->trace_lock, flags); + + return 0; +} + +static const struct file_operations kbasep_trace_debugfs_fops = { + .open = kbasep_trace_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +void kbasep_trace_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("mali_trace", S_IRUGO, + kbdev->mali_debugfs_directory, kbdev, + &kbasep_trace_debugfs_fops); +} + +#else +void kbasep_trace_debugfs_init(struct kbase_device *kbdev) +{ +} +#endif /* CONFIG_DEBUG_FS */ + +#else /* KBASE_TRACE_ENABLE */ +static int kbasep_trace_init(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); + return 0; +} + +static void kbasep_trace_term(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +static void kbasep_trace_hook_wrapper(void *param) +{ + CSTD_UNUSED(param); +} + +void kbasep_trace_dump(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} +#endif /* KBASE_TRACE_ENABLE */ + +void kbase_set_profiling_control(struct kbase_device *kbdev, u32 control, u32 value) +{ + switch (control) { + case FBDUMP_CONTROL_ENABLE: + /* fall through */ + case FBDUMP_CONTROL_RATE: + /* fall through */ + case SW_COUNTER_ENABLE: + /* fall through */ + case FBDUMP_CONTROL_RESIZE_FACTOR: + kbdev->kbase_profiling_controls[control] = value; + break; + default: + dev_err(kbdev->dev, "Profiling control %d not found\n", control); + break; + } +} + +u32 kbase_get_profiling_control(struct kbase_device *kbdev, u32 control) +{ + u32 ret_value = 0; + + switch (control) { + case FBDUMP_CONTROL_ENABLE: + /* fall through */ + case FBDUMP_CONTROL_RATE: + /* fall through */ + case SW_COUNTER_ENABLE: + /* fall through */ + case FBDUMP_CONTROL_RESIZE_FACTOR: + ret_value = kbdev->kbase_profiling_controls[control]; + break; + default: + dev_err(kbdev->dev, "Profiling control %d not found\n", control); + break; + } + + return ret_value; +} + +/* + * Called by gator to control the production of + * profiling information at runtime + * */ + +void _mali_profiling_control(u32 action, u32 value) +{ + struct kbase_device *kbdev = NULL; + + /* find the first i.e. 
call with -1 */ + kbdev = kbase_find_device(-1); + + if (NULL != kbdev) + kbase_set_profiling_control(kbdev, action, value); +} +KBASE_EXPORT_SYMBOL(_mali_profiling_control); + diff --git a/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c new file mode 100644 index 00000000000..f70bcccf405 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_disjoint_events.c @@ -0,0 +1,76 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* + * Base kernel disjoint events helper functions + */ + +#include <mali_kbase.h> + +void kbase_disjoint_init(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + atomic_set(&kbdev->disjoint_event.count, 0); + atomic_set(&kbdev->disjoint_event.state, 0); +} + +/* increment the disjoint event count */ +void kbase_disjoint_event(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + atomic_inc(&kbdev->disjoint_event.count); +} + +/* increment the state and the event counter */ +void kbase_disjoint_state_up(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + atomic_inc(&kbdev->disjoint_event.state); + + kbase_disjoint_event(kbdev); +} + +/* decrement the state */ +void kbase_disjoint_state_down(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(atomic_read(&kbdev->disjoint_event.state) > 0); + + kbase_disjoint_event(kbdev); + + atomic_dec(&kbdev->disjoint_event.state); +} + +/* increments the count only if the state is > 0 */ +void kbase_disjoint_event_potential(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + if (atomic_read(&kbdev->disjoint_event.state)) + kbase_disjoint_event(kbdev); +} + +u32 kbase_disjoint_event_get(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev != NULL); + + return atomic_read(&kbdev->disjoint_event.count); +} +KBASE_EXPORT_TEST_API(kbase_disjoint_event_get); diff --git a/drivers/gpu/arm/midgard/mali_kbase_event.c b/drivers/gpu/arm/midgard/mali_kbase_event.c new file mode 100644 index 00000000000..784acecae17 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_event.c @@ -0,0 +1,195 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
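The disjoint-event helpers above keep two atomics: a count of events and a state that is non-zero while at least one disjoint period is open, so kbase_disjoint_event_potential() only counts when something disjoint is actually in flight. A user-space model of the same logic:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int disjoint_state; /* > 0 while a disjoint period is open */
static atomic_int disjoint_count; /* total disjoint events observed */

static void disjoint_event(void)
{
        atomic_fetch_add(&disjoint_count, 1);
}

static void disjoint_state_up(void)
{
        atomic_fetch_add(&disjoint_state, 1);
        disjoint_event();
}

static void disjoint_state_down(void)
{
        disjoint_event();
        atomic_fetch_sub(&disjoint_state, 1);
}

/* Counted only while some disjoint period is open. */
static void disjoint_event_potential(void)
{
        if (atomic_load(&disjoint_state))
                disjoint_event();
}

int main(void)
{
        disjoint_event_potential();   /* state == 0: not counted */
        disjoint_state_up();          /* counted */
        disjoint_event_potential();   /* state > 0: counted */
        disjoint_state_down();        /* counted */
        printf("count = %d\n", atomic_load(&disjoint_count)); /* 3 */
        return 0;
}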
+ * + */ + + + + + +#include <mali_kbase.h> +#include <mali_kbase_debug.h> + +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include <mali_kbase_tlstream.h> +#endif + +static struct base_jd_udata kbase_event_process(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + struct base_jd_udata data; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + + data = katom->udata; + + KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(1, &kctx->timeline.jd_atoms_in_flight)); + +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_nret_atom_ctx(katom, kctx); + kbase_tlstream_tl_del_atom(katom); +#endif + + mutex_lock(&kctx->jctx.lock); + katom->status = KBASE_JD_ATOM_STATE_UNUSED; + mutex_unlock(&kctx->jctx.lock); + + wake_up(&katom->completed); + + return data; +} + +int kbase_event_pending(struct kbase_context *ctx) +{ + int ret; + + KBASE_DEBUG_ASSERT(ctx); + + mutex_lock(&ctx->event_mutex); + ret = (!list_empty(&ctx->event_list)) || (true == ctx->event_closed); + mutex_unlock(&ctx->event_mutex); + + return ret; +} + +KBASE_EXPORT_TEST_API(kbase_event_pending); + +int kbase_event_dequeue(struct kbase_context *ctx, struct base_jd_event_v2 *uevent) +{ + struct kbase_jd_atom *atom; + + KBASE_DEBUG_ASSERT(ctx); + + mutex_lock(&ctx->event_mutex); + + if (list_empty(&ctx->event_list)) { + if (!ctx->event_closed) { + mutex_unlock(&ctx->event_mutex); + return -1; + } + + /* generate the BASE_JD_EVENT_DRV_TERMINATED message on the fly */ + mutex_unlock(&ctx->event_mutex); + uevent->event_code = BASE_JD_EVENT_DRV_TERMINATED; + memset(&uevent->udata, 0, sizeof(uevent->udata)); + dev_dbg(ctx->kbdev->dev, + "event system closed, returning BASE_JD_EVENT_DRV_TERMINATED(0x%X)\n", + BASE_JD_EVENT_DRV_TERMINATED); + return 0; + } + + /* normal event processing */ + atom = list_entry(ctx->event_list.next, struct kbase_jd_atom, dep_item[0]); + list_del(ctx->event_list.next); + + mutex_unlock(&ctx->event_mutex); + + dev_dbg(ctx->kbdev->dev, "event dequeuing %p\n", (void *)atom); + uevent->event_code = atom->event_code; + uevent->atom_number = (atom - ctx->jctx.atoms); + uevent->udata = kbase_event_process(ctx, atom); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_event_dequeue); + +static void kbase_event_post_worker(struct work_struct *data) +{ + struct kbase_jd_atom *atom = container_of(data, struct kbase_jd_atom, work); + struct kbase_context *ctx = atom->kctx; + + if (atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_free_external_resources(atom); + + if (atom->core_req & BASE_JD_REQ_EVENT_ONLY_ON_FAILURE) { + if (atom->event_code == BASE_JD_EVENT_DONE) { + /* Don't report the event */ + kbase_event_process(ctx, atom); + return; + } + } + + if (atom->core_req & BASEP_JD_REQ_EVENT_NEVER) { + /* Don't report the event */ + kbase_event_process(ctx, atom); + return; + } + + mutex_lock(&ctx->event_mutex); + list_add_tail(&atom->dep_item[0], &ctx->event_list); + mutex_unlock(&ctx->event_mutex); + + kbase_event_wakeup(ctx); +} + +void kbase_event_post(struct kbase_context *ctx, struct kbase_jd_atom *atom) +{ + KBASE_DEBUG_ASSERT(ctx); + KBASE_DEBUG_ASSERT(ctx->event_workq); + KBASE_DEBUG_ASSERT(atom); + + INIT_WORK(&atom->work, kbase_event_post_worker); + queue_work(ctx->event_workq, &atom->work); +} + +KBASE_EXPORT_TEST_API(kbase_event_post); + +void kbase_event_close(struct kbase_context *kctx) +{ + mutex_lock(&kctx->event_mutex); + kctx->event_closed = true; + mutex_unlock(&kctx->event_mutex); + 
kbase_event_wakeup(kctx); +} + +int kbase_event_init(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + + INIT_LIST_HEAD(&kctx->event_list); + mutex_init(&kctx->event_mutex); + kctx->event_closed = false; + kctx->event_workq = alloc_workqueue("kbase_event", WQ_MEM_RECLAIM, 1); + + if (NULL == kctx->event_workq) + return -EINVAL; + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_event_init); + +void kbase_event_cleanup(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(kctx->event_workq); + + flush_workqueue(kctx->event_workq); + destroy_workqueue(kctx->event_workq); + + /* We use kbase_event_dequeue to remove the remaining events as that + * deals with all the cleanup needed for the atoms. + * + * Note: use of kctx->event_list without a lock is safe because this must be the last + * thread using it (because we're about to terminate the lock) + */ + while (!list_empty(&kctx->event_list)) { + struct base_jd_event_v2 event; + + kbase_event_dequeue(kctx, &event); + } +} + +KBASE_EXPORT_TEST_API(kbase_event_cleanup); diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator.h b/drivers/gpu/arm/midgard/mali_kbase_gator.h new file mode 100644 index 00000000000..ce65b5562a2 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gator.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/* NB taken from gator */ +/* + * List of possible actions to be controlled by DS-5 Streamline. + * The following numbers are used by gator to control the frame buffer dumping + * and s/w counter reporting. We cannot use the enums in mali_uk_types.h because + * they are unknown inside gator. + */ +#ifndef _KBASE_GATOR_H_ +#define _KBASE_GATOR_H_ + +#ifdef CONFIG_MALI_GATOR_SUPPORT +#define GATOR_MAKE_EVENT(type, number) (((type) << 24) | ((number) << 16)) +#define GATOR_JOB_SLOT_START 1 +#define GATOR_JOB_SLOT_STOP 2 +#define GATOR_JOB_SLOT_SOFT_STOPPED 3 + +void kbase_trace_mali_job_slots_event(u32 event, const struct kbase_context *kctx, u8 atom_id); +void kbase_trace_mali_pm_status(u32 event, u64 value); +void kbase_trace_mali_pm_power_off(u32 event, u64 value); +void kbase_trace_mali_pm_power_on(u32 event, u64 value); +void kbase_trace_mali_page_fault_insert_pages(int event, u32 value); +void kbase_trace_mali_mmu_as_in_use(int event); +void kbase_trace_mali_mmu_as_released(int event); +void kbase_trace_mali_total_alloc_pages_change(long long int event); + +#endif /* CONFIG_MALI_GATOR_SUPPORT */ + +#endif /* _KBASE_GATOR_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.c b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c new file mode 100644 index 00000000000..6ef4e39c5cc --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.c @@ -0,0 +1,322 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
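kbase_event_dequeue() has a three-way contract: return an event if one is queued, -1 if the queue is empty but still open, and a synthesized BASE_JD_EVENT_DRV_TERMINATED once the queue has been closed and drained. A minimal model of that contract (simplified, with an integer standing in for the event list):

#include <stdbool.h>
#include <stdio.h>

enum { EVENT_NONE = -1, EVENT_DONE = 0, EVENT_DRV_TERMINATED = 1 };

struct evq {
        int pending;    /* number of queued events (stand-in for the list) */
        bool closed;
};

/* Mirrors the dequeue contract: -1 when there is nothing to report and the
 * queue is still open; a synthetic DRV_TERMINATED once the queue is closed
 * and drained. */
static int evq_dequeue(struct evq *q)
{
        if (q->pending == 0)
                return q->closed ? EVENT_DRV_TERMINATED : EVENT_NONE;
        q->pending--;
        return EVENT_DONE;
}

int main(void)
{
        struct evq q = { 2, false };

        printf("%d %d %d\n", evq_dequeue(&q), evq_dequeue(&q),
               evq_dequeue(&q));         /* 0 0 -1: drained but still open */
        q.closed = true;
        printf("%d\n", evq_dequeue(&q)); /* 1: DRV_TERMINATED */
        return 0;
}

This is why kbase_event_cleanup() can simply loop on kbase_event_dequeue() until the list is empty: each call performs the full per-atom cleanup.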
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include "mali_kbase.h" +#include "mali_kbase_mem_linux.h" +#include "mali_kbase_gator_api.h" +#include "mali_kbase_gator_hwcnt_names.h" +#include "mali_kbase_instr.h" + +#define MALI_MAX_CORES_PER_GROUP 4 +#define MALI_MAX_NUM_BLOCKS_PER_GROUP 8 +#define MALI_COUNTERS_PER_BLOCK 64 +#define MALI_BYTES_PER_COUNTER 4 + +struct kbase_gator_hwcnt_handles { + struct kbase_device *kbdev; + struct kbase_context *kctx; + u64 hwcnt_gpu_va; + void *hwcnt_cpu_va; + struct kbase_vmap_struct hwcnt_map; +}; + +const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters) +{ + uint32_t gpu_id; + const char * const *hardware_counters; + struct kbase_device *kbdev; + + if (!total_counters) + return NULL; + + /* Get the first device - it doesn't matter in this case */ + kbdev = kbase_find_device(-1); + if (!kbdev) + return NULL; + + gpu_id = kbdev->gpu_props.props.core_props.product_id; + + switch (gpu_id) { + /* If we are using a Mali-T60x device */ + case GPU_ID_PI_T60X: + hardware_counters = hardware_counters_mali_t60x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t60x); + break; + /* If we are using a Mali-T62x device */ + case GPU_ID_PI_T62X: + hardware_counters = hardware_counters_mali_t62x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t62x); + break; + /* If we are using a Mali-T72x device */ + case GPU_ID_PI_T72X: + hardware_counters = hardware_counters_mali_t72x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t72x); + break; + /* If we are using a Mali-T76x device */ + case GPU_ID_PI_T76X: + hardware_counters = hardware_counters_mali_t76x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t76x); + break; + /* If we are using a Mali-T82x device */ + case GPU_ID_PI_T82X: + hardware_counters = hardware_counters_mali_t82x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t82x); + break; + /* If we are using a Mali-T83x device */ + case GPU_ID_PI_T83X: + hardware_counters = hardware_counters_mali_t83x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t83x); + break; + /* If we are using a Mali-T86x device */ + case GPU_ID_PI_T86X: + hardware_counters = hardware_counters_mali_t86x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t86x); + break; + /* If we are using a Mali-T88x device */ + case GPU_ID_PI_TFRX: + hardware_counters = hardware_counters_mali_t88x; + *total_counters = ARRAY_SIZE(hardware_counters_mali_t88x); + break; + default: + hardware_counters = NULL; + *total_counters = 0; + dev_err(kbdev->dev, "Unrecognized gpu ID: %u\n", gpu_id); + break; + } + + /* Release the kbdev reference. */ + kbase_release_device(kbdev); + + /* If we return a string array take a reference on the module (or fail). */ + if (hardware_counters && !try_module_get(THIS_MODULE)) + return NULL; + + return hardware_counters; +} +KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init_names); + +void kbase_gator_hwcnt_term_names(void) +{ + /* Release the module reference. 
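kbase_gator_hwcnt_init_names() above is a straightforward table-selection function: switch on the product ID, return the matching static name array, and report its length through an out-parameter. The same shape in miniature (product IDs and tables here are made up, not real GPU_ID_PI_* values):

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Hypothetical per-GPU name tables, two entries each for brevity. */
static const char * const names_a[] = { "A_GPU_ACTIVE", "A_IRQ_ACTIVE" };
static const char * const names_b[] = { "B_GPU_ACTIVE", "B_IRQ_ACTIVE" };

static const char * const *counter_names(unsigned int gpu_id,
                                         unsigned int *count)
{
        switch (gpu_id) {
        case 0x0001:                    /* hypothetical product id */
                *count = ARRAY_SIZE(names_a);
                return names_a;
        case 0x0002:                    /* hypothetical product id */
                *count = ARRAY_SIZE(names_b);
                return names_b;
        default:                        /* unknown hardware */
                *count = 0;
                return NULL;
        }
}

int main(void)
{
        unsigned int n, i;
        const char * const *names = counter_names(0x0002, &n);

        for (i = 0; names && i < n; i++)
                puts(names[i]);
        return 0;
}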
*/ + module_put(THIS_MODULE); +} +KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term_names); + +struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info) +{ + struct kbase_gator_hwcnt_handles *hand; + struct kbase_uk_hwcnt_setup setup; + int err; + uint32_t dump_size = 0, i = 0; + struct kbase_va_region *reg; + u64 flags; + u64 nr_pages; + u16 va_alignment = 0; + + if (!in_out_info) + return NULL; + + hand = kzalloc(sizeof(*hand), GFP_KERNEL); + if (!hand) + return NULL; + + /* Get the first device */ + hand->kbdev = kbase_find_device(-1); + if (!hand->kbdev) + goto free_hand; + + /* Create a kbase_context */ + hand->kctx = kbase_create_context(hand->kbdev, true); + if (!hand->kctx) + goto release_device; + + in_out_info->nr_cores = hand->kbdev->gpu_props.num_cores; + in_out_info->nr_core_groups = hand->kbdev->gpu_props.num_core_groups; + in_out_info->gpu_id = hand->kbdev->gpu_props.props.core_props.product_id; + + /* If we are using a Mali-T6xx or Mali-T72x device */ + if (in_out_info->gpu_id == GPU_ID_PI_T60X || + in_out_info->gpu_id == GPU_ID_PI_T62X || + in_out_info->gpu_id == GPU_ID_PI_T72X) { + uint32_t cg, j; + uint64_t core_mask; + + /* There are 8 hardware counters blocks per core group */ + in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * + MALI_MAX_NUM_BLOCKS_PER_GROUP * + in_out_info->nr_core_groups, GFP_KERNEL); + + if (!in_out_info->hwc_layout) + goto destroy_context; + + dump_size = in_out_info->nr_core_groups * + MALI_MAX_NUM_BLOCKS_PER_GROUP * + MALI_COUNTERS_PER_BLOCK * + MALI_BYTES_PER_COUNTER; + + for (cg = 0; cg < in_out_info->nr_core_groups; cg++) { + core_mask = hand->kbdev->gpu_props.props.coherency_info.group[cg].core_mask; + + for (j = 0; j < MALI_MAX_CORES_PER_GROUP; j++) { + if (core_mask & (1u << j)) + in_out_info->hwc_layout[i++] = SHADER_BLOCK; + else + in_out_info->hwc_layout[i++] = RESERVED_BLOCK; + } + + in_out_info->hwc_layout[i++] = TILER_BLOCK; + in_out_info->hwc_layout[i++] = MMU_L2_BLOCK; + + in_out_info->hwc_layout[i++] = RESERVED_BLOCK; + + if (0 == cg) + in_out_info->hwc_layout[i++] = JM_BLOCK; + else + in_out_info->hwc_layout[i++] = RESERVED_BLOCK; + } + /* If we are using any other device */ + } else { + uint32_t nr_l2, nr_sc, j; + uint64_t core_mask; + + nr_l2 = hand->kbdev->gpu_props.props.l2_props.num_l2_slices; + + core_mask = hand->kbdev->gpu_props.props.coherency_info.group[0].core_mask; + + nr_sc = hand->kbdev->gpu_props.props.coherency_info.group[0].num_cores; + + /* The job manager and tiler sets of counters + * are always present */ + in_out_info->hwc_layout = kmalloc(sizeof(enum hwc_type) * (2 + nr_sc + nr_l2), GFP_KERNEL); + + if (!in_out_info->hwc_layout) + goto destroy_context; + + dump_size = (2 + nr_sc + nr_l2) * MALI_COUNTERS_PER_BLOCK * MALI_BYTES_PER_COUNTER; + + in_out_info->hwc_layout[i++] = JM_BLOCK; + in_out_info->hwc_layout[i++] = TILER_BLOCK; + + for (j = 0; j < nr_l2; j++) + in_out_info->hwc_layout[i++] = MMU_L2_BLOCK; + + while (core_mask != 0ull) { + if ((core_mask & 1ull) != 0ull) + in_out_info->hwc_layout[i++] = SHADER_BLOCK; + else + in_out_info->hwc_layout[i++] = RESERVED_BLOCK; + core_mask >>= 1; + } + } + + in_out_info->nr_hwc_blocks = i; + + in_out_info->size = dump_size; + + flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR; + nr_pages = PFN_UP(dump_size); + reg = kbase_mem_alloc(hand->kctx, nr_pages, nr_pages, 0, + &flags, &hand->hwcnt_gpu_va, &va_alignment); + if (!reg) + goto free_layout; + + hand->hwcnt_cpu_va = kbase_vmap(hand->kctx, hand->hwcnt_gpu_va, + 
dump_size, &hand->hwcnt_map); + + if (!hand->hwcnt_cpu_va) + goto free_buffer; + + in_out_info->kernel_dump_buffer = hand->hwcnt_cpu_va; + + /*setup.dump_buffer = (uintptr_t)in_out_info->kernel_dump_buffer;*/ + setup.dump_buffer = hand->hwcnt_gpu_va; + setup.jm_bm = in_out_info->bitmask[0]; + setup.tiler_bm = in_out_info->bitmask[1]; + setup.shader_bm = in_out_info->bitmask[2]; + setup.mmu_l2_bm = in_out_info->bitmask[3]; + + err = kbase_instr_hwcnt_enable(hand->kctx, &setup); + if (err) + goto free_unmap; + + kbase_instr_hwcnt_clear(hand->kctx); + + return hand; + +free_unmap: + kbase_vunmap(hand->kctx, &hand->hwcnt_map); + +free_buffer: + kbase_mem_free(hand->kctx, hand->hwcnt_gpu_va); + +free_layout: + kfree(in_out_info->hwc_layout); + +destroy_context: + kbase_destroy_context(hand->kctx); + +release_device: + kbase_release_device(hand->kbdev); + +free_hand: + kfree(hand); + + return NULL; +} +KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_init); + +void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles) +{ + if (in_out_info) + kfree(in_out_info->hwc_layout); + + if (opaque_handles) { + kbase_instr_hwcnt_disable(opaque_handles->kctx); + kbase_vunmap(opaque_handles->kctx, &opaque_handles->hwcnt_map); + kbase_mem_free(opaque_handles->kctx, opaque_handles->hwcnt_gpu_va); + kbase_destroy_context(opaque_handles->kctx); + kbase_release_device(opaque_handles->kbdev); + kfree(opaque_handles); + } +} +KBASE_EXPORT_SYMBOL(kbase_gator_hwcnt_term); + +uint32_t kbase_gator_instr_hwcnt_dump_complete( + struct kbase_gator_hwcnt_handles *opaque_handles, + uint32_t * const success) +{ + bool ret_res, success_res; + + if (opaque_handles && success) { + ret_res = kbase_instr_hwcnt_dump_complete(opaque_handles->kctx, + &success_res); + *success = (uint32_t)success_res; + return (uint32_t)(ret_res != 0); + } + return 0; +} +KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_complete); + +uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles) +{ + if (opaque_handles) + return (kbase_instr_hwcnt_request_dump( + opaque_handles->kctx) == 0); + + return 0; +} +KBASE_EXPORT_SYMBOL(kbase_gator_instr_hwcnt_dump_irq); diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h new file mode 100644 index 00000000000..ef9ac0f7b63 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_api.h @@ -0,0 +1,219 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _KBASE_GATOR_API_H_ +#define _KBASE_GATOR_API_H_ + +/** + * @brief This file describes the API used by Gator to fetch hardware counters. + */ + +/* This define is used by the gator kernel module compile to select which DDK + * API calling convention to use. If not defined (legacy DDK) gator assumes + * version 1. 
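For the non-T6xx path in kbase_gator_hwcnt_init() above, the dump buffer holds one 64-counter block for the job manager, one for the tiler, one per L2 slice and one per shader core, at four bytes per counter. A worked size calculation under an assumed configuration (one L2 slice, four cores - illustrative numbers, not taken from the source):

#include <stdio.h>

#define COUNTERS_PER_BLOCK 64
#define BYTES_PER_COUNTER 4
#define PAGE_SIZE 4096

int main(void)
{
        unsigned int nr_l2 = 1, nr_sc = 4;     /* assumed configuration */
        unsigned int blocks = 2 + nr_l2 + nr_sc; /* JM + tiler + L2s + cores */
        unsigned int dump_size = blocks * COUNTERS_PER_BLOCK *
                                 BYTES_PER_COUNTER;
        unsigned int nr_pages = (dump_size + PAGE_SIZE - 1) / PAGE_SIZE;

        printf("%u blocks, %u bytes, %u page(s)\n",
               blocks, dump_size, nr_pages);   /* 7 blocks, 1792 bytes, 1 page */
        return 0;
}

The page round-up mirrors the PFN_UP(dump_size) used when allocating the GPU-mapped buffer.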
The version to DDK release mapping is: + * Version 1 API: DDK versions r1px, r2px + * Version 2 API: DDK versions r3px, r4px + * Version 3 API: DDK version r5p0 and newer + * + * API Usage + * ========= + * + * 1] Call kbase_gator_hwcnt_init_names() to return the list of short counter + * names for the GPU present in this device. + * + * 2] Create a kbase_gator_hwcnt_info structure and set the counter enables for + * the counters you want enabled. The enables can all be set for simplicity in + * most use cases, but disabling some will let you minimize bandwidth impact. + * + * 3] Call kbase_gator_hwcnt_init() using the above structure, to create a + * counter context. On successful return the DDK will have populated the + * structure with a variety of useful information. + * + * 4] Call kbase_gator_instr_hwcnt_dump_irq() to queue a non-blocking request + * for a counter dump. If this returns a non-zero value the request has been + * queued, otherwise the driver has been unable to do so (typically because + * another user of the instrumentation exists concurrently). + * + * 5] Call kbase_gator_instr_hwcnt_dump_complete() to test whether the + * previously requested dump has been successful. If this returns non-zero the + * counter dump has resolved, but the value of *success must also be tested as + * the dump may not have been successful. If it returns zero the counter dump + * was abandoned due to the device being busy (typically because another user + * of the instrumentation exists concurrently). + * + * 6] Process the counters stored in the buffer pointed to by ... + * + * kbase_gator_hwcnt_info->kernel_dump_buffer + * + * In pseudo code you can find all of the counters via this approach: + * + * + * hwcnt_info # pointer to kbase_gator_hwcnt_info structure + * hwcnt_name # pointer to name list + * + * u32 * hwcnt_data = (u32*)hwcnt_info->kernel_dump_buffer + * + * # Iterate over each 64-counter block in this GPU configuration + * for( i = 0; i < hwcnt_info->nr_hwc_blocks; i++) { + * hwc_type type = hwcnt_info->hwc_layout[i]; + * + * # Skip reserved type blocks - they contain no counters at all + * if( type == RESERVED_BLOCK ) { + * continue; + * } + * + * size_t name_offset = type * 64; + * size_t data_offset = i * 64; + * + * # Iterate over the names of the counters in this block type + * for( j = 0; j < 64; j++) { + * const char * name = hwcnt_name[name_offset+j]; + * + * # Skip empty name strings - there is no counter here + * if( name[0] == '\0' ) { + * continue; + * } + * + * u32 data = hwcnt_data[data_offset+j]; + * + * printk( "COUNTER: %s DATA: %u\n", name, data ); + * } + * } + * + * + * Note that in most implementations you typically want to either SUM or + * AVERAGE multiple instances of the same counter if, for example, you have + * multiple shader cores or multiple L2 caches. The most sensible view for + * analysis is to AVERAGE shader core counters, but SUM L2 cache and MMU + * counters. + * + * 7] Go to step 4, repeating until you want to stop collecting counters. + * + * 8] Release the dump resources by calling kbase_gator_hwcnt_term(). + * + * 9] Release the name table resources by calling + * kbase_gator_hwcnt_term_names(). This function must only be called if + * init_names() returned a non-NULL value.
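The pseudo code in step 6 translates directly into C. A hedged, self-contained rendering with a synthetic one-block layout (the JM_BLOCK slot and "GPU_ACTIVE" name below are placeholders, not the real T60x table):

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

enum hwc_type { JM_BLOCK, TILER_BLOCK, SHADER_BLOCK, MMU_L2_BLOCK,
                RESERVED_BLOCK };

static void print_counters(const uint32_t *data, const char **names,
                           const enum hwc_type *layout,
                           unsigned int nr_blocks)
{
        unsigned int i, j;

        for (i = 0; i < nr_blocks; i++) {
                size_t name_off, data_off;

                if (layout[i] == RESERVED_BLOCK)
                        continue;       /* reserved blocks carry no counters */

                name_off = (size_t)layout[i] * 64; /* names: indexed by type */
                data_off = (size_t)i * 64;         /* data: indexed by position */

                for (j = 0; j < 64; j++) {
                        if (names[name_off + j][0] == '\0')
                                continue;          /* empty name: a hole */
                        printf("COUNTER: %s DATA: %u\n",
                               names[name_off + j], data[data_off + j]);
                }
        }
}

int main(void)
{
        static const char *names[5 * 64]; /* one 64-name stripe per block type */
        static uint32_t data[64];         /* a single JM block, for the demo */
        enum hwc_type layout[1] = { JM_BLOCK };
        size_t i;

        for (i = 0; i < 5 * 64; i++)
                names[i] = "";
        names[JM_BLOCK * 64 + 6] = "GPU_ACTIVE"; /* hypothetical slot */
        data[6] = 12345;

        print_counters(data, names, layout, 1);
        return 0;
}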
+ **/ + +#define MALI_DDK_GATOR_API_VERSION 3 + +enum hwc_type { + JM_BLOCK = 0, + TILER_BLOCK, + SHADER_BLOCK, + MMU_L2_BLOCK, + RESERVED_BLOCK +}; + +struct kbase_gator_hwcnt_info { + /* Passed from Gator to kbase */ + + /* the bitmask of enabled hardware counters for each counter block */ + uint16_t bitmask[4]; + + /* Passed from kbase to Gator */ + + /* ptr to counter dump memory */ + void *kernel_dump_buffer; + + /* size of counter dump memory */ + uint32_t size; + + /* the ID of the Mali device */ + uint32_t gpu_id; + + /* the number of shader cores in the GPU */ + uint32_t nr_cores; + + /* the number of core groups */ + uint32_t nr_core_groups; + + /* the memory layout of the performance counters */ + enum hwc_type *hwc_layout; + + /* the total number of hardware counter blocks */ + uint32_t nr_hwc_blocks; +}; + +/** + * @brief Opaque block of Mali data which Gator needs to return to the API later. + */ +struct kbase_gator_hwcnt_handles; + +/** + * @brief Initialize the resources Gator needs for performance profiling. + * + * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the Mali + * specific information that will be returned to Gator. On entry Gator must have populated the + * 'bitmask' field with the counters it wishes to enable for each class of counter block. + * Each entry in the array corresponds to a single counter class based on the "hwc_type" + * enumeration, and each bit corresponds to an enable for 4 sequential counters (LSB enables + * the first 4 counters in the block, and so on). See the GPU counter array as returned by + * kbase_gator_hwcnt_init_names() for the index values of each counter for the current GPU. + * + * @return Pointer to an opaque handle block on success, NULL on error. + */ +extern struct kbase_gator_hwcnt_handles *kbase_gator_hwcnt_init(struct kbase_gator_hwcnt_info *in_out_info); + +/** + * @brief Free all resources once Gator has finished using performance counters. + * + * @param in_out_info A pointer to a structure containing the enabled counters passed from Gator and all the + * Mali specific information that will be returned to Gator. + * @param opaque_handles A wrapper structure for kbase structures. + */ +extern void kbase_gator_hwcnt_term(struct kbase_gator_hwcnt_info *in_out_info, struct kbase_gator_hwcnt_handles *opaque_handles); + +/** + * @brief Poll whether a counter dump is successful. + * + * @param opaque_handles A wrapper structure for kbase structures. + * @param[out] success Non-zero on success, zero on failure. + * + * @return Zero if the dump is still pending, non-zero if the dump has completed. Note that a + * completed dump may not have dumped successfully, so the caller must test for both + * a completed and successful dump before processing counters. + */ +extern uint32_t kbase_gator_instr_hwcnt_dump_complete(struct kbase_gator_hwcnt_handles *opaque_handles, uint32_t * const success); + +/** + * @brief Request the generation of a new counter dump. + * + * @param opaque_handles A wrapper structure for kbase structures. + * + * @return Zero if the hardware device is busy and cannot handle the request, non-zero otherwise. + */ +extern uint32_t kbase_gator_instr_hwcnt_dump_irq(struct kbase_gator_hwcnt_handles *opaque_handles); + +/** + * @brief This function is used to fetch the names table based on the Mali device in use. + * + * @param[out] total_counters The total number of counter short names in the Mali device's list.
+ * + * @return Pointer to an array of strings of length *total_counters. + */ +extern const char * const *kbase_gator_hwcnt_init_names(uint32_t *total_counters); + +/** + * @brief This function is used to terminate the use of the names table. + * + * This function must only be called if the initial call to kbase_gator_hwcnt_init_names returned a non-NULL value. + */ +extern void kbase_gator_hwcnt_term_names(void); + +#endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h new file mode 100644 index 00000000000..d124e82edd0 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gator_hwcnt_names.h @@ -0,0 +1,2159 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _KBASE_GATOR_HWCNT_NAMES_H_ +#define _KBASE_GATOR_HWCNT_NAMES_H_ + +/* + * "Short names" for hardware counters used by Streamline. Counter names are + * stored in accordance with their memory layout in the binary counter block + * emitted by the Mali GPU. Each "master" in the GPU emits a fixed-size block + * of 64 counters, and each GPU implements the same set of "masters" although + * the counters each master exposes within its block of 64 may vary. + * + * Counters whose name is an empty string are simply "holes" in the counter + * memory where no counter exists.
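 + * + * As a purely illustrative sketch, the name and value of counter j within + * the i-th block of a dump therefore resolve as follows, where 'names' is + * one of the per-GPU tables below and 'dump' the u32 dump buffer (both + * variable names are hypothetical): + * + * name = names[hwc_layout[i] * 64 + j]; # index by block type + * value = dump[i * 64 + j]; # index by block position + * + * An empty name string means no counter exists at that slot.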
+ */ + +static const char * const hardware_counters_mali_t60x[] = { + /* Job Manager */ + "", + "", + "", + "", + "T60x_MESSAGES_SENT", + "T60x_MESSAGES_RECEIVED", + "T60x_GPU_ACTIVE", + "T60x_IRQ_ACTIVE", + "T60x_JS0_JOBS", + "T60x_JS0_TASKS", + "T60x_JS0_ACTIVE", + "", + "T60x_JS0_WAIT_READ", + "T60x_JS0_WAIT_ISSUE", + "T60x_JS0_WAIT_DEPEND", + "T60x_JS0_WAIT_FINISH", + "T60x_JS1_JOBS", + "T60x_JS1_TASKS", + "T60x_JS1_ACTIVE", + "", + "T60x_JS1_WAIT_READ", + "T60x_JS1_WAIT_ISSUE", + "T60x_JS1_WAIT_DEPEND", + "T60x_JS1_WAIT_FINISH", + "T60x_JS2_JOBS", + "T60x_JS2_TASKS", + "T60x_JS2_ACTIVE", + "", + "T60x_JS2_WAIT_READ", + "T60x_JS2_WAIT_ISSUE", + "T60x_JS2_WAIT_DEPEND", + "T60x_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /*Tiler */ + "", + "", + "", + "T60x_TI_JOBS_PROCESSED", + "T60x_TI_TRIANGLES", + "T60x_TI_QUADS", + "T60x_TI_POLYGONS", + "T60x_TI_POINTS", + "T60x_TI_LINES", + "T60x_TI_VCACHE_HIT", + "T60x_TI_VCACHE_MISS", + "T60x_TI_FRONT_FACING", + "T60x_TI_BACK_FACING", + "T60x_TI_PRIM_VISIBLE", + "T60x_TI_PRIM_CULLED", + "T60x_TI_PRIM_CLIPPED", + "T60x_TI_LEVEL0", + "T60x_TI_LEVEL1", + "T60x_TI_LEVEL2", + "T60x_TI_LEVEL3", + "T60x_TI_LEVEL4", + "T60x_TI_LEVEL5", + "T60x_TI_LEVEL6", + "T60x_TI_LEVEL7", + "T60x_TI_COMMAND_1", + "T60x_TI_COMMAND_2", + "T60x_TI_COMMAND_3", + "T60x_TI_COMMAND_4", + "T60x_TI_COMMAND_4_7", + "T60x_TI_COMMAND_8_15", + "T60x_TI_COMMAND_16_63", + "T60x_TI_COMMAND_64", + "T60x_TI_COMPRESS_IN", + "T60x_TI_COMPRESS_OUT", + "T60x_TI_COMPRESS_FLUSH", + "T60x_TI_TIMESTAMPS", + "T60x_TI_PCACHE_HIT", + "T60x_TI_PCACHE_MISS", + "T60x_TI_PCACHE_LINE", + "T60x_TI_PCACHE_STALL", + "T60x_TI_WRBUF_HIT", + "T60x_TI_WRBUF_MISS", + "T60x_TI_WRBUF_LINE", + "T60x_TI_WRBUF_PARTIAL", + "T60x_TI_WRBUF_STALL", + "T60x_TI_ACTIVE", + "T60x_TI_LOADING_DESC", + "T60x_TI_INDEX_WAIT", + "T60x_TI_INDEX_RANGE_WAIT", + "T60x_TI_VERTEX_WAIT", + "T60x_TI_PCACHE_WAIT", + "T60x_TI_WRBUF_WAIT", + "T60x_TI_BUS_READ", + "T60x_TI_BUS_WRITE", + "", + "", + "", + "", + "", + "T60x_TI_UTLB_STALL", + "T60x_TI_UTLB_REPLAY_MISS", + "T60x_TI_UTLB_REPLAY_FULL", + "T60x_TI_UTLB_NEW_MISS", + "T60x_TI_UTLB_HIT", + + /* Shader Core */ + "", + "", + "", + "", + "T60x_FRAG_ACTIVE", + "T60x_FRAG_PRIMITIVES", + "T60x_FRAG_PRIMITIVES_DROPPED", + "T60x_FRAG_CYCLES_DESC", + "T60x_FRAG_CYCLES_PLR", + "T60x_FRAG_CYCLES_VERT", + "T60x_FRAG_CYCLES_TRISETUP", + "T60x_FRAG_CYCLES_RAST", + "T60x_FRAG_THREADS", + "T60x_FRAG_DUMMY_THREADS", + "T60x_FRAG_QUADS_RAST", + "T60x_FRAG_QUADS_EZS_TEST", + "T60x_FRAG_QUADS_EZS_KILLED", + "T60x_FRAG_THREADS_LZS_TEST", + "T60x_FRAG_THREADS_LZS_KILLED", + "T60x_FRAG_CYCLES_NO_TILE", + "T60x_FRAG_NUM_TILES", + "T60x_FRAG_TRANS_ELIM", + "T60x_COMPUTE_ACTIVE", + "T60x_COMPUTE_TASKS", + "T60x_COMPUTE_THREADS", + "T60x_COMPUTE_CYCLES_DESC", + "T60x_TRIPIPE_ACTIVE", + "T60x_ARITH_WORDS", + "T60x_ARITH_CYCLES_REG", + "T60x_ARITH_CYCLES_L0", + "T60x_ARITH_FRAG_DEPEND", + "T60x_LS_WORDS", + "T60x_LS_ISSUES", + "T60x_LS_RESTARTS", + "T60x_LS_REISSUES_MISS", + "T60x_LS_REISSUES_VD", + "T60x_LS_REISSUE_ATTRIB_MISS", + "T60x_LS_NO_WB", + "T60x_TEX_WORDS", + "T60x_TEX_BUBBLES", + "T60x_TEX_WORDS_L0", + "T60x_TEX_WORDS_DESC", + "T60x_TEX_ISSUES", + "T60x_TEX_RECIRC_FMISS", + "T60x_TEX_RECIRC_DESC", + "T60x_TEX_RECIRC_MULTI", + "T60x_TEX_RECIRC_PMISS", + "T60x_TEX_RECIRC_CONF", + "T60x_LSC_READ_HITS", + "T60x_LSC_READ_MISSES", + 
"T60x_LSC_WRITE_HITS", + "T60x_LSC_WRITE_MISSES", + "T60x_LSC_ATOMIC_HITS", + "T60x_LSC_ATOMIC_MISSES", + "T60x_LSC_LINE_FETCHES", + "T60x_LSC_DIRTY_LINE", + "T60x_LSC_SNOOPS", + "T60x_AXI_TLB_STALL", + "T60x_AXI_TLB_MIESS", + "T60x_AXI_TLB_TRANSACTION", + "T60x_LS_TLB_MISS", + "T60x_LS_TLB_HIT", + "T60x_AXI_BEATS_READ", + "T60x_AXI_BEATS_WRITTEN", + + /*L2 and MMU */ + "", + "", + "", + "", + "T60x_MMU_HIT", + "T60x_MMU_NEW_MISS", + "T60x_MMU_REPLAY_FULL", + "T60x_MMU_REPLAY_MISS", + "T60x_MMU_TABLE_WALK", + "", + "", + "", + "", + "", + "", + "", + "T60x_UTLB_HIT", + "T60x_UTLB_NEW_MISS", + "T60x_UTLB_REPLAY_FULL", + "T60x_UTLB_REPLAY_MISS", + "T60x_UTLB_STALL", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "T60x_L2_EXT_WRITE_BEATS", + "T60x_L2_EXT_READ_BEATS", + "T60x_L2_ANY_LOOKUP", + "T60x_L2_READ_LOOKUP", + "T60x_L2_SREAD_LOOKUP", + "T60x_L2_READ_REPLAY", + "T60x_L2_READ_SNOOP", + "T60x_L2_READ_HIT", + "T60x_L2_CLEAN_MISS", + "T60x_L2_WRITE_LOOKUP", + "T60x_L2_SWRITE_LOOKUP", + "T60x_L2_WRITE_REPLAY", + "T60x_L2_WRITE_SNOOP", + "T60x_L2_WRITE_HIT", + "T60x_L2_EXT_READ_FULL", + "T60x_L2_EXT_READ_HALF", + "T60x_L2_EXT_WRITE_FULL", + "T60x_L2_EXT_WRITE_HALF", + "T60x_L2_EXT_READ", + "T60x_L2_EXT_READ_LINE", + "T60x_L2_EXT_WRITE", + "T60x_L2_EXT_WRITE_LINE", + "T60x_L2_EXT_WRITE_SMALL", + "T60x_L2_EXT_BARRIER", + "T60x_L2_EXT_AR_STALL", + "T60x_L2_EXT_R_BUF_FULL", + "T60x_L2_EXT_RD_BUF_FULL", + "T60x_L2_EXT_R_RAW", + "T60x_L2_EXT_W_STALL", + "T60x_L2_EXT_W_BUF_FULL", + "T60x_L2_EXT_R_W_HAZARD", + "T60x_L2_TAG_HAZARD", + "T60x_L2_SNOOP_FULL", + "T60x_L2_REPLAY_FULL" +}; +static const char * const hardware_counters_mali_t62x[] = { + /* Job Manager */ + "", + "", + "", + "", + "T62x_MESSAGES_SENT", + "T62x_MESSAGES_RECEIVED", + "T62x_GPU_ACTIVE", + "T62x_IRQ_ACTIVE", + "T62x_JS0_JOBS", + "T62x_JS0_TASKS", + "T62x_JS0_ACTIVE", + "", + "T62x_JS0_WAIT_READ", + "T62x_JS0_WAIT_ISSUE", + "T62x_JS0_WAIT_DEPEND", + "T62x_JS0_WAIT_FINISH", + "T62x_JS1_JOBS", + "T62x_JS1_TASKS", + "T62x_JS1_ACTIVE", + "", + "T62x_JS1_WAIT_READ", + "T62x_JS1_WAIT_ISSUE", + "T62x_JS1_WAIT_DEPEND", + "T62x_JS1_WAIT_FINISH", + "T62x_JS2_JOBS", + "T62x_JS2_TASKS", + "T62x_JS2_ACTIVE", + "", + "T62x_JS2_WAIT_READ", + "T62x_JS2_WAIT_ISSUE", + "T62x_JS2_WAIT_DEPEND", + "T62x_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /*Tiler */ + "", + "", + "", + "T62x_TI_JOBS_PROCESSED", + "T62x_TI_TRIANGLES", + "T62x_TI_QUADS", + "T62x_TI_POLYGONS", + "T62x_TI_POINTS", + "T62x_TI_LINES", + "T62x_TI_VCACHE_HIT", + "T62x_TI_VCACHE_MISS", + "T62x_TI_FRONT_FACING", + "T62x_TI_BACK_FACING", + "T62x_TI_PRIM_VISIBLE", + "T62x_TI_PRIM_CULLED", + "T62x_TI_PRIM_CLIPPED", + "T62x_TI_LEVEL0", + "T62x_TI_LEVEL1", + "T62x_TI_LEVEL2", + "T62x_TI_LEVEL3", + "T62x_TI_LEVEL4", + "T62x_TI_LEVEL5", + "T62x_TI_LEVEL6", + "T62x_TI_LEVEL7", + "T62x_TI_COMMAND_1", + "T62x_TI_COMMAND_2", + "T62x_TI_COMMAND_3", + "T62x_TI_COMMAND_4", + "T62x_TI_COMMAND_5_7", + "T62x_TI_COMMAND_8_15", + "T62x_TI_COMMAND_16_63", + "T62x_TI_COMMAND_64", + "T62x_TI_COMPRESS_IN", + "T62x_TI_COMPRESS_OUT", + "T62x_TI_COMPRESS_FLUSH", + "T62x_TI_TIMESTAMPS", + "T62x_TI_PCACHE_HIT", + "T62x_TI_PCACHE_MISS", + "T62x_TI_PCACHE_LINE", + "T62x_TI_PCACHE_STALL", + "T62x_TI_WRBUF_HIT", + "T62x_TI_WRBUF_MISS", + "T62x_TI_WRBUF_LINE", + "T62x_TI_WRBUF_PARTIAL", + "T62x_TI_WRBUF_STALL", + "T62x_TI_ACTIVE", + 
"T62x_TI_LOADING_DESC", + "T62x_TI_INDEX_WAIT", + "T62x_TI_INDEX_RANGE_WAIT", + "T62x_TI_VERTEX_WAIT", + "T62x_TI_PCACHE_WAIT", + "T62x_TI_WRBUF_WAIT", + "T62x_TI_BUS_READ", + "T62x_TI_BUS_WRITE", + "", + "", + "", + "", + "", + "T62x_TI_UTLB_STALL", + "T62x_TI_UTLB_REPLAY_MISS", + "T62x_TI_UTLB_REPLAY_FULL", + "T62x_TI_UTLB_NEW_MISS", + "T62x_TI_UTLB_HIT", + + /* Shader Core */ + "", + "", + "", + "T62x_SHADER_CORE_ACTIVE", + "T62x_FRAG_ACTIVE", + "T62x_FRAG_PRIMITIVES", + "T62x_FRAG_PRIMITIVES_DROPPED", + "T62x_FRAG_CYCLES_DESC", + "T62x_FRAG_CYCLES_FPKQ_ACTIVE", + "T62x_FRAG_CYCLES_VERT", + "T62x_FRAG_CYCLES_TRISETUP", + "T62x_FRAG_CYCLES_EZS_ACTIVE", + "T62x_FRAG_THREADS", + "T62x_FRAG_DUMMY_THREADS", + "T62x_FRAG_QUADS_RAST", + "T62x_FRAG_QUADS_EZS_TEST", + "T62x_FRAG_QUADS_EZS_KILLED", + "T62x_FRAG_THREADS_LZS_TEST", + "T62x_FRAG_THREADS_LZS_KILLED", + "T62x_FRAG_CYCLES_NO_TILE", + "T62x_FRAG_NUM_TILES", + "T62x_FRAG_TRANS_ELIM", + "T62x_COMPUTE_ACTIVE", + "T62x_COMPUTE_TASKS", + "T62x_COMPUTE_THREADS", + "T62x_COMPUTE_CYCLES_DESC", + "T62x_TRIPIPE_ACTIVE", + "T62x_ARITH_WORDS", + "T62x_ARITH_CYCLES_REG", + "T62x_ARITH_CYCLES_L0", + "T62x_ARITH_FRAG_DEPEND", + "T62x_LS_WORDS", + "T62x_LS_ISSUES", + "T62x_LS_RESTARTS", + "T62x_LS_REISSUES_MISS", + "T62x_LS_REISSUES_VD", + "T62x_LS_REISSUE_ATTRIB_MISS", + "T62x_LS_NO_WB", + "T62x_TEX_WORDS", + "T62x_TEX_BUBBLES", + "T62x_TEX_WORDS_L0", + "T62x_TEX_WORDS_DESC", + "T62x_TEX_ISSUES", + "T62x_TEX_RECIRC_FMISS", + "T62x_TEX_RECIRC_DESC", + "T62x_TEX_RECIRC_MULTI", + "T62x_TEX_RECIRC_PMISS", + "T62x_TEX_RECIRC_CONF", + "T62x_LSC_READ_HITS", + "T62x_LSC_READ_MISSES", + "T62x_LSC_WRITE_HITS", + "T62x_LSC_WRITE_MISSES", + "T62x_LSC_ATOMIC_HITS", + "T62x_LSC_ATOMIC_MISSES", + "T62x_LSC_LINE_FETCHES", + "T62x_LSC_DIRTY_LINE", + "T62x_LSC_SNOOPS", + "T62x_AXI_TLB_STALL", + "T62x_AXI_TLB_MIESS", + "T62x_AXI_TLB_TRANSACTION", + "T62x_LS_TLB_MISS", + "T62x_LS_TLB_HIT", + "T62x_AXI_BEATS_READ", + "T62x_AXI_BEATS_WRITTEN", + + /*L2 and MMU */ + "", + "", + "", + "", + "T62x_MMU_HIT", + "T62x_MMU_NEW_MISS", + "T62x_MMU_REPLAY_FULL", + "T62x_MMU_REPLAY_MISS", + "T62x_MMU_TABLE_WALK", + "", + "", + "", + "", + "", + "", + "", + "T62x_UTLB_HIT", + "T62x_UTLB_NEW_MISS", + "T62x_UTLB_REPLAY_FULL", + "T62x_UTLB_REPLAY_MISS", + "T62x_UTLB_STALL", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "T62x_L2_EXT_WRITE_BEATS", + "T62x_L2_EXT_READ_BEATS", + "T62x_L2_ANY_LOOKUP", + "T62x_L2_READ_LOOKUP", + "T62x_L2_SREAD_LOOKUP", + "T62x_L2_READ_REPLAY", + "T62x_L2_READ_SNOOP", + "T62x_L2_READ_HIT", + "T62x_L2_CLEAN_MISS", + "T62x_L2_WRITE_LOOKUP", + "T62x_L2_SWRITE_LOOKUP", + "T62x_L2_WRITE_REPLAY", + "T62x_L2_WRITE_SNOOP", + "T62x_L2_WRITE_HIT", + "T62x_L2_EXT_READ_FULL", + "T62x_L2_EXT_READ_HALF", + "T62x_L2_EXT_WRITE_FULL", + "T62x_L2_EXT_WRITE_HALF", + "T62x_L2_EXT_READ", + "T62x_L2_EXT_READ_LINE", + "T62x_L2_EXT_WRITE", + "T62x_L2_EXT_WRITE_LINE", + "T62x_L2_EXT_WRITE_SMALL", + "T62x_L2_EXT_BARRIER", + "T62x_L2_EXT_AR_STALL", + "T62x_L2_EXT_R_BUF_FULL", + "T62x_L2_EXT_RD_BUF_FULL", + "T62x_L2_EXT_R_RAW", + "T62x_L2_EXT_W_STALL", + "T62x_L2_EXT_W_BUF_FULL", + "T62x_L2_EXT_R_W_HAZARD", + "T62x_L2_TAG_HAZARD", + "T62x_L2_SNOOP_FULL", + "T62x_L2_REPLAY_FULL" +}; + +static const char * const hardware_counters_mali_t72x[] = { + /* Job Manager */ + "", + "", + "", + "", + "T72x_GPU_ACTIVE", + "T72x_IRQ_ACTIVE", + "T72x_JS0_JOBS", + "T72x_JS0_TASKS", + "T72x_JS0_ACTIVE", + "T72x_JS1_JOBS", + "T72x_JS1_TASKS", + "T72x_JS1_ACTIVE", + "T72x_JS2_JOBS", + 
"T72x_JS2_TASKS", + "T72x_JS2_ACTIVE", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /*Tiler */ + "", + "", + "", + "T72x_TI_JOBS_PROCESSED", + "T72x_TI_TRIANGLES", + "T72x_TI_QUADS", + "T72x_TI_POLYGONS", + "T72x_TI_POINTS", + "T72x_TI_LINES", + "T72x_TI_FRONT_FACING", + "T72x_TI_BACK_FACING", + "T72x_TI_PRIM_VISIBLE", + "T72x_TI_PRIM_CULLED", + "T72x_TI_PRIM_CLIPPED", + "", + "", + "", + "", + "", + "", + "", + "", + "T72x_TI_ACTIVE", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /* Shader Core */ + "", + "", + "", + "", + "T72x_FRAG_ACTIVE", + "T72x_FRAG_PRIMITIVES", + "T72x_FRAG_PRIMITIVES_DROPPED", + "T72x_FRAG_THREADS", + "T72x_FRAG_DUMMY_THREADS", + "T72x_FRAG_QUADS_RAST", + "T72x_FRAG_QUADS_EZS_TEST", + "T72x_FRAG_QUADS_EZS_KILLED", + "T72x_FRAG_THREADS_LZS_TEST", + "T72x_FRAG_THREADS_LZS_KILLED", + "T72x_FRAG_CYCLES_NO_TILE", + "T72x_FRAG_NUM_TILES", + "T72x_FRAG_TRANS_ELIM", + "T72x_COMPUTE_ACTIVE", + "T72x_COMPUTE_TASKS", + "T72x_COMPUTE_THREADS", + "T72x_TRIPIPE_ACTIVE", + "T72x_ARITH_WORDS", + "T72x_ARITH_CYCLES_REG", + "T72x_LS_WORDS", + "T72x_LS_ISSUES", + "T72x_LS_RESTARTS", + "T72x_LS_REISSUES_MISS", + "T72x_TEX_WORDS", + "T72x_TEX_BUBBLES", + "T72x_TEX_ISSUES", + "T72x_LSC_READ_HITS", + "T72x_LSC_READ_MISSES", + "T72x_LSC_WRITE_HITS", + "T72x_LSC_WRITE_MISSES", + "T72x_LSC_ATOMIC_HITS", + "T72x_LSC_ATOMIC_MISSES", + "T72x_LSC_LINE_FETCHES", + "T72x_LSC_DIRTY_LINE", + "T72x_LSC_SNOOPS", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /*L2 and MMU */ + "", + "", + "", + "", + "T72x_L2_EXT_WRITE_BEAT", + "T72x_L2_EXT_READ_BEAT", + "T72x_L2_READ_SNOOP", + "T72x_L2_READ_HIT", + "T72x_L2_WRITE_SNOOP", + "T72x_L2_WRITE_HIT", + "T72x_L2_EXT_WRITE_SMALL", + "T72x_L2_EXT_BARRIER", + "T72x_L2_EXT_AR_STALL", + "T72x_L2_EXT_W_STALL", + "T72x_L2_SNOOP_FULL", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "" +}; + +static const char * const hardware_counters_mali_t76x[] = { + /* Job Manager */ + "", + "", + "", + "", + "T76x_MESSAGES_SENT", + "T76x_MESSAGES_RECEIVED", + "T76x_GPU_ACTIVE", + "T76x_IRQ_ACTIVE", + "T76x_JS0_JOBS", + "T76x_JS0_TASKS", + "T76x_JS0_ACTIVE", + "", + "T76x_JS0_WAIT_READ", + "T76x_JS0_WAIT_ISSUE", + "T76x_JS0_WAIT_DEPEND", + "T76x_JS0_WAIT_FINISH", + "T76x_JS1_JOBS", + "T76x_JS1_TASKS", + "T76x_JS1_ACTIVE", + "", + "T76x_JS1_WAIT_READ", + "T76x_JS1_WAIT_ISSUE", + "T76x_JS1_WAIT_DEPEND", + "T76x_JS1_WAIT_FINISH", + "T76x_JS2_JOBS", + "T76x_JS2_TASKS", + "T76x_JS2_ACTIVE", + "", + "T76x_JS2_WAIT_READ", + "T76x_JS2_WAIT_ISSUE", + "T76x_JS2_WAIT_DEPEND", + "T76x_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + 
/*Tiler */ + "", + "", + "", + "T76x_TI_JOBS_PROCESSED", + "T76x_TI_TRIANGLES", + "T76x_TI_QUADS", + "T76x_TI_POLYGONS", + "T76x_TI_POINTS", + "T76x_TI_LINES", + "T76x_TI_VCACHE_HIT", + "T76x_TI_VCACHE_MISS", + "T76x_TI_FRONT_FACING", + "T76x_TI_BACK_FACING", + "T76x_TI_PRIM_VISIBLE", + "T76x_TI_PRIM_CULLED", + "T76x_TI_PRIM_CLIPPED", + "T76x_TI_LEVEL0", + "T76x_TI_LEVEL1", + "T76x_TI_LEVEL2", + "T76x_TI_LEVEL3", + "T76x_TI_LEVEL4", + "T76x_TI_LEVEL5", + "T76x_TI_LEVEL6", + "T76x_TI_LEVEL7", + "T76x_TI_COMMAND_1", + "T76x_TI_COMMAND_2", + "T76x_TI_COMMAND_3", + "T76x_TI_COMMAND_4", + "T76x_TI_COMMAND_5_7", + "T76x_TI_COMMAND_8_15", + "T76x_TI_COMMAND_16_63", + "T76x_TI_COMMAND_64", + "T76x_TI_COMPRESS_IN", + "T76x_TI_COMPRESS_OUT", + "T76x_TI_COMPRESS_FLUSH", + "T76x_TI_TIMESTAMPS", + "T76x_TI_PCACHE_HIT", + "T76x_TI_PCACHE_MISS", + "T76x_TI_PCACHE_LINE", + "T76x_TI_PCACHE_STALL", + "T76x_TI_WRBUF_HIT", + "T76x_TI_WRBUF_MISS", + "T76x_TI_WRBUF_LINE", + "T76x_TI_WRBUF_PARTIAL", + "T76x_TI_WRBUF_STALL", + "T76x_TI_ACTIVE", + "T76x_TI_LOADING_DESC", + "T76x_TI_INDEX_WAIT", + "T76x_TI_INDEX_RANGE_WAIT", + "T76x_TI_VERTEX_WAIT", + "T76x_TI_PCACHE_WAIT", + "T76x_TI_WRBUF_WAIT", + "T76x_TI_BUS_READ", + "T76x_TI_BUS_WRITE", + "", + "", + "", + "", + "", + "T76x_TI_UTLB_HIT", + "T76x_TI_UTLB_NEW_MISS", + "T76x_TI_UTLB_REPLAY_FULL", + "T76x_TI_UTLB_REPLAY_MISS", + "T76x_TI_UTLB_STALL", + + /* Shader Core */ + "", + "", + "", + "", + "T76x_FRAG_ACTIVE", + "T76x_FRAG_PRIMITIVES", + "T76x_FRAG_PRIMITIVES_DROPPED", + "T76x_FRAG_CYCLES_DESC", + "T76x_FRAG_CYCLES_FPKQ_ACTIVE", + "T76x_FRAG_CYCLES_VERT", + "T76x_FRAG_CYCLES_TRISETUP", + "T76x_FRAG_CYCLES_EZS_ACTIVE", + "T76x_FRAG_THREADS", + "T76x_FRAG_DUMMY_THREADS", + "T76x_FRAG_QUADS_RAST", + "T76x_FRAG_QUADS_EZS_TEST", + "T76x_FRAG_QUADS_EZS_KILLED", + "T76x_FRAG_THREADS_LZS_TEST", + "T76x_FRAG_THREADS_LZS_KILLED", + "T76x_FRAG_CYCLES_NO_TILE", + "T76x_FRAG_NUM_TILES", + "T76x_FRAG_TRANS_ELIM", + "T76x_COMPUTE_ACTIVE", + "T76x_COMPUTE_TASKS", + "T76x_COMPUTE_THREADS", + "T76x_COMPUTE_CYCLES_DESC", + "T76x_TRIPIPE_ACTIVE", + "T76x_ARITH_WORDS", + "T76x_ARITH_CYCLES_REG", + "T76x_ARITH_CYCLES_L0", + "T76x_ARITH_FRAG_DEPEND", + "T76x_LS_WORDS", + "T76x_LS_ISSUES", + "T76x_LS_REISSUE_ATTR", + "T76x_LS_REISSUES_VARY", + "T76x_LS_VARY_RV_MISS", + "T76x_LS_VARY_RV_HIT", + "T76x_LS_NO_UNPARK", + "T76x_TEX_WORDS", + "T76x_TEX_BUBBLES", + "T76x_TEX_WORDS_L0", + "T76x_TEX_WORDS_DESC", + "T76x_TEX_ISSUES", + "T76x_TEX_RECIRC_FMISS", + "T76x_TEX_RECIRC_DESC", + "T76x_TEX_RECIRC_MULTI", + "T76x_TEX_RECIRC_PMISS", + "T76x_TEX_RECIRC_CONF", + "T76x_LSC_READ_HITS", + "T76x_LSC_READ_OP", + "T76x_LSC_WRITE_HITS", + "T76x_LSC_WRITE_OP", + "T76x_LSC_ATOMIC_HITS", + "T76x_LSC_ATOMIC_OP", + "T76x_LSC_LINE_FETCHES", + "T76x_LSC_DIRTY_LINE", + "T76x_LSC_SNOOPS", + "T76x_AXI_TLB_STALL", + "T76x_AXI_TLB_MIESS", + "T76x_AXI_TLB_TRANSACTION", + "T76x_LS_TLB_MISS", + "T76x_LS_TLB_HIT", + "T76x_AXI_BEATS_READ", + "T76x_AXI_BEATS_WRITTEN", + + /*L2 and MMU */ + "", + "", + "", + "", + "T76x_MMU_HIT", + "T76x_MMU_NEW_MISS", + "T76x_MMU_REPLAY_FULL", + "T76x_MMU_REPLAY_MISS", + "T76x_MMU_TABLE_WALK", + "T76x_MMU_REQUESTS", + "", + "", + "T76x_UTLB_HIT", + "T76x_UTLB_NEW_MISS", + "T76x_UTLB_REPLAY_FULL", + "T76x_UTLB_REPLAY_MISS", + "T76x_UTLB_STALL", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "T76x_L2_EXT_WRITE_BEATS", + "T76x_L2_EXT_READ_BEATS", + "T76x_L2_ANY_LOOKUP", + "T76x_L2_READ_LOOKUP", + "T76x_L2_SREAD_LOOKUP", + "T76x_L2_READ_REPLAY", + 
"T76x_L2_READ_SNOOP", + "T76x_L2_READ_HIT", + "T76x_L2_CLEAN_MISS", + "T76x_L2_WRITE_LOOKUP", + "T76x_L2_SWRITE_LOOKUP", + "T76x_L2_WRITE_REPLAY", + "T76x_L2_WRITE_SNOOP", + "T76x_L2_WRITE_HIT", + "T76x_L2_EXT_READ_FULL", + "", + "T76x_L2_EXT_WRITE_FULL", + "T76x_L2_EXT_R_W_HAZARD", + "T76x_L2_EXT_READ", + "T76x_L2_EXT_READ_LINE", + "T76x_L2_EXT_WRITE", + "T76x_L2_EXT_WRITE_LINE", + "T76x_L2_EXT_WRITE_SMALL", + "T76x_L2_EXT_BARRIER", + "T76x_L2_EXT_AR_STALL", + "T76x_L2_EXT_R_BUF_FULL", + "T76x_L2_EXT_RD_BUF_FULL", + "T76x_L2_EXT_R_RAW", + "T76x_L2_EXT_W_STALL", + "T76x_L2_EXT_W_BUF_FULL", + "T76x_L2_EXT_R_BUF_FULL", + "T76x_L2_TAG_HAZARD", + "T76x_L2_SNOOP_FULL", + "T76x_L2_REPLAY_FULL" +}; + +static const char * const hardware_counters_mali_t82x[] = { + /* Job Manager */ + "", + "", + "", + "", + "T82x_MESSAGES_SENT", + "T82x_MESSAGES_RECEIVED", + "T82x_GPU_ACTIVE", + "T82x_IRQ_ACTIVE", + "T82x_JS0_JOBS", + "T82x_JS0_TASKS", + "T82x_JS0_ACTIVE", + "", + "T82x_JS0_WAIT_READ", + "T82x_JS0_WAIT_ISSUE", + "T82x_JS0_WAIT_DEPEND", + "T82x_JS0_WAIT_FINISH", + "T82x_JS1_JOBS", + "T82x_JS1_TASKS", + "T82x_JS1_ACTIVE", + "", + "T82x_JS1_WAIT_READ", + "T82x_JS1_WAIT_ISSUE", + "T82x_JS1_WAIT_DEPEND", + "T82x_JS1_WAIT_FINISH", + "T82x_JS2_JOBS", + "T82x_JS2_TASKS", + "T82x_JS2_ACTIVE", + "", + "T82x_JS2_WAIT_READ", + "T82x_JS2_WAIT_ISSUE", + "T82x_JS2_WAIT_DEPEND", + "T82x_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /*Tiler */ + "", + "", + "", + "T82x_TI_JOBS_PROCESSED", + "T82x_TI_TRIANGLES", + "T82x_TI_QUADS", + "T82x_TI_POLYGONS", + "T82x_TI_POINTS", + "T82x_TI_LINES", + "T82x_TI_FRONT_FACING", + "T82x_TI_BACK_FACING", + "T82x_TI_PRIM_VISIBLE", + "T82x_TI_PRIM_CULLED", + "T82x_TI_PRIM_CLIPPED", + "", + "", + "", + "", + "", + "", + "", + "", + "T82x_TI_ACTIVE", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /* Shader Core */ + "", + "", + "", + "", + "T82x_FRAG_ACTIVE", + "T82x_FRAG_PRIMITIVES", + "T82x_FRAG_PRIMITIVES_DROPPED", + "T82x_FRAG_CYCLES_DESC", + "T82x_FRAG_CYCLES_FPKQ_ACTIVE", + "T82x_FRAG_CYCLES_VERT", + "T82x_FRAG_CYCLES_TRISETUP", + "T82x_FRAG_CYCLES_EZS_ACTIVE", + "T82x_FRAG_THREADS", + "T82x_FRAG_DUMMY_THREADS", + "T82x_FRAG_QUADS_RAST", + "T82x_FRAG_QUADS_EZS_TEST", + "T82x_FRAG_QUADS_EZS_KILLED", + "T82x_FRAG_THREADS_LZS_TEST", + "T82x_FRAG_THREADS_LZS_KILLED", + "T82x_FRAG_CYCLES_NO_TILE", + "T82x_FRAG_NUM_TILES", + "T82x_FRAG_TRANS_ELIM", + "T82x_COMPUTE_ACTIVE", + "T82x_COMPUTE_TASKS", + "T82x_COMPUTE_THREADS", + "T82x_COMPUTE_CYCLES_DESC", + "T82x_TRIPIPE_ACTIVE", + "T82x_ARITH_WORDS", + "T82x_ARITH_CYCLES_REG", + "T82x_ARITH_CYCLES_L0", + "T82x_ARITH_FRAG_DEPEND", + "T82x_LS_WORDS", + "T82x_LS_ISSUES", + "T82x_LS_REISSUE_ATTR", + "T82x_LS_REISSUES_VARY", + "T82x_LS_VARY_RV_MISS", + "T82x_LS_VARY_RV_HIT", + "T82x_LS_NO_UNPARK", + "T82x_TEX_WORDS", + "T82x_TEX_BUBBLES", + "T82x_TEX_WORDS_L0", + "T82x_TEX_WORDS_DESC", + "T82x_TEX_ISSUES", + "T82x_TEX_RECIRC_FMISS", + "T82x_TEX_RECIRC_DESC", + "T82x_TEX_RECIRC_MULTI", + "T82x_TEX_RECIRC_PMISS", + "T82x_TEX_RECIRC_CONF", + "T82x_LSC_READ_HITS", + "T82x_LSC_READ_OP", + "T82x_LSC_WRITE_HITS", + "T82x_LSC_WRITE_OP", + "T82x_LSC_ATOMIC_HITS", + "T82x_LSC_ATOMIC_OP", 
+ "T82x_LSC_LINE_FETCHES", + "T82x_LSC_DIRTY_LINE", + "T82x_LSC_SNOOPS", + "T82x_AXI_TLB_STALL", + "T82x_AXI_TLB_MIESS", + "T82x_AXI_TLB_TRANSACTION", + "T82x_LS_TLB_MISS", + "T82x_LS_TLB_HIT", + "T82x_AXI_BEATS_READ", + "T82x_AXI_BEATS_WRITTEN", + + /*L2 and MMU */ + "", + "", + "", + "", + "T82x_MMU_HIT", + "T82x_MMU_NEW_MISS", + "T82x_MMU_REPLAY_FULL", + "T82x_MMU_REPLAY_MISS", + "T82x_MMU_TABLE_WALK", + "T82x_MMU_REQUESTS", + "", + "", + "T82x_UTLB_HIT", + "T82x_UTLB_NEW_MISS", + "T82x_UTLB_REPLAY_FULL", + "T82x_UTLB_REPLAY_MISS", + "T82x_UTLB_STALL", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "T82x_L2_EXT_WRITE_BEATS", + "T82x_L2_EXT_READ_BEATS", + "T82x_L2_ANY_LOOKUP", + "T82x_L2_READ_LOOKUP", + "T82x_L2_SREAD_LOOKUP", + "T82x_L2_READ_REPLAY", + "T82x_L2_READ_SNOOP", + "T82x_L2_READ_HIT", + "T82x_L2_CLEAN_MISS", + "T82x_L2_WRITE_LOOKUP", + "T82x_L2_SWRITE_LOOKUP", + "T82x_L2_WRITE_REPLAY", + "T82x_L2_WRITE_SNOOP", + "T82x_L2_WRITE_HIT", + "T82x_L2_EXT_READ_FULL", + "", + "T82x_L2_EXT_WRITE_FULL", + "T82x_L2_EXT_R_W_HAZARD", + "T82x_L2_EXT_READ", + "T82x_L2_EXT_READ_LINE", + "T82x_L2_EXT_WRITE", + "T82x_L2_EXT_WRITE_LINE", + "T82x_L2_EXT_WRITE_SMALL", + "T82x_L2_EXT_BARRIER", + "T82x_L2_EXT_AR_STALL", + "T82x_L2_EXT_R_BUF_FULL", + "T82x_L2_EXT_RD_BUF_FULL", + "T82x_L2_EXT_R_RAW", + "T82x_L2_EXT_W_STALL", + "T82x_L2_EXT_W_BUF_FULL", + "T82x_L2_EXT_R_BUF_FULL", + "T82x_L2_TAG_HAZARD", + "T82x_L2_SNOOP_FULL", + "T82x_L2_REPLAY_FULL" +}; + +static const char * const hardware_counters_mali_t83x[] = { + /* Job Manager */ + "", + "", + "", + "", + "T83x_MESSAGES_SENT", + "T83x_MESSAGES_RECEIVED", + "T83x_GPU_ACTIVE", + "T83x_IRQ_ACTIVE", + "T83x_JS0_JOBS", + "T83x_JS0_TASKS", + "T83x_JS0_ACTIVE", + "", + "T83x_JS0_WAIT_READ", + "T83x_JS0_WAIT_ISSUE", + "T83x_JS0_WAIT_DEPEND", + "T83x_JS0_WAIT_FINISH", + "T83x_JS1_JOBS", + "T83x_JS1_TASKS", + "T83x_JS1_ACTIVE", + "", + "T83x_JS1_WAIT_READ", + "T83x_JS1_WAIT_ISSUE", + "T83x_JS1_WAIT_DEPEND", + "T83x_JS1_WAIT_FINISH", + "T83x_JS2_JOBS", + "T83x_JS2_TASKS", + "T83x_JS2_ACTIVE", + "", + "T83x_JS2_WAIT_READ", + "T83x_JS2_WAIT_ISSUE", + "T83x_JS2_WAIT_DEPEND", + "T83x_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /*Tiler */ + "", + "", + "", + "T83x_TI_JOBS_PROCESSED", + "T83x_TI_TRIANGLES", + "T83x_TI_QUADS", + "T83x_TI_POLYGONS", + "T83x_TI_POINTS", + "T83x_TI_LINES", + "T83x_TI_FRONT_FACING", + "T83x_TI_BACK_FACING", + "T83x_TI_PRIM_VISIBLE", + "T83x_TI_PRIM_CULLED", + "T83x_TI_PRIM_CLIPPED", + "", + "", + "", + "", + "", + "", + "", + "", + "T83x_TI_ACTIVE", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /* Shader Core */ + "", + "", + "", + "", + "T83x_FRAG_ACTIVE", + "T83x_FRAG_PRIMITIVES", + "T83x_FRAG_PRIMITIVES_DROPPED", + "T83x_FRAG_CYCLES_DESC", + "T83x_FRAG_CYCLES_FPKQ_ACTIVE", + "T83x_FRAG_CYCLES_VERT", + "T83x_FRAG_CYCLES_TRISETUP", + "T83x_FRAG_CYCLES_EZS_ACTIVE", + "T83x_FRAG_THREADS", + "T83x_FRAG_DUMMY_THREADS", + "T83x_FRAG_QUADS_RAST", + "T83x_FRAG_QUADS_EZS_TEST", + "T83x_FRAG_QUADS_EZS_KILLED", + "T83x_FRAG_THREADS_LZS_TEST", + "T83x_FRAG_THREADS_LZS_KILLED", + "T83x_FRAG_CYCLES_NO_TILE", + "T83x_FRAG_NUM_TILES", + 
"T83x_FRAG_TRANS_ELIM", + "T83x_COMPUTE_ACTIVE", + "T83x_COMPUTE_TASKS", + "T83x_COMPUTE_THREADS", + "T83x_COMPUTE_CYCLES_DESC", + "T83x_TRIPIPE_ACTIVE", + "T83x_ARITH_WORDS", + "T83x_ARITH_CYCLES_REG", + "T83x_ARITH_CYCLES_L0", + "T83x_ARITH_FRAG_DEPEND", + "T83x_LS_WORDS", + "T83x_LS_ISSUES", + "T83x_LS_REISSUE_ATTR", + "T83x_LS_REISSUES_VARY", + "T83x_LS_VARY_RV_MISS", + "T83x_LS_VARY_RV_HIT", + "T83x_LS_NO_UNPARK", + "T83x_TEX_WORDS", + "T83x_TEX_BUBBLES", + "T83x_TEX_WORDS_L0", + "T83x_TEX_WORDS_DESC", + "T83x_TEX_ISSUES", + "T83x_TEX_RECIRC_FMISS", + "T83x_TEX_RECIRC_DESC", + "T83x_TEX_RECIRC_MULTI", + "T83x_TEX_RECIRC_PMISS", + "T83x_TEX_RECIRC_CONF", + "T83x_LSC_READ_HITS", + "T83x_LSC_READ_OP", + "T83x_LSC_WRITE_HITS", + "T83x_LSC_WRITE_OP", + "T83x_LSC_ATOMIC_HITS", + "T83x_LSC_ATOMIC_OP", + "T83x_LSC_LINE_FETCHES", + "T83x_LSC_DIRTY_LINE", + "T83x_LSC_SNOOPS", + "T83x_AXI_TLB_STALL", + "T83x_AXI_TLB_MIESS", + "T83x_AXI_TLB_TRANSACTION", + "T83x_LS_TLB_MISS", + "T83x_LS_TLB_HIT", + "T83x_AXI_BEATS_READ", + "T83x_AXI_BEATS_WRITTEN", + + /*L2 and MMU */ + "", + "", + "", + "", + "T83x_MMU_HIT", + "T83x_MMU_NEW_MISS", + "T83x_MMU_REPLAY_FULL", + "T83x_MMU_REPLAY_MISS", + "T83x_MMU_TABLE_WALK", + "T83x_MMU_REQUESTS", + "", + "", + "T83x_UTLB_HIT", + "T83x_UTLB_NEW_MISS", + "T83x_UTLB_REPLAY_FULL", + "T83x_UTLB_REPLAY_MISS", + "T83x_UTLB_STALL", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "T83x_L2_EXT_WRITE_BEATS", + "T83x_L2_EXT_READ_BEATS", + "T83x_L2_ANY_LOOKUP", + "T83x_L2_READ_LOOKUP", + "T83x_L2_SREAD_LOOKUP", + "T83x_L2_READ_REPLAY", + "T83x_L2_READ_SNOOP", + "T83x_L2_READ_HIT", + "T83x_L2_CLEAN_MISS", + "T83x_L2_WRITE_LOOKUP", + "T83x_L2_SWRITE_LOOKUP", + "T83x_L2_WRITE_REPLAY", + "T83x_L2_WRITE_SNOOP", + "T83x_L2_WRITE_HIT", + "T83x_L2_EXT_READ_FULL", + "", + "T83x_L2_EXT_WRITE_FULL", + "T83x_L2_EXT_R_W_HAZARD", + "T83x_L2_EXT_READ", + "T83x_L2_EXT_READ_LINE", + "T83x_L2_EXT_WRITE", + "T83x_L2_EXT_WRITE_LINE", + "T83x_L2_EXT_WRITE_SMALL", + "T83x_L2_EXT_BARRIER", + "T83x_L2_EXT_AR_STALL", + "T83x_L2_EXT_R_BUF_FULL", + "T83x_L2_EXT_RD_BUF_FULL", + "T83x_L2_EXT_R_RAW", + "T83x_L2_EXT_W_STALL", + "T83x_L2_EXT_W_BUF_FULL", + "T83x_L2_EXT_R_BUF_FULL", + "T83x_L2_TAG_HAZARD", + "T83x_L2_SNOOP_FULL", + "T83x_L2_REPLAY_FULL" +}; + +static const char * const hardware_counters_mali_t86x[] = { + /* Job Manager */ + "", + "", + "", + "", + "T86x_MESSAGES_SENT", + "T86x_MESSAGES_RECEIVED", + "T86x_GPU_ACTIVE", + "T86x_IRQ_ACTIVE", + "T86x_JS0_JOBS", + "T86x_JS0_TASKS", + "T86x_JS0_ACTIVE", + "", + "T86x_JS0_WAIT_READ", + "T86x_JS0_WAIT_ISSUE", + "T86x_JS0_WAIT_DEPEND", + "T86x_JS0_WAIT_FINISH", + "T86x_JS1_JOBS", + "T86x_JS1_TASKS", + "T86x_JS1_ACTIVE", + "", + "T86x_JS1_WAIT_READ", + "T86x_JS1_WAIT_ISSUE", + "T86x_JS1_WAIT_DEPEND", + "T86x_JS1_WAIT_FINISH", + "T86x_JS2_JOBS", + "T86x_JS2_TASKS", + "T86x_JS2_ACTIVE", + "", + "T86x_JS2_WAIT_READ", + "T86x_JS2_WAIT_ISSUE", + "T86x_JS2_WAIT_DEPEND", + "T86x_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /*Tiler */ + "", + "", + "", + "T86x_TI_JOBS_PROCESSED", + "T86x_TI_TRIANGLES", + "T86x_TI_QUADS", + "T86x_TI_POLYGONS", + "T86x_TI_POINTS", + "T86x_TI_LINES", + "T86x_TI_VCACHE_HIT", + "T86x_TI_VCACHE_MISS", + "T86x_TI_FRONT_FACING", + "T86x_TI_BACK_FACING", + "T86x_TI_PRIM_VISIBLE", + "T86x_TI_PRIM_CULLED", + "T86x_TI_PRIM_CLIPPED", + 
"T86x_TI_LEVEL0", + "T86x_TI_LEVEL1", + "T86x_TI_LEVEL2", + "T86x_TI_LEVEL3", + "T86x_TI_LEVEL4", + "T86x_TI_LEVEL5", + "T86x_TI_LEVEL6", + "T86x_TI_LEVEL7", + "T86x_TI_COMMAND_1", + "T86x_TI_COMMAND_2", + "T86x_TI_COMMAND_3", + "T86x_TI_COMMAND_4", + "T86x_TI_COMMAND_5_7", + "T86x_TI_COMMAND_8_15", + "T86x_TI_COMMAND_16_63", + "T86x_TI_COMMAND_64", + "T86x_TI_COMPRESS_IN", + "T86x_TI_COMPRESS_OUT", + "T86x_TI_COMPRESS_FLUSH", + "T86x_TI_TIMESTAMPS", + "T86x_TI_PCACHE_HIT", + "T86x_TI_PCACHE_MISS", + "T86x_TI_PCACHE_LINE", + "T86x_TI_PCACHE_STALL", + "T86x_TI_WRBUF_HIT", + "T86x_TI_WRBUF_MISS", + "T86x_TI_WRBUF_LINE", + "T86x_TI_WRBUF_PARTIAL", + "T86x_TI_WRBUF_STALL", + "T86x_TI_ACTIVE", + "T86x_TI_LOADING_DESC", + "T86x_TI_INDEX_WAIT", + "T86x_TI_INDEX_RANGE_WAIT", + "T86x_TI_VERTEX_WAIT", + "T86x_TI_PCACHE_WAIT", + "T86x_TI_WRBUF_WAIT", + "T86x_TI_BUS_READ", + "T86x_TI_BUS_WRITE", + "", + "", + "", + "", + "", + "T86x_TI_UTLB_HIT", + "T86x_TI_UTLB_NEW_MISS", + "T86x_TI_UTLB_REPLAY_FULL", + "T86x_TI_UTLB_REPLAY_MISS", + "T86x_TI_UTLB_STALL", + + /* Shader Core */ + "", + "", + "", + "", + "T86x_FRAG_ACTIVE", + "T86x_FRAG_PRIMITIVES", + "T86x_FRAG_PRIMITIVES_DROPPED", + "T86x_FRAG_CYCLES_DESC", + "T86x_FRAG_CYCLES_FPKQ_ACTIVE", + "T86x_FRAG_CYCLES_VERT", + "T86x_FRAG_CYCLES_TRISETUP", + "T86x_FRAG_CYCLES_EZS_ACTIVE", + "T86x_FRAG_THREADS", + "T86x_FRAG_DUMMY_THREADS", + "T86x_FRAG_QUADS_RAST", + "T86x_FRAG_QUADS_EZS_TEST", + "T86x_FRAG_QUADS_EZS_KILLED", + "T86x_FRAG_THREADS_LZS_TEST", + "T86x_FRAG_THREADS_LZS_KILLED", + "T86x_FRAG_CYCLES_NO_TILE", + "T86x_FRAG_NUM_TILES", + "T86x_FRAG_TRANS_ELIM", + "T86x_COMPUTE_ACTIVE", + "T86x_COMPUTE_TASKS", + "T86x_COMPUTE_THREADS", + "T86x_COMPUTE_CYCLES_DESC", + "T86x_TRIPIPE_ACTIVE", + "T86x_ARITH_WORDS", + "T86x_ARITH_CYCLES_REG", + "T86x_ARITH_CYCLES_L0", + "T86x_ARITH_FRAG_DEPEND", + "T86x_LS_WORDS", + "T86x_LS_ISSUES", + "T86x_LS_REISSUE_ATTR", + "T86x_LS_REISSUES_VARY", + "T86x_LS_VARY_RV_MISS", + "T86x_LS_VARY_RV_HIT", + "T86x_LS_NO_UNPARK", + "T86x_TEX_WORDS", + "T86x_TEX_BUBBLES", + "T86x_TEX_WORDS_L0", + "T86x_TEX_WORDS_DESC", + "T86x_TEX_ISSUES", + "T86x_TEX_RECIRC_FMISS", + "T86x_TEX_RECIRC_DESC", + "T86x_TEX_RECIRC_MULTI", + "T86x_TEX_RECIRC_PMISS", + "T86x_TEX_RECIRC_CONF", + "T86x_LSC_READ_HITS", + "T86x_LSC_READ_OP", + "T86x_LSC_WRITE_HITS", + "T86x_LSC_WRITE_OP", + "T86x_LSC_ATOMIC_HITS", + "T86x_LSC_ATOMIC_OP", + "T86x_LSC_LINE_FETCHES", + "T86x_LSC_DIRTY_LINE", + "T86x_LSC_SNOOPS", + "T86x_AXI_TLB_STALL", + "T86x_AXI_TLB_MIESS", + "T86x_AXI_TLB_TRANSACTION", + "T86x_LS_TLB_MISS", + "T86x_LS_TLB_HIT", + "T86x_AXI_BEATS_READ", + "T86x_AXI_BEATS_WRITTEN", + + /*L2 and MMU */ + "", + "", + "", + "", + "T86x_MMU_HIT", + "T86x_MMU_NEW_MISS", + "T86x_MMU_REPLAY_FULL", + "T86x_MMU_REPLAY_MISS", + "T86x_MMU_TABLE_WALK", + "T86x_MMU_REQUESTS", + "", + "", + "T86x_UTLB_HIT", + "T86x_UTLB_NEW_MISS", + "T86x_UTLB_REPLAY_FULL", + "T86x_UTLB_REPLAY_MISS", + "T86x_UTLB_STALL", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "T86x_L2_EXT_WRITE_BEATS", + "T86x_L2_EXT_READ_BEATS", + "T86x_L2_ANY_LOOKUP", + "T86x_L2_READ_LOOKUP", + "T86x_L2_SREAD_LOOKUP", + "T86x_L2_READ_REPLAY", + "T86x_L2_READ_SNOOP", + "T86x_L2_READ_HIT", + "T86x_L2_CLEAN_MISS", + "T86x_L2_WRITE_LOOKUP", + "T86x_L2_SWRITE_LOOKUP", + "T86x_L2_WRITE_REPLAY", + "T86x_L2_WRITE_SNOOP", + "T86x_L2_WRITE_HIT", + "T86x_L2_EXT_READ_FULL", + "", + "T86x_L2_EXT_WRITE_FULL", + "T86x_L2_EXT_R_W_HAZARD", + "T86x_L2_EXT_READ", + "T86x_L2_EXT_READ_LINE", + 
"T86x_L2_EXT_WRITE", + "T86x_L2_EXT_WRITE_LINE", + "T86x_L2_EXT_WRITE_SMALL", + "T86x_L2_EXT_BARRIER", + "T86x_L2_EXT_AR_STALL", + "T86x_L2_EXT_R_BUF_FULL", + "T86x_L2_EXT_RD_BUF_FULL", + "T86x_L2_EXT_R_RAW", + "T86x_L2_EXT_W_STALL", + "T86x_L2_EXT_W_BUF_FULL", + "T86x_L2_EXT_R_BUF_FULL", + "T86x_L2_TAG_HAZARD", + "T86x_L2_SNOOP_FULL", + "T86x_L2_REPLAY_FULL" +}; + +static const char * const hardware_counters_mali_t88x[] = { + /* Job Manager */ + "", + "", + "", + "", + "T88x_MESSAGES_SENT", + "T88x_MESSAGES_RECEIVED", + "T88x_GPU_ACTIVE", + "T88x_IRQ_ACTIVE", + "T88x_JS0_JOBS", + "T88x_JS0_TASKS", + "T88x_JS0_ACTIVE", + "", + "T88x_JS0_WAIT_READ", + "T88x_JS0_WAIT_ISSUE", + "T88x_JS0_WAIT_DEPEND", + "T88x_JS0_WAIT_FINISH", + "T88x_JS1_JOBS", + "T88x_JS1_TASKS", + "T88x_JS1_ACTIVE", + "", + "T88x_JS1_WAIT_READ", + "T88x_JS1_WAIT_ISSUE", + "T88x_JS1_WAIT_DEPEND", + "T88x_JS1_WAIT_FINISH", + "T88x_JS2_JOBS", + "T88x_JS2_TASKS", + "T88x_JS2_ACTIVE", + "", + "T88x_JS2_WAIT_READ", + "T88x_JS2_WAIT_ISSUE", + "T88x_JS2_WAIT_DEPEND", + "T88x_JS2_WAIT_FINISH", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + + /*Tiler */ + "", + "", + "", + "T88x_TI_JOBS_PROCESSED", + "T88x_TI_TRIANGLES", + "T88x_TI_QUADS", + "T88x_TI_POLYGONS", + "T88x_TI_POINTS", + "T88x_TI_LINES", + "T88x_TI_VCACHE_HIT", + "T88x_TI_VCACHE_MISS", + "T88x_TI_FRONT_FACING", + "T88x_TI_BACK_FACING", + "T88x_TI_PRIM_VISIBLE", + "T88x_TI_PRIM_CULLED", + "T88x_TI_PRIM_CLIPPED", + "T88x_TI_LEVEL0", + "T88x_TI_LEVEL1", + "T88x_TI_LEVEL2", + "T88x_TI_LEVEL3", + "T88x_TI_LEVEL4", + "T88x_TI_LEVEL5", + "T88x_TI_LEVEL6", + "T88x_TI_LEVEL7", + "T88x_TI_COMMAND_1", + "T88x_TI_COMMAND_2", + "T88x_TI_COMMAND_3", + "T88x_TI_COMMAND_4", + "T88x_TI_COMMAND_5_7", + "T88x_TI_COMMAND_8_15", + "T88x_TI_COMMAND_16_63", + "T88x_TI_COMMAND_64", + "T88x_TI_COMPRESS_IN", + "T88x_TI_COMPRESS_OUT", + "T88x_TI_COMPRESS_FLUSH", + "T88x_TI_TIMESTAMPS", + "T88x_TI_PCACHE_HIT", + "T88x_TI_PCACHE_MISS", + "T88x_TI_PCACHE_LINE", + "T88x_TI_PCACHE_STALL", + "T88x_TI_WRBUF_HIT", + "T88x_TI_WRBUF_MISS", + "T88x_TI_WRBUF_LINE", + "T88x_TI_WRBUF_PARTIAL", + "T88x_TI_WRBUF_STALL", + "T88x_TI_ACTIVE", + "T88x_TI_LOADING_DESC", + "T88x_TI_INDEX_WAIT", + "T88x_TI_INDEX_RANGE_WAIT", + "T88x_TI_VERTEX_WAIT", + "T88x_TI_PCACHE_WAIT", + "T88x_TI_WRBUF_WAIT", + "T88x_TI_BUS_READ", + "T88x_TI_BUS_WRITE", + "", + "", + "", + "", + "", + "T88x_TI_UTLB_HIT", + "T88x_TI_UTLB_NEW_MISS", + "T88x_TI_UTLB_REPLAY_FULL", + "T88x_TI_UTLB_REPLAY_MISS", + "T88x_TI_UTLB_STALL", + + /* Shader Core */ + "", + "", + "", + "", + "T88x_FRAG_ACTIVE", + "T88x_FRAG_PRIMITIVES", + "T88x_FRAG_PRIMITIVES_DROPPED", + "T88x_FRAG_CYCLES_DESC", + "T88x_FRAG_CYCLES_FPKQ_ACTIVE", + "T88x_FRAG_CYCLES_VERT", + "T88x_FRAG_CYCLES_TRISETUP", + "T88x_FRAG_CYCLES_EZS_ACTIVE", + "T88x_FRAG_THREADS", + "T88x_FRAG_DUMMY_THREADS", + "T88x_FRAG_QUADS_RAST", + "T88x_FRAG_QUADS_EZS_TEST", + "T88x_FRAG_QUADS_EZS_KILLED", + "T88x_FRAG_THREADS_LZS_TEST", + "T88x_FRAG_THREADS_LZS_KILLED", + "T88x_FRAG_CYCLES_NO_TILE", + "T88x_FRAG_NUM_TILES", + "T88x_FRAG_TRANS_ELIM", + "T88x_COMPUTE_ACTIVE", + "T88x_COMPUTE_TASKS", + "T88x_COMPUTE_THREADS", + "T88x_COMPUTE_CYCLES_DESC", + "T88x_TRIPIPE_ACTIVE", + "T88x_ARITH_WORDS", + "T88x_ARITH_CYCLES_REG", + "T88x_ARITH_CYCLES_L0", + "T88x_ARITH_FRAG_DEPEND", + "T88x_LS_WORDS", + "T88x_LS_ISSUES", + "T88x_LS_REISSUE_ATTR", + "T88x_LS_REISSUES_VARY", 
+ "T88x_LS_VARY_RV_MISS", + "T88x_LS_VARY_RV_HIT", + "T88x_LS_NO_UNPARK", + "T88x_TEX_WORDS", + "T88x_TEX_BUBBLES", + "T88x_TEX_WORDS_L0", + "T88x_TEX_WORDS_DESC", + "T88x_TEX_ISSUES", + "T88x_TEX_RECIRC_FMISS", + "T88x_TEX_RECIRC_DESC", + "T88x_TEX_RECIRC_MULTI", + "T88x_TEX_RECIRC_PMISS", + "T88x_TEX_RECIRC_CONF", + "T88x_LSC_READ_HITS", + "T88x_LSC_READ_OP", + "T88x_LSC_WRITE_HITS", + "T88x_LSC_WRITE_OP", + "T88x_LSC_ATOMIC_HITS", + "T88x_LSC_ATOMIC_OP", + "T88x_LSC_LINE_FETCHES", + "T88x_LSC_DIRTY_LINE", + "T88x_LSC_SNOOPS", + "T88x_AXI_TLB_STALL", + "T88x_AXI_TLB_MIESS", + "T88x_AXI_TLB_TRANSACTION", + "T88x_LS_TLB_MISS", + "T88x_LS_TLB_HIT", + "T88x_AXI_BEATS_READ", + "T88x_AXI_BEATS_WRITTEN", + + /*L2 and MMU */ + "", + "", + "", + "", + "T88x_MMU_HIT", + "T88x_MMU_NEW_MISS", + "T88x_MMU_REPLAY_FULL", + "T88x_MMU_REPLAY_MISS", + "T88x_MMU_TABLE_WALK", + "T88x_MMU_REQUESTS", + "", + "", + "T88x_UTLB_HIT", + "T88x_UTLB_NEW_MISS", + "T88x_UTLB_REPLAY_FULL", + "T88x_UTLB_REPLAY_MISS", + "T88x_UTLB_STALL", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "", + "T88x_L2_EXT_WRITE_BEATS", + "T88x_L2_EXT_READ_BEATS", + "T88x_L2_ANY_LOOKUP", + "T88x_L2_READ_LOOKUP", + "T88x_L2_SREAD_LOOKUP", + "T88x_L2_READ_REPLAY", + "T88x_L2_READ_SNOOP", + "T88x_L2_READ_HIT", + "T88x_L2_CLEAN_MISS", + "T88x_L2_WRITE_LOOKUP", + "T88x_L2_SWRITE_LOOKUP", + "T88x_L2_WRITE_REPLAY", + "T88x_L2_WRITE_SNOOP", + "T88x_L2_WRITE_HIT", + "T88x_L2_EXT_READ_FULL", + "", + "T88x_L2_EXT_WRITE_FULL", + "T88x_L2_EXT_R_W_HAZARD", + "T88x_L2_EXT_READ", + "T88x_L2_EXT_READ_LINE", + "T88x_L2_EXT_WRITE", + "T88x_L2_EXT_WRITE_LINE", + "T88x_L2_EXT_WRITE_SMALL", + "T88x_L2_EXT_BARRIER", + "T88x_L2_EXT_AR_STALL", + "T88x_L2_EXT_R_BUF_FULL", + "T88x_L2_EXT_RD_BUF_FULL", + "T88x_L2_EXT_R_RAW", + "T88x_L2_EXT_W_STALL", + "T88x_L2_EXT_W_BUF_FULL", + "T88x_L2_EXT_R_BUF_FULL", + "T88x_L2_TAG_HAZARD", + "T88x_L2_SNOOP_FULL", + "T88x_L2_REPLAY_FULL" +}; + +#endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c new file mode 100644 index 00000000000..ca264049653 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.c @@ -0,0 +1,98 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase_gpu_memory_debugfs.h> + +#ifdef CONFIG_DEBUG_FS +/** Show callback for the @c gpu_memory debugfs file. + * + * This function is called to get the contents of the @c gpu_memory debugfs + * file. This is a report of current gpu memory usage. 
+ * + * @param sfile The debugfs entry + * @param data Data associated with the entry + * + * @return 0 if the data was successfully printed to the debugfs entry file, + * or -1 if an error was encountered + */ + +static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data) +{ + ssize_t ret = 0; + struct list_head *entry; + const struct list_head *kbdev_list; + + kbdev_list = kbase_dev_list_get(); + list_for_each(entry, kbdev_list) { + struct kbase_device *kbdev = NULL; + struct kbasep_kctx_list_element *element; + + kbdev = list_entry(entry, struct kbase_device, entry); + /* output the total memory usage for this device */ + ret = seq_printf(sfile, "%-16s %10u\n", + kbdev->devname, + atomic_read(&(kbdev->memdev.used_pages))); + mutex_lock(&kbdev->kctx_list_lock); + list_for_each_entry(element, &kbdev->kctx_list, link) { + /* output the memory usage for each kctx + * opened on this device */ + ret = seq_printf(sfile, " %s-0x%p %10u\n", + "kctx", + element->kctx, + atomic_read(&(element->kctx->used_pages))); + } + mutex_unlock(&kbdev->kctx_list_lock); + } + kbase_dev_list_put(kbdev_list); + return ret; +} + +/* + * File operations related to debugfs entry for gpu_memory + */ +static int kbasep_gpu_memory_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_gpu_memory_seq_show, NULL); +} + +static const struct file_operations kbasep_gpu_memory_debugfs_fops = { + .open = kbasep_gpu_memory_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * Initialize debugfs entry for gpu_memory + */ +void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("gpu_memory", S_IRUGO, + kbdev->mali_debugfs_directory, NULL, + &kbasep_gpu_memory_debugfs_fops); + return; +} + +#else +/* + * Stub functions for when debugfs is disabled + */ +void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) +{ + return; +} +#endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h new file mode 100644 index 00000000000..3cf30a4e767 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpu_memory_debugfs.h @@ -0,0 +1,38 @@ +/* + * + * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_gpu_memory_debugfs.h + * Header file for gpu_memory entry in debugfs + * + */ + +#ifndef _KBASE_GPU_MEMORY_H +#define _KBASE_GPU_MEMORY_H + +#include <mali_kbase.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +/** + * @brief Initialize gpu_memory debugfs entry + */ +void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev); + +#endif /*_KBASE_GPU_MEMORY_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c new file mode 100644 index 00000000000..4276665fbc9 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.c @@ -0,0 +1,296 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Base kernel property query APIs + */ + +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#include <mali_kbase_gpuprops.h> +#include <mali_kbase_config_defaults.h> +#include <mali_kbase_hwaccess_gpuprops.h> +#include <linux/clk.h> + +/** + * KBASE_UBFX32 - Extracts bits from a 32-bit bitfield. + * @value: The value from which to extract bits. + * @offset: The first bit to extract (0 being the LSB). + * @size: The number of bits to extract. + * + * Context: @offset + @size <= 32. + * + * Return: Bits [@offset, @offset + @size) from @value. + */ +/* from mali_cdsb.h */ +#define KBASE_UBFX32(value, offset, size) \ + (((u32)(value) >> (u32)(offset)) & (u32)((1ULL << (u32)(size)) - 1)) + +int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props) +{ + kbase_gpu_clk_speed_func get_gpu_speed_mhz; + u32 gpu_speed_mhz; + int rc = 1; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != kbase_props); + + /* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function. + * If that function fails, or the function is not provided by the system integrator, we report the maximum + * GPU speed as specified by GPU_FREQ_KHZ_MAX. + */ + get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC; + if (get_gpu_speed_mhz != NULL) { + rc = get_gpu_speed_mhz(&gpu_speed_mhz); +#ifdef CONFIG_MALI_DEBUG + /* Issue a warning message when the reported GPU speed falls outside the min/max range */ + if (rc == 0) { + u32 gpu_speed_khz = gpu_speed_mhz * 1000; + + if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min || + gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max) + dev_warn(kctx->kbdev->dev, "GPU speed is outside of min/max range (got %lu kHz, min %lu kHz, max %lu kHz)\n", + (unsigned long)gpu_speed_khz, + (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min, + (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max); + } +#endif /* CONFIG_MALI_DEBUG */ + } + if (kctx->kbdev->clock) { + gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000; + rc = 0; + } + if (rc != 0) + gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000; + + kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz; + + memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props)); + + /* Before API 8.2, userspace expected L3 cache info in this field, which was always 0 */ + if (kctx->api_version < KBASE_API_VERSION(8, 2)) + kbase_props->props.raw_props.suspend_size = 0; + + + return 0; +} + +static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props) +{ + struct mali_base_gpu_coherent_group *current_group; + u64 group_present; + u64 group_mask; + u64 first_set, first_set_prev; + u32 num_groups = 0; + + KBASE_DEBUG_ASSERT(NULL != props); + + props->coherency_info.coherency = props->raw_props.mem_features; + props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); + + if (props->coherency_info.coherency &
GROUPS_L2_COHERENT) { + /* Group is l2 coherent */ + group_present = props->raw_props.l2_present; + } else { + /* Group is l1 coherent */ + group_present = props->raw_props.shader_present; + } + + /* + * The coherent group mask can be computed from the l2 present + * register. + * + * For the coherent group n: + * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1) + * where first_set is group_present with only its nth set-bit kept + * (i.e. the position from where a new group starts). + * + * For instance if the groups are l2 coherent and l2_present=0x0..01111: + * The first mask is: + * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1) + * = (0x0..010 - 1) & ~(0x0..01 - 1) + * = 0x0..00f + * The second mask is: + * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1) + * = (0x0..100 - 1) & ~(0x0..010 - 1) + * = 0x0..0f0 + * And so on until all the bits from group_present have been cleared + * (i.e. there is no group left). + */ + + current_group = props->coherency_info.group; + first_set = group_present & ~(group_present - 1); + + while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) { + group_present -= first_set; /* Clear the current group bit */ + first_set_prev = first_set; + + first_set = group_present & ~(group_present - 1); + group_mask = (first_set - 1) & ~(first_set_prev - 1); + + /* Populate the coherent_group structure for each group */ + current_group->core_mask = group_mask & props->raw_props.shader_present; + current_group->num_cores = hweight64(current_group->core_mask); + + num_groups++; + current_group++; + } + + if (group_present != 0) + pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS); + + props->coherency_info.num_groups = num_groups; +} + +/** + * kbase_gpuprops_get_props - Get the GPU configuration + * @gpu_props: The &base_gpu_props structure + * @kbdev: The &struct kbase_device structure for the device + * + * Fill the &base_gpu_props structure with values from the GPU configuration + * registers. 
Only the raw properties are filled in this function + */ +static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) +{ + struct kbase_gpuprops_regdump regdump; + int i; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != gpu_props); + + /* Dump relevant registers */ + kbase_backend_gpuprops_get(kbdev, ®dump); + + gpu_props->raw_props.gpu_id = regdump.gpu_id; + gpu_props->raw_props.tiler_features = regdump.tiler_features; + gpu_props->raw_props.mem_features = regdump.mem_features; + gpu_props->raw_props.mmu_features = regdump.mmu_features; + gpu_props->raw_props.l2_features = regdump.l2_features; + gpu_props->raw_props.suspend_size = regdump.suspend_size; + + gpu_props->raw_props.as_present = regdump.as_present; + gpu_props->raw_props.js_present = regdump.js_present; + gpu_props->raw_props.shader_present = ((u64) regdump.shader_present_hi << 32) + regdump.shader_present_lo; + gpu_props->raw_props.tiler_present = ((u64) regdump.tiler_present_hi << 32) + regdump.tiler_present_lo; + gpu_props->raw_props.l2_present = ((u64) regdump.l2_present_hi << 32) + regdump.l2_present_lo; + + for (i = 0; i < GPU_MAX_JOB_SLOTS; i++) + gpu_props->raw_props.js_features[i] = regdump.js_features[i]; + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + gpu_props->raw_props.texture_features[i] = regdump.texture_features[i]; + + gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size; + gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads; + gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size; + gpu_props->raw_props.thread_features = regdump.thread_features; + +} + +/** + * kbase_gpuprops_calculate_props - Calculate the derived properties + * @gpu_props: The &base_gpu_props structure + * @kbdev: The &struct kbase_device structure for the device + * + * Fill the &base_gpu_props structure with values derived from the GPU + * configuration registers + */ +static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev) +{ + int i; + + /* Populate the base_gpu_props structure */ + gpu_props->core_props.version_status = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4); + gpu_props->core_props.minor_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8); + gpu_props->core_props.major_revision = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4); + gpu_props->core_props.product_id = KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16); + gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2; + gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT; + + for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++) + gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i]; + + gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8); + gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8); + gpu_props->l2_props.num_l2_slices = 1; + if (gpu_props->core_props.product_id == GPU_ID_PI_T76X) + gpu_props->l2_props.num_l2_slices = KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1; + + gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6); + gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4); + + if (gpu_props->raw_props.thread_max_threads == 0) + gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT; 
+ else + gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads; + + if (gpu_props->raw_props.thread_max_workgroup_size == 0) + gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT; + else + gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size; + + if (gpu_props->raw_props.thread_max_barrier_size == 0) + gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT; + else + gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size; + + gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16); + gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8); + gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6); + gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2); + + /* If values are not specified, then use defaults */ + if (gpu_props->thread_props.max_registers == 0) { + gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT; + gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT; + gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT; + } + /* Initialize the coherent_group structure for each group */ + kbase_gpuprops_construct_coherent_groups(gpu_props); +} + +void kbase_gpuprops_set(struct kbase_device *kbdev) +{ + struct kbase_gpu_props *gpu_props; + struct gpu_raw_gpu_props *raw; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + gpu_props = &kbdev->gpu_props; + raw = &gpu_props->props.raw_props; + + /* Initialize the base_gpu_props structure from the hardware */ + kbase_gpuprops_get_props(&gpu_props->props, kbdev); + + /* Populate the derived properties */ + kbase_gpuprops_calculate_props(&gpu_props->props, kbdev); + + /* Populate kbase-only fields */ + gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8); + gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8); + + gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1); + + gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8); + gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8); + + gpu_props->num_cores = hweight64(raw->shader_present); + gpu_props->num_core_groups = hweight64(raw->l2_present); + gpu_props->num_address_spaces = hweight32(raw->as_present); + gpu_props->num_job_slots = hweight32(raw->js_present); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h new file mode 100644 index 00000000000..af97d97bf94 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops.h @@ -0,0 +1,54 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_gpuprops.h + * Base kernel property query APIs + */ + +#ifndef _KBASE_GPUPROPS_H_ +#define _KBASE_GPUPROPS_H_ + +#include "mali_kbase_gpuprops_types.h" + +/* Forward definition - see mali_kbase.h */ +struct kbase_device; + +/** + * @brief Set up Kbase GPU properties. + * + * Set up Kbase GPU properties with information from the GPU registers + * + * @param kbdev The struct kbase_device structure for the device + */ +void kbase_gpuprops_set(struct kbase_device *kbdev); + +/** + * @brief Provide GPU properties to userside through UKU call. + * + * Fill the struct kbase_uk_gpuprops with values from GPU configuration registers. + * + * @param kctx The struct kbase_context structure + * @param kbase_props A copy of the struct kbase_uk_gpuprops structure from userspace + * + * @return 0 on success. Any other value indicates failure. + */ +int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props); + +#endif /* _KBASE_GPUPROPS_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h new file mode 100644 index 00000000000..e37e43c2fb0 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_gpuprops_types.h @@ -0,0 +1,91 @@ +/* + * + * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_gpuprops_types.h + * Base kernel property query APIs + */ + +#ifndef _KBASE_GPUPROPS_TYPES_H_ +#define _KBASE_GPUPROPS_TYPES_H_ + +#include "mali_base_kernel.h" + +#define KBASE_GPU_SPEED_MHZ 123 +#define KBASE_GPU_PC_SIZE_LOG2 24U + +struct kbase_gpuprops_regdump { + u32 gpu_id; + u32 l2_features; + u32 suspend_size; /* API 8.2+ */ + u32 tiler_features; + u32 mem_features; + u32 mmu_features; + u32 as_present; + u32 js_present; + u32 thread_max_threads; + u32 thread_max_workgroup_size; + u32 thread_max_barrier_size; + u32 thread_features; + u32 texture_features[BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS]; + u32 js_features[GPU_MAX_JOB_SLOTS]; + u32 shader_present_lo; + u32 shader_present_hi; + u32 tiler_present_lo; + u32 tiler_present_hi; + u32 l2_present_lo; + u32 l2_present_hi; +}; + +struct kbase_gpu_cache_props { + u8 associativity; + u8 external_bus_width; +}; + +struct kbase_gpu_mem_props { + u8 core_group; +}; + +struct kbase_gpu_mmu_props { + u8 va_bits; + u8 pa_bits; +}; + +struct kbase_gpu_props { + /* kernel-only properties */ + u8 num_cores; + u8 num_core_groups; + u8 num_address_spaces; + u8 num_job_slots; + + struct kbase_gpu_cache_props l2_props; + + struct kbase_gpu_mem_props mem; + struct kbase_gpu_mmu_props mmu; + + /** + * Implementation specific irq throttle value (us), should be adjusted during integration. 
+ */ + int irq_throttle_time_us; + + /* Properties shared with userspace */ + base_gpu_props props; +}; + +#endif /* _KBASE_GPUPROPS_TYPES_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.c b/drivers/gpu/arm/midgard/mali_kbase_hw.c new file mode 100644 index 00000000000..1a5a21b3812 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.c @@ -0,0 +1,210 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Run-time work-arounds helpers + */ + +#include <mali_base_hwconfig_features.h> +#include <mali_base_hwconfig_issues.h> +#include <mali_midg_regmap.h> +#include "mali_kbase.h" +#include "mali_kbase_hw.h" + +void kbase_hw_set_features_mask(struct kbase_device *kbdev) +{ + const enum base_hw_feature *features; + u32 gpu_id; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + gpu_id &= GPU_ID_VERSION_PRODUCT_ID; + gpu_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT; + + switch (gpu_id) { + case GPU_ID_PI_TFRX: + /* FALLTHROUGH */ + case GPU_ID_PI_T86X: + features = base_hw_features_tFxx; + break; + case GPU_ID_PI_T83X: + features = base_hw_features_t83x; + break; + case GPU_ID_PI_T82X: + features = base_hw_features_t82x; + break; + case GPU_ID_PI_T76X: + features = base_hw_features_t76x; + break; + case GPU_ID_PI_T72X: + features = base_hw_features_t72x; + break; + case GPU_ID_PI_T62X: + features = base_hw_features_t62x; + break; + case GPU_ID_PI_T60X: + features = base_hw_features_t60x; + break; + default: + features = base_hw_features_generic; + break; + } + + for (; *features != BASE_HW_FEATURE_END; features++) + set_bit(*features, &kbdev->hw_features_mask[0]); +} + +int kbase_hw_set_issues_mask(struct kbase_device *kbdev) +{ + const enum base_hw_issue *issues; + u32 gpu_id; + u32 impl_tech; + + gpu_id = kbdev->gpu_props.props.raw_props.gpu_id; + impl_tech = kbdev->gpu_props.props.thread_props.impl_tech; + + if (impl_tech != IMPLEMENTATION_MODEL) { + switch (gpu_id) { + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_15DEV0): + issues = base_hw_issues_t60x_r0p0_15dev0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 0, GPU_ID_S_EAC): + issues = base_hw_issues_t60x_r0p0_eac; + break; + case GPU_ID_MAKE(GPU_ID_PI_T60X, 0, 1, 0): + issues = base_hw_issues_t60x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 0, 1, 0): + issues = base_hw_issues_t62x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 0, 1): + issues = base_hw_issues_t62x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T62X, 1, 1, 0): + issues = base_hw_issues_t62x_r1p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 0, 1): + issues = base_hw_issues_t76x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 1): + issues = base_hw_issues_t76x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 1, 9): + issues = base_hw_issues_t76x_r0p1_50rel0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 2, 1): + issues = base_hw_issues_t76x_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 0, 3, 1): + issues = base_hw_issues_t76x_r0p3; + break; + case GPU_ID_MAKE(GPU_ID_PI_T76X, 1, 0, 0): + issues 
= base_hw_issues_t76x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 0): + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 1): + case GPU_ID_MAKE(GPU_ID_PI_T72X, 0, 0, 2): + issues = base_hw_issues_t72x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 0, 0): + issues = base_hw_issues_t72x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T72X, 1, 1, 0): + issues = base_hw_issues_t72x_r1p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 1, 2): + issues = base_hw_issues_tFRx_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 0, 2, 0): + issues = base_hw_issues_tFRx_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 1, 0, 0): + issues = base_hw_issues_tFRx_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_TFRX, 2, 0, 0): + issues = base_hw_issues_tFRx_r2p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T86X, 0, 2, 0): + issues = base_hw_issues_t86x_r0p2; + break; + case GPU_ID_MAKE(GPU_ID_PI_T86X, 1, 0, 0): + issues = base_hw_issues_t86x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T86X, 2, 0, 0): + issues = base_hw_issues_t86x_r2p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T83X, 0, 1, 0): + issues = base_hw_issues_t83x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T83X, 1, 0, 0): + issues = base_hw_issues_t83x_r1p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 0, 0): + issues = base_hw_issues_t82x_r0p0; + break; + case GPU_ID_MAKE(GPU_ID_PI_T82X, 0, 1, 0): + issues = base_hw_issues_t82x_r0p1; + break; + case GPU_ID_MAKE(GPU_ID_PI_T82X, 1, 0, 0): + issues = base_hw_issues_t82x_r1p0; + break; + default: + dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + } else { + /* Software model */ + switch (gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT) { + case GPU_ID_PI_T60X: + issues = base_hw_issues_model_t60x; + break; + case GPU_ID_PI_T62X: + issues = base_hw_issues_model_t62x; + break; + case GPU_ID_PI_T72X: + issues = base_hw_issues_model_t72x; + break; + case GPU_ID_PI_T76X: + issues = base_hw_issues_model_t76x; + break; + case GPU_ID_PI_TFRX: + issues = base_hw_issues_model_tFRx; + break; + case GPU_ID_PI_T86X: + issues = base_hw_issues_model_t86x; + break; + case GPU_ID_PI_T83X: + issues = base_hw_issues_model_t83x; + break; + case GPU_ID_PI_T82X: + issues = base_hw_issues_model_t82x; + break; + default: + dev_err(kbdev->dev, "Unknown GPU ID %x", gpu_id); + return -EINVAL; + } + } + + dev_info(kbdev->dev, "GPU identified as 0x%04x r%dp%d status %d", (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> GPU_ID_VERSION_PRODUCT_ID_SHIFT, (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT, (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT, (gpu_id & GPU_ID_VERSION_STATUS) >> GPU_ID_VERSION_STATUS_SHIFT); + + for (; *issues != BASE_HW_ISSUE_END; issues++) + set_bit(*issues, &kbdev->hw_issues_mask[0]); + + return 0; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_hw.h b/drivers/gpu/arm/midgard/mali_kbase_hw.h new file mode 100644 index 00000000000..fce7d29558e --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_hw.h @@ -0,0 +1,52 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+
+
+/**
+ * @file
+ * Run-time work-arounds helpers
+ */
+
+#ifndef _KBASE_HW_H_
+#define _KBASE_HW_H_
+
+#include "mali_kbase_defs.h"
+
+/**
+ * @brief Tell whether a work-around should be enabled
+ */
+#define kbase_hw_has_issue(kbdev, issue)\
+ test_bit(issue, &(kbdev)->hw_issues_mask[0])
+
+/**
+ * @brief Tell whether a feature is supported
+ */
+#define kbase_hw_has_feature(kbdev, feature)\
+ test_bit(feature, &(kbdev)->hw_features_mask[0])
+
+/**
+ * @brief Set the HW issues mask depending on the GPU ID
+ */
+int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
+
+/**
+ * @brief Set the features mask depending on the GPU ID
+ */
+void kbase_hw_set_features_mask(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
new file mode 100644
index 00000000000..b09be99e6b4
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_backend.h
@@ -0,0 +1,54 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access backend common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_BACKEND_H_
+#define _KBASE_HWACCESS_BACKEND_H_
+
+/**
+ * kbase_backend_early_init - Perform the early stage of backend-specific
+ * initialization.
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_early_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_init - Perform the late stage of backend-specific
+ * initialization.
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+int kbase_backend_late_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_early_term - Perform the early stage of backend-specific
+ * termination.
+ * @kbdev: Device pointer
+ */
+void kbase_backend_early_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_late_term - Perform the late stage of backend-specific
+ * termination.
+ * @kbdev: Device pointer
+ */
+void kbase_backend_late_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_BACKEND_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
new file mode 100644
index 00000000000..261453e8f1a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_defs.h
@@ -0,0 +1,37 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_defs.h
+ * HW access common definitions
+ */
+
+#ifndef _KBASE_HWACCESS_DEFS_H_
+#define _KBASE_HWACCESS_DEFS_H_
+
+#include <mali_kbase_jm_defs.h>
+
+/* The kbasep_js_device_data::runpool_irq::lock (a spinlock) must be held when
+ * accessing this structure */
+struct kbase_hwaccess_data {
+ struct kbase_context *active_kctx;
+
+ struct kbase_backend_data backend;
+};
+
+#endif /* _KBASE_HWACCESS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
new file mode 100644
index 00000000000..f93ca9d8680
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_gpuprops.h
@@ -0,0 +1,35 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * Base kernel property query backend APIs
+ */
+
+#ifndef _KBASE_HWACCESS_GPUPROPS_H_
+#define _KBASE_HWACCESS_GPUPROPS_H_
+
+/**
+ * kbase_backend_gpuprops_get() - Fill @regdump with GPU properties read from
+ * GPU
+ * @kbdev: Device pointer
+ * @regdump: Pointer to struct kbase_gpuprops_regdump structure
+ */
+void kbase_backend_gpuprops_get(struct kbase_device *kbdev,
+ struct kbase_gpuprops_regdump *regdump);
+
+#endif /* _KBASE_HWACCESS_GPUPROPS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
new file mode 100644
index 00000000000..5de2b7535bb
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_instr.h
@@ -0,0 +1,116 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * HW Access instrumentation common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_INSTR_H_
+#define _KBASE_HWACCESS_INSTR_H_
+
+#include <mali_kbase_instr_defs.h>
+
+/**
+ * kbase_instr_hwcnt_enable_internal - Enable HW counters collection
+ * @kbdev: Kbase device
+ * @kctx: Kbase context
+ * @setup: HW counter setup parameters
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ struct kbase_uk_hwcnt_setup *setup);
+
+/**
+ * kbase_instr_hwcnt_disable_internal - Disable HW counters collection
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for an ongoing dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_request_dump() - Request HW counter dump from GPU
+ * @kctx: Kbase context
+ *
+ * Caller must either wait for kbase_instr_hwcnt_dump_complete() to return true,
+ * or call kbase_instr_hwcnt_wait_for_dump().
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_request_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_wait_for_dump() - Wait until pending HW counter dump has
+ * completed.
+ * @kctx: Kbase context
+ *
+ * Context: will sleep, waiting for dump to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_wait_for_dump(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_hwcnt_dump_complete - Tell whether the HW counters dump has
+ * completed
+ * @kctx: Kbase context
+ * @success: Set to true if successful
+ *
+ * Context: does not sleep.
+ *
+ * Return: true if the dump is complete
+ */
+bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
+ bool * const success);
+
+/**
+ * kbase_instr_hwcnt_clear() - Clear HW counters
+ * @kctx: Kbase context
+ *
+ * Context: might sleep, waiting for reset to complete
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_hwcnt_clear(struct kbase_context *kctx);
+
+/**
+ * kbase_instr_backend_init() - Initialise the instrumentation backend
+ * @kbdev: Kbase device
+ *
+ * This function should be called during driver initialization.
+ *
+ * Return: 0 on success
+ */
+int kbase_instr_backend_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_instr_backend_term() - Terminate the instrumentation backend
+ * @kbdev: Kbase device
+ *
+ * This function should be called during driver termination.
+ */
+void kbase_instr_backend_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_HWACCESS_INSTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
new file mode 100644
index 00000000000..a2fdf247881
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_jm.h
@@ -0,0 +1,283 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/*
+ * HW access job manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_JM_H_
+#define _KBASE_HWACCESS_JM_H_
+
+/**
+ * kbase_backend_run_atom() - Run an atom on the GPU
+ * @kbdev: Device pointer
+ * @katom: Atom to run
+ *
+ * Caller must hold the HW access lock
+ */
+void kbase_backend_run_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_find_free_address_space() - Find a free address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * If no address spaces are currently free, then this function can evict an
+ * idle context from the runpool, freeing up the address space it was using.
+ *
+ * The address space is marked as in use. The caller must either assign a
+ * context using kbase_backend_use_ctx(), or release it using
+ * kbase_backend_release_free_address_space()
+ *
+ * Return: Number of the free address space, or KBASEP_AS_NR_INVALID if none
+ * available
+ */
+int kbase_backend_find_free_address_space(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_free_address_space() - Release an address space.
+ * @kbdev: Device pointer
+ * @as_nr: Address space to release
+ *
+ * The address space must have been returned by
+ * kbase_backend_find_free_address_space().
+ */
+void kbase_backend_release_free_address_space(struct kbase_device *kbdev,
+ int as_nr);
+
+/**
+ * kbase_backend_use_ctx() - Activate a currently unscheduled context, using the
+ * provided address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer. May be NULL
+ * @as_nr: Free address space to use
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * Return: true if successful, false if ASID not assigned. If kctx->as_pending
+ * is true then ASID assignment will complete at some point in the
+ * future and will re-start scheduling, otherwise no ASIDs are available
+ */
+bool kbase_backend_use_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx,
+ int as_nr);
+
+/**
+ * kbase_backend_use_ctx_sched() - Activate a context.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * kbase_gpu_next_job() will pull atoms from the active context.
+ *
+ * The context must already be scheduled and assigned to an address space. If
+ * the context is not scheduled, then kbase_backend_use_ctx() should be used
+ * instead.
+ *
+ * Caller must hold runpool_irq.lock
+ *
+ * Return: true if context is now active, false otherwise (i.e. if the context
+ * does not have an address space assigned)
+ */
+bool kbase_backend_use_ctx_sched(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_irq - Release a context from the GPU. This will
+ * de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * Caller must hold as->transaction_mutex and runpool_irq.lock
+ */
+void kbase_backend_release_ctx_irq(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_release_ctx_noirq - Release a context from the GPU. This will
+ * de-assign the assigned address space.
+ * @kbdev: Device pointer
+ * @kctx: Context pointer
+ *
+ * Caller must hold as->transaction_mutex
+ *
+ * This function must perform any operations that could not be performed in IRQ
+ * context by kbase_backend_release_ctx_irq().
+ */
+void kbase_backend_release_ctx_noirq(struct kbase_device *kbdev,
+ struct kbase_context *kctx);
+
+/**
+ * kbase_backend_complete_wq() - Perform backend-specific actions required on
+ * completing an atom.
+ * @kbdev: Device pointer
+ * @katom: Pointer to the atom to complete
+ *
+ * This function should only be called from jd_done_worker().
+ */
+void kbase_backend_complete_wq(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom);
+
+/**
+ * kbase_backend_reset() - The GPU is being reset. Cancel all jobs on the GPU
+ * and remove any others from the ringbuffers.
+ * @kbdev: Device pointer
+ * @end_timestamp: Timestamp of reset
+ */
+void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
+
+/**
+ * kbase_backend_inspect_head() - Return the atom currently at the head of slot
+ * @js
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: Atom currently at the head of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_head(struct kbase_device *kbdev,
+ int js);
+
+/**
+ * kbase_backend_inspect_tail - Return the atom currently at the tail of slot
+ * @js
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: Atom currently at the tail of slot @js, or NULL
+ */
+struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
+ int js);
+
+/**
+ * kbase_backend_nr_atoms_on_slot() - Return the number of atoms currently on a
+ * slot.
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: Number of atoms currently on slot
+ */
+int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_nr_atoms_submitted() - Return the number of atoms on a slot
+ * that are currently on the GPU.
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: Number of atoms on slot @js that are currently on the GPU.
+ */
+int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_backend_ctx_count_changed() - Number of contexts ready to submit jobs
+ * has changed.
+ * @kbdev: Device pointer
+ *
+ * Perform any required backend-specific actions (e.g. starting/stopping
+ * scheduling timers).
+ */
+void kbase_backend_ctx_count_changed(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_slot_free() - Return the number of jobs that can currently be
+ * submitted to slot @js.
+ * @kbdev: Device pointer
+ * @js: Job slot to inspect
+ *
+ * Return: Number of jobs that can be submitted.
+ */
+int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
+
+/**
+ * kbase_job_check_leave_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
+void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
+ struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_backend_jm_kill_jobs_from_kctx - Kill all jobs that are currently
+ * running from a context
+ * @kctx: Context pointer
+ *
+ * This is used in response to a page fault to remove all jobs from the faulting
+ * context from the hardware.
+ */
+void kbase_backend_jm_kill_jobs_from_kctx(struct kbase_context *kctx);
+
+/**
+ * kbase_jm_wait_for_zero_jobs - Wait for context to have zero jobs running, and
+ * to be descheduled.
+ * @kctx: Context pointer
+ *
+ * This should be called following kbase_js_zap_context(), to ensure the context
+ * can be safely destroyed.
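+ *
+ * Illustrative sequence (a sketch, not part of this patch): a context
+ * teardown path would pair the two calls and only then free the context:
+ *
+ *   kbase_js_zap_context(kctx);
+ *   kbase_jm_wait_for_zero_jobs(kctx);
+ *
+ * after which kctx can safely be destroyed.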
+ */
+void kbase_jm_wait_for_zero_jobs(struct kbase_context *kctx);
+
+#if KBASE_GPU_RESET_EN
+/**
+ * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
+ * @kbdev: Device pointer
+ *
+ * This function just soft-stops all the slots to ensure that as many jobs as
+ * possible are saved.
+ *
+ * Return: a boolean which should be interpreted as follows:
+ * - true - Prepared for reset, kbase_reset_gpu should be called.
+ * - false - Another thread is performing a reset, kbase_reset_gpu should
+ * not be called.
+ */
+bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev);
+
+/**
+ * kbase_reset_gpu - Reset the GPU
+ * @kbdev: Device pointer
+ *
+ * This function should be called after kbase_prepare_to_reset_gpu() has
+ * returned true. It should never be called without a corresponding call to
+ * kbase_prepare_to_reset_gpu().
+ *
+ * After this function is called (or not called if kbase_prepare_to_reset_gpu
+ * returned false), the caller should wait for kbdev->reset_waitq to be
+ * signalled to know when the reset has completed.
+ */
+void kbase_reset_gpu(struct kbase_device *kbdev);
+#endif
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx: The kbase context that contains the job(s) that should
+ * be hard-stopped
+ * @js: The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ * jobs from the context)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ */
+void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
+ struct kbase_jd_atom *target_katom);
+
+#endif /* _KBASE_HWACCESS_JM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
new file mode 100644
index 00000000000..dbdcd3def22
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_pm.h
@@ -0,0 +1,206 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_pm.h
+ * HW access power manager common APIs
+ */
+
+#ifndef _KBASE_HWACCESS_PM_H_
+#define _KBASE_HWACCESS_PM_H_
+
+#include <mali_midg_regmap.h>
+#include <linux/atomic.h>
+
+#include <mali_kbase_pm_defs.h>
+
+/* Forward definition - see mali_kbase.h */
+struct kbase_device;
+
+/* Functions common to all HW access backends */
+
+/**
+ * Initialize the power management framework.
+ *
+ * Must be called before any other power management function
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return 0 if the power management framework was successfully
+ * initialized.
+ */
+int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
+
+/**
+ * Terminate the power management framework.
+ *
+ * No power management functions may be called after this (except
+ * @ref kbase_hwaccess_pm_init)
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_hwaccess_pm_powerup - Power up the GPU.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
+ *
+ * Power up the GPU after all modules have been initialized and interrupt
+ * handlers installed.
+ *
+ * Return: 0 if powerup was successful.
+ */
+int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
+ unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ *
+ * Should ensure that no new interrupts are generated, but allow any currently
+ * running interrupt handlers to complete successfully. The GPU is forced off by
+ * the time this function returns, regardless of whether or not the active power
+ * policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to suspend the GPU
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Perform any backend-specific actions to resume the GPU from a suspend
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for activating the GPU. Called when the first
+ * context goes active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * Perform any required actions for idling the GPU. Called when the last
+ * context goes idle.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ */
+void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
+
+
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param new_core_mask The core mask to use
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+ u64 new_core_mask);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_ca_policy
+*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ * @ref kbase_pm_ca_list_policies)
+ */
+void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_ca_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ * be NULL. The contents of this array must not be
+ * modified.
+ *
+ * @return The number of policies
+ */
+int
+kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
+
+
+/**
+ * Get the current policy.
+ *
+ * Returns the policy that is currently active.
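+ *
+ * An illustrative sketch (not part of this patch) of how the policy calls in
+ * this header compose; error handling omitted:
+ *
+ *   const struct kbase_pm_policy *const *policies;
+ *   int n = kbase_pm_list_policies(&policies);
+ *
+ *   if (n > 0 && kbase_pm_get_policy(kbdev) != policies[0])
+ *           kbase_pm_set_policy(kbdev, policies[0]);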
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ *
+ * @return The current policy
+ */
+const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
+
+/**
+ * Change the policy to the one specified.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid
+ * pointer)
+ * @param policy The policy to change to (valid pointer returned from
+ * @ref kbase_pm_list_policies)
+ */
+void kbase_pm_set_policy(struct kbase_device *kbdev,
+ const struct kbase_pm_policy *policy);
+
+/**
+ * Retrieve a static list of the available policies.
+ *
+ * @param[out] policies An array pointer to take the list of policies. This may
+ * be NULL. The contents of this array must not be
+ * modified.
+ *
+ * @return The number of policies
+ */
+int kbase_pm_list_policies(const struct kbase_pm_policy * const **policies);
+
+#endif /* _KBASE_HWACCESS_PM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
new file mode 100644
index 00000000000..89d26eaf09a
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_hwaccess_time.h
@@ -0,0 +1,53 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+/**
+ * @file mali_kbase_hwaccess_time.h
+ * HW access backend time APIs
+ */
+
+#ifndef _KBASE_BACKEND_TIME_H_
+#define _KBASE_BACKEND_TIME_H_
+
+/**
+ * kbase_backend_get_gpu_time() - Get current GPU time
+ * @kbdev: Device pointer
+ * @cycle_counter: Pointer to u64 to store cycle counter in
+ * @system_time: Pointer to u64 to store system time in
+ * @ts: Pointer to struct timespec to store current monotonic
+ * time in
+ */
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+ u64 *system_time, struct timespec *ts);
+
+/**
+ * kbase_wait_write_flush() - Wait for GPU write flush
+ * @kctx: Context pointer
+ *
+ * Wait 1000 GPU clock cycles. This delay is known to give the GPU time to flush
+ * its write buffer.
+ *
+ * If a GPU reset occurs then the counters are reset to zero, so the delay may
+ * not be as expected.
+ *
+ * This function is only in use for BASE_HW_ISSUE_6367.
+ */
+#ifndef CONFIG_MALI_NO_MALI
+void kbase_wait_write_flush(struct kbase_context *kctx);
+#endif
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.c b/drivers/gpu/arm/midgard/mali_kbase_instr.c
new file mode 100644
index 00000000000..9c2043ac948
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_instr.c
@@ -0,0 +1,157 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Base kernel instrumentation APIs.
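+ *
+ * Rough usage flow (an illustrative sketch, not part of this patch): a
+ * privileged client enables the counters, triggers dumps, then disables
+ * them again:
+ *
+ *   kbase_instr_hwcnt_enable(kctx, &setup);
+ *   kbase_instr_hwcnt_dump(kctx);
+ *   kbase_instr_hwcnt_disable(kctx);
+ *
+ * where setup is a struct kbase_uk_hwcnt_setup filled in by the caller.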
+ */ + +#include <mali_kbase.h> +#include <mali_midg_regmap.h> + +void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(!kbdev->hwcnt.suspended_kctx); + + kctx = kbdev->hwcnt.kctx; + kbdev->hwcnt.suspended_kctx = kctx; + + /* Relevant state was saved into hwcnt.suspended_state when enabling the + * counters */ + + if (kctx) { + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_PRIVILEGED); + kbase_instr_hwcnt_disable(kctx); + } +} + +void kbase_instr_hwcnt_resume(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + KBASE_DEBUG_ASSERT(kbdev); + + kctx = kbdev->hwcnt.suspended_kctx; + kbdev->hwcnt.suspended_kctx = NULL; + + if (kctx) { + int err; + + err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, + &kbdev->hwcnt.suspended_state); + WARN(err, "Failed to restore instrumented hardware counters on resume\n"); + } +} + +int kbase_instr_hwcnt_enable(struct kbase_context *kctx, + struct kbase_uk_hwcnt_setup *setup) +{ + struct kbase_device *kbdev; + bool access_allowed; + int err; + + kbdev = kctx->kbdev; + + /* Determine if the calling task has access to this capability */ + access_allowed = kbase_security_has_capability(kctx, + KBASE_SEC_INSTR_HW_COUNTERS_COLLECT, + KBASE_SEC_FLAG_NOAUDIT); + if (!access_allowed) + return -EINVAL; + + /* Mark the context as active so the GPU is kept turned on */ + /* A suspend won't happen here, because we're in a syscall from a + * userspace thread. */ + kbase_pm_context_active(kbdev); + + /* Schedule the context in */ + kbasep_js_schedule_privileged_ctx(kbdev, kctx); + err = kbase_instr_hwcnt_enable_internal(kbdev, kctx, setup); + if (err) { + /* Release the context. This had its own Power Manager Active + * reference */ + kbasep_js_release_privileged_ctx(kbdev, kctx); + + /* Also release our Power Manager Active reference */ + kbase_pm_context_idle(kbdev); + } + + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_enable); + +int kbase_instr_hwcnt_disable(struct kbase_context *kctx) +{ + int err = -EINVAL; + struct kbase_device *kbdev = kctx->kbdev; + + err = kbase_instr_hwcnt_disable_internal(kctx); + if (err) + goto out; + + /* Release the context. 
This had its own Power Manager Active reference + */ + kbasep_js_release_privileged_ctx(kbdev, kctx); + + /* Also release our Power Manager Active reference */ + kbase_pm_context_idle(kbdev); + + dev_dbg(kbdev->dev, "HW counters dumping disabled for context %p", + kctx); +out: + return err; +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_disable); + +int kbase_instr_hwcnt_setup(struct kbase_context *kctx, + struct kbase_uk_hwcnt_setup *setup) +{ + struct kbase_vinstr_context *vinstr_ctx = kctx->kbdev->vinstr_ctx; + u32 bitmap[4]; + + if (setup == NULL) + return -EINVAL; + + bitmap[SHADER_HWCNT_BM] = setup->shader_bm; + bitmap[TILER_HWCNT_BM] = setup->tiler_bm; + bitmap[MMU_L2_HWCNT_BM] = setup->mmu_l2_bm; + bitmap[JM_HWCNT_BM] = setup->jm_bm; + if (setup->dump_buffer) { + if (kctx->vinstr_cli) + return -EBUSY; + kctx->vinstr_cli = kbase_vinstr_attach_client(vinstr_ctx, false, + setup->dump_buffer, bitmap); + if (!kctx->vinstr_cli) + return -ENOMEM; + } else { + kbase_vinstr_detach_client(vinstr_ctx, kctx->vinstr_cli); + kctx->vinstr_cli = NULL; + } + + return 0; +} + +int kbase_instr_hwcnt_dump(struct kbase_context *kctx) +{ + return kbase_vinstr_dump(kctx->kbdev->vinstr_ctx, kctx->vinstr_cli); +} +KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump); diff --git a/drivers/gpu/arm/midgard/mali_kbase_instr.h b/drivers/gpu/arm/midgard/mali_kbase_instr.h new file mode 100644 index 00000000000..5613453a3c0 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_instr.h @@ -0,0 +1,85 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Instrumentation API definitions + */ + +#ifndef _KBASE_INSTR_H_ +#define _KBASE_INSTR_H_ + +#include <mali_kbase_hwaccess_instr.h> + +/** + * kbase_instr_hwcnt_setup() - Configure HW counters collection + * @kctx: Kbase context + * @setup: &struct kbase_uk_hwcnt_setup containing configuration + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_setup(struct kbase_context *kctx, + struct kbase_uk_hwcnt_setup *setup); + +/** + * kbase_instr_hwcnt_enable() - Enable HW counters collection + * @kctx: Kbase context + * @setup: &struct kbase_uk_hwcnt_setup containing configuration + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_enable(struct kbase_context *kctx, + struct kbase_uk_hwcnt_setup *setup); + +/** + * kbase_instr_hwcnt_disable() - Disable HW counters collection + * @kctx: Kbase context + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_disable(struct kbase_context *kctx); + +/** + * kbase_instr_hwcnt_dump() - Trigger dump of HW counters and wait for + * completion + * @kctx: Kbase context + * + * Context: might sleep, waiting for dump to complete + * + * Return: 0 on success + */ +int kbase_instr_hwcnt_dump(struct kbase_context *kctx); + +/** + * kbase_instr_hwcnt_suspend() - GPU is suspending, stop HW counter collection + * @kbdev: Kbase device + * + * It's assumed that there's only one privileged context. 
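+ *
+ * Illustrative pairing (a sketch, not part of this patch): a system suspend
+ * path would bracket the power transition with the two calls:
+ *
+ *   kbase_instr_hwcnt_suspend(kbdev);
+ *   ...GPU power transition...
+ *   kbase_instr_hwcnt_resume(kbdev);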
+ * + * Safe to do this without lock when doing an OS suspend, because it only + * changes in response to user-space IOCTLs + */ +void kbase_instr_hwcnt_suspend(struct kbase_device *kbdev); + +/** + * kbase_instr_hwcnt_resume() - GPU is resuming, resume HW counter collection + * @kbdev: Kbase device + */ +void kbase_instr_hwcnt_resume(struct kbase_device *kbdev); + +#endif /* _KBASE_INSTR_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd.c b/drivers/gpu/arm/midgard/mali_kbase_jd.c new file mode 100644 index 00000000000..c5a86234049 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_jd.c @@ -0,0 +1,1726 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#if defined(CONFIG_DMA_SHARED_BUFFER) +#include <linux/dma-buf.h> +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ +#ifdef CONFIG_COMPAT +#include <linux/compat.h> +#endif +#include <mali_kbase.h> +#include <mali_kbase_uku.h> +#ifdef CONFIG_UMP +#include <linux/ump.h> +#endif /* CONFIG_UMP */ +#include <linux/random.h> +#include <linux/version.h> +#include <linux/ratelimit.h> + +#include <mali_kbase_jm.h> +#include <mali_kbase_hwaccess_jm.h> + +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include <mali_kbase_tlstream.h> +#endif + +#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) +/* random32 was renamed to prandom_u32 in 3.8 */ +#define prandom_u32 random32 +#endif + +/* Return whether katom will run on the GPU or not. Currently only soft jobs and + * dependency-only atoms do not run on the GPU */ +#define IS_GPU_ATOM(katom) (!((katom->core_req & BASE_JD_REQ_SOFT_JOB) || \ + ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == \ + BASE_JD_REQ_DEP))) +/* + * This is the kernel side of the API. Only entry points are: + * - kbase_jd_submit(): Called from userspace to submit a single bag + * - kbase_jd_done(): Called from interrupt context to track the + * completion of a job. + * Callouts: + * - to the job manager (enqueue a job) + * - to the event subsystem (signals the completion/failure of bag/job-chains). + */ + +static void __user * +get_compat_pointer(struct kbase_context *kctx, const union kbase_pointer *p) +{ +#ifdef CONFIG_COMPAT + if (kctx->is_compat) + return compat_ptr(p->compat_value); +#endif + return p->value; +} + +/* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs + * + * Returns whether the JS needs a reschedule. 
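+ *
+ * A typical caller pattern, illustrating the note below (a sketch based on
+ * kds_dep_clear() later in this file):
+ *
+ *   resched = jd_run_atom(katom);
+ *   if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED)
+ *           resched |= jd_done_nolock(katom);
+ *   if (resched)
+ *           kbase_js_sched_all(kbdev);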
+ * + * Note that the caller must also check the atom status and + * if it is KBASE_JD_ATOM_STATE_COMPLETED must call jd_done_nolock + */ +static int jd_run_atom(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) == BASE_JD_REQ_DEP) { + /* Dependency only atom */ + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + return 0; + } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + /* Soft-job */ + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + == BASE_JD_REQ_SOFT_REPLAY) { + if (!kbase_replay_process(katom)) + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + } else if (kbase_process_soft_job(katom) == 0) { + kbase_finish_soft_job(katom); + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + } else { + /* The job has not completed */ + list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); + } + return 0; + } + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + /* Queue an action about whether we should try scheduling a context */ + return kbasep_js_add_job(kctx, katom); +} + +#ifdef CONFIG_KDS + +/* Add the katom to the kds waiting list. + * Atoms must be added to the waiting list after a successful call to kds_async_waitall. + * The caller must hold the kbase_jd_context.lock */ + +static void kbase_jd_kds_waiters_add(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx; + + KBASE_DEBUG_ASSERT(katom); + + kctx = katom->kctx; + + list_add_tail(&katom->node, &kctx->waiting_kds_resource); +} + +/* Remove the katom from the kds waiting list. + * Atoms must be removed from the waiting list before a call to kds_resource_set_release_sync. + * The supplied katom must first have been added to the list with a call to kbase_jd_kds_waiters_add. + * The caller must hold the kbase_jd_context.lock */ + +static void kbase_jd_kds_waiters_remove(struct kbase_jd_atom *katom) +{ + KBASE_DEBUG_ASSERT(katom); + list_del(&katom->node); +} + +static void kds_dep_clear(void *callback_parameter, void *callback_extra_parameter) +{ + struct kbase_jd_atom *katom; + struct kbase_jd_context *ctx; + struct kbase_device *kbdev; + + katom = (struct kbase_jd_atom *)callback_parameter; + KBASE_DEBUG_ASSERT(katom); + ctx = &katom->kctx->jctx; + kbdev = katom->kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); + + mutex_lock(&ctx->lock); + + /* KDS resource has already been satisfied (e.g. due to zapping) */ + if (katom->kds_dep_satisfied) + goto out; + + /* This atom's KDS dependency has now been met */ + katom->kds_dep_satisfied = true; + + /* Check whether the atom's other dependencies were already met. If + * katom is a GPU atom then the job scheduler may be able to represent + * the dependencies, hence we may attempt to submit it before they are + * met. 
Other atoms must have had both dependencies resolved */ + if (IS_GPU_ATOM(katom) || + (!kbase_jd_katom_dep_atom(&katom->dep[0]) && + !kbase_jd_katom_dep_atom(&katom->dep[1]))) { + /* katom dep complete, attempt to run it */ + bool resched = false; + + resched = jd_run_atom(katom); + + if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + /* The atom has already finished */ + resched |= jd_done_nolock(katom); + } + + if (resched) + kbase_js_sched_all(kbdev); + } + out: + mutex_unlock(&ctx->lock); +} + +static void kbase_cancel_kds_wait_job(struct kbase_jd_atom *katom) +{ + KBASE_DEBUG_ASSERT(katom); + + /* Prevent job_done_nolock from being called twice on an atom when + * there is a race between job completion and cancellation */ + + if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) { + /* Wait was cancelled - zap the atom */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (jd_done_nolock(katom)) + kbase_js_sched_all(katom->kctx->kbdev); + } +} +#endif /* CONFIG_KDS */ + +#ifdef CONFIG_DMA_SHARED_BUFFER +static int kbase_jd_umm_map(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + struct sg_table *sgt; + struct scatterlist *s; + int i; + phys_addr_t *pa; + int err; + size_t count = 0; + struct kbase_mem_phy_alloc *alloc; + + alloc = reg->gpu_alloc; + + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM); + KBASE_DEBUG_ASSERT(NULL == alloc->imported.umm.sgt); + sgt = dma_buf_map_attachment(alloc->imported.umm.dma_attachment, DMA_BIDIRECTIONAL); + + if (IS_ERR_OR_NULL(sgt)) + return -EINVAL; + + /* save for later */ + alloc->imported.umm.sgt = sgt; + + pa = kbase_get_gpu_phy_pages(reg); + KBASE_DEBUG_ASSERT(pa); + + for_each_sg(sgt->sgl, s, sgt->nents, i) { + int j; + size_t pages = PFN_UP(sg_dma_len(s)); + + WARN_ONCE(sg_dma_len(s) & (PAGE_SIZE-1), + "sg_dma_len(s)=%u is not a multiple of PAGE_SIZE\n", + sg_dma_len(s)); + + WARN_ONCE(sg_dma_address(s) & (PAGE_SIZE-1), + "sg_dma_address(s)=%llx is not aligned to PAGE_SIZE\n", + (unsigned long long) sg_dma_address(s)); + + for (j = 0; (j < pages) && (count < reg->nr_pages); j++, count++) + *pa++ = sg_dma_address(s) + (j << PAGE_SHIFT); + WARN_ONCE(j < pages, + "sg list from dma_buf_map_attachment > dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size); + } + + if (WARN_ONCE(count < reg->nr_pages, + "sg list from dma_buf_map_attachment < dma_buf->size=%zu\n", + alloc->imported.umm.dma_buf->size)) { + err = -EINVAL; + goto out; + } + + /* Update nents as we now have pages to map */ + alloc->nents = count; + + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags | KBASE_REG_GPU_WR | KBASE_REG_GPU_RD); + +out: + if (err) { + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + } + + return err; +} + +static void kbase_jd_umm_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(alloc); + KBASE_DEBUG_ASSERT(alloc->imported.umm.dma_attachment); + KBASE_DEBUG_ASSERT(alloc->imported.umm.sgt); + dma_buf_unmap_attachment(alloc->imported.umm.dma_attachment, + alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + alloc->imported.umm.sgt = NULL; + alloc->nents = 0; +} +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +void kbase_jd_free_external_resources(struct kbase_jd_atom *katom) +{ +#ifdef CONFIG_KDS + if (katom->kds_rset) { + struct kbase_jd_context *jctx = &katom->kctx->jctx; + + /* + * As the atom is 
no longer waiting, remove it from + * the waiting list. + */ + + mutex_lock(&jctx->lock); + kbase_jd_kds_waiters_remove(katom); + mutex_unlock(&jctx->lock); + + /* Release the kds resource or cancel if zapping */ + kds_resource_set_release_sync(&katom->kds_rset); + } +#endif /* CONFIG_KDS */ +} + +static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) +{ + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); + +#ifdef CONFIG_KDS + /* Prevent the KDS resource from triggering the atom in case of zapping */ + if (katom->kds_rset) + katom->kds_dep_satisfied = true; +#endif /* CONFIG_KDS */ + + kbase_gpu_vm_lock(katom->kctx); + /* only roll back if extres is non-NULL */ + if (katom->extres) { + u32 res_no; + + res_no = katom->nr_extres; + while (res_no-- > 0) { + struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; +#ifdef CONFIG_DMA_SHARED_BUFFER + if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + alloc->imported.umm.current_mapping_usage_count--; + + if (0 == alloc->imported.umm.current_mapping_usage_count) { + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_base_address( + katom->kctx, + katom->extres[res_no].gpu_address); + + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages( + katom->kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + kbase_jd_umm_unmap(katom->kctx, alloc); + } + } +#endif /* CONFIG_DMA_SHARED_BUFFER */ + kbase_mem_phy_alloc_put(katom->extres[res_no].alloc); + } + kfree(katom->extres); + katom->extres = NULL; + } + kbase_gpu_vm_unlock(katom->kctx); +} + +#if (defined(CONFIG_KDS) && defined(CONFIG_UMP)) || defined(CONFIG_DMA_SHARED_BUFFER_USES_KDS) +static void add_kds_resource(struct kds_resource *kds_res, struct kds_resource **kds_resources, u32 *kds_res_count, unsigned long *kds_access_bitmap, bool exclusive) +{ + u32 i; + + for (i = 0; i < *kds_res_count; i++) { + /* Duplicate resource, ignore */ + if (kds_resources[i] == kds_res) + return; + } + + kds_resources[*kds_res_count] = kds_res; + if (exclusive) + set_bit(*kds_res_count, kds_access_bitmap); + (*kds_res_count)++; +} +#endif + +/* + * Set up external resources needed by this job. + * + * jctx.lock must be held when this is called. + */ + +static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom) +{ + int err_ret_val = -EINVAL; + u32 res_no; +#ifdef CONFIG_KDS + u32 kds_res_count = 0; + struct kds_resource **kds_resources = NULL; + unsigned long *kds_access_bitmap = NULL; +#endif /* CONFIG_KDS */ + struct base_external_resource *input_extres; + + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES); + + /* no resources encoded, early out */ + if (!katom->nr_extres) + return -EINVAL; + + katom->extres = kmalloc_array(katom->nr_extres, sizeof(*katom->extres), GFP_KERNEL); + if (NULL == katom->extres) { + err_ret_val = -ENOMEM; + goto early_err_out; + } + + /* copy user buffer to the end of our real buffer. 
+ * Make sure the struct sizes haven't changed in a way + * we don't support */ + BUILD_BUG_ON(sizeof(*input_extres) > sizeof(*katom->extres)); + input_extres = (struct base_external_resource *) + (((unsigned char *)katom->extres) + + (sizeof(*katom->extres) - sizeof(*input_extres)) * + katom->nr_extres); + + if (copy_from_user(input_extres, + get_compat_pointer(katom->kctx, &user_atom->extres_list), + sizeof(*input_extres) * katom->nr_extres) != 0) { + err_ret_val = -EINVAL; + goto early_err_out; + } +#ifdef CONFIG_KDS + /* assume we have to wait for all */ + KBASE_DEBUG_ASSERT(0 != katom->nr_extres); + kds_resources = kmalloc_array(katom->nr_extres, sizeof(struct kds_resource *), GFP_KERNEL); + + if (NULL == kds_resources) { + err_ret_val = -ENOMEM; + goto early_err_out; + } + + KBASE_DEBUG_ASSERT(0 != katom->nr_extres); + kds_access_bitmap = kzalloc(sizeof(unsigned long) * ((katom->nr_extres + BITS_PER_LONG - 1) / BITS_PER_LONG), GFP_KERNEL); + + if (NULL == kds_access_bitmap) { + err_ret_val = -ENOMEM; + goto early_err_out; + } +#endif /* CONFIG_KDS */ + + /* need to keep the GPU VM locked while we set up UMM buffers */ + kbase_gpu_vm_lock(katom->kctx); + for (res_no = 0; res_no < katom->nr_extres; res_no++) { + struct base_external_resource *res; + struct kbase_va_region *reg; + + res = &input_extres[res_no]; + reg = kbase_region_tracker_find_region_enclosing_address(katom->kctx, + res->ext_resource & ~BASE_EXT_RES_ACCESS_EXCLUSIVE); + /* did we find a matching region object? */ + if (NULL == reg || (reg->flags & KBASE_REG_FREE)) { + /* roll back */ + goto failed_loop; + } + + if (!(katom->core_req & BASE_JD_REQ_SOFT_JOB) && + (reg->flags & KBASE_REG_SECURE)) { + katom->atom_flags |= KBASE_KATOM_FLAG_SECURE; + if ((katom->core_req & BASE_JD_REQ_FS) == 0) { + WARN_RATELIMIT(1, "Secure non-fragment jobs not supported"); + goto failed_loop; + } + } + + /* decide what needs to happen for this resource */ + switch (reg->gpu_alloc->type) { + case BASE_TMEM_IMPORT_TYPE_UMP: + { +#if defined(CONFIG_KDS) && defined(CONFIG_UMP) + struct kds_resource *kds_res; + + kds_res = ump_dd_kds_resource_get(reg->gpu_alloc->imported.ump_handle); + if (kds_res) + add_kds_resource(kds_res, kds_resources, &kds_res_count, + kds_access_bitmap, + res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); +#endif /*defined(CONFIG_KDS) && defined(CONFIG_UMP) */ + break; + } +#ifdef CONFIG_DMA_SHARED_BUFFER + case BASE_TMEM_IMPORT_TYPE_UMM: + { +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + struct kds_resource *kds_res; + + kds_res = get_dma_buf_kds_resource(reg->gpu_alloc->imported.umm.dma_buf); + if (kds_res) + add_kds_resource(kds_res, kds_resources, &kds_res_count, kds_access_bitmap, res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE); +#endif + reg->gpu_alloc->imported.umm.current_mapping_usage_count++; + if (1 == reg->gpu_alloc->imported.umm.current_mapping_usage_count) { + /* use a local variable to not pollute err_ret_val + * with a potential success value as some other gotos depend + * on the default error code stored in err_ret_val */ + int tmp; + + tmp = kbase_jd_umm_map(katom->kctx, reg); + if (tmp) { + /* failed to map this buffer, roll back */ + err_ret_val = tmp; + reg->gpu_alloc->imported.umm.current_mapping_usage_count--; + goto failed_loop; + } + } + break; + } +#endif + default: + goto failed_loop; + } + + /* finish with updating out array with the data we found */ + /* NOTE: It is important that this is the last thing we do (or + * at least not before the first write) as we overwrite elements + * as we 
loop and could be overwriting ourself, so no writes + * until the last read for an element. + * */ + katom->extres[res_no].gpu_address = reg->start_pfn << PAGE_SHIFT; /* save the start_pfn (as an address, not pfn) to use fast lookup later */ + katom->extres[res_no].alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + } + /* successfully parsed the extres array */ + /* drop the vm lock before we call into kds */ + kbase_gpu_vm_unlock(katom->kctx); + +#ifdef CONFIG_KDS + if (kds_res_count) { + int wait_failed; + + /* We have resources to wait for with kds */ + katom->kds_dep_satisfied = false; + + wait_failed = kds_async_waitall(&katom->kds_rset, + &katom->kctx->jctx.kds_cb, katom, NULL, + kds_res_count, kds_access_bitmap, + kds_resources); + + if (wait_failed) + goto failed_kds_setup; + else + kbase_jd_kds_waiters_add(katom); + } else { + /* Nothing to wait for, so kds dep met */ + katom->kds_dep_satisfied = true; + } + kfree(kds_resources); + kfree(kds_access_bitmap); +#endif /* CONFIG_KDS */ + + /* all done OK */ + return 0; + +/* error handling section */ + +#ifdef CONFIG_KDS + failed_kds_setup: + + /* lock before we unmap */ + kbase_gpu_vm_lock(katom->kctx); +#endif /* CONFIG_KDS */ + + failed_loop: + /* undo the loop work */ + while (res_no-- > 0) { + struct kbase_mem_phy_alloc *alloc = katom->extres[res_no].alloc; +#ifdef CONFIG_DMA_SHARED_BUFFER + if (alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) { + alloc->imported.umm.current_mapping_usage_count--; + + if (0 == alloc->imported.umm.current_mapping_usage_count) { + struct kbase_va_region *reg; + + reg = kbase_region_tracker_find_region_base_address( + katom->kctx, + katom->extres[res_no].gpu_address); + + if (reg && reg->gpu_alloc == alloc) + kbase_mmu_teardown_pages(katom->kctx, + reg->start_pfn, + kbase_reg_current_backed_size(reg)); + + kbase_jd_umm_unmap(katom->kctx, alloc); + } + } +#endif /* CONFIG_DMA_SHARED_BUFFER */ + kbase_mem_phy_alloc_put(alloc); + } + kbase_gpu_vm_unlock(katom->kctx); + + early_err_out: + kfree(katom->extres); + katom->extres = NULL; +#ifdef CONFIG_KDS + kfree(kds_resources); + kfree(kds_access_bitmap); +#endif /* CONFIG_KDS */ + return err_ret_val; +} + +static inline void jd_resolve_dep(struct list_head *out_list, + struct kbase_jd_atom *katom, + u8 d, + bool ctx_is_dying) +{ + u8 other_d = !d; + + while (!list_empty(&katom->dep_head[d])) { + struct kbase_jd_atom *dep_atom; + u8 dep_type; + + dep_atom = list_entry(katom->dep_head[d].next, + struct kbase_jd_atom, dep_item[d]); + + list_del(katom->dep_head[d].next); + + dep_type = kbase_jd_katom_dep_type(&dep_atom->dep[d]); + kbase_jd_katom_dep_clear(&dep_atom->dep[d]); + + if (katom->event_code != BASE_JD_EVENT_DONE && + (dep_type != BASE_JD_DEP_TYPE_ORDER || ctx_is_dying)) { + /* Atom failed, so remove the other dependencies and immediately fail the atom */ + if (kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { + list_del(&dep_atom->dep_item[other_d]); + kbase_jd_katom_dep_clear(&dep_atom->dep[other_d]); + } +#ifdef CONFIG_KDS + if (!dep_atom->kds_dep_satisfied) { + /* Just set kds_dep_satisfied to true. If the callback happens after this then it will early out and + * do nothing. 
If the callback doesn't happen then kbase_jd_post_external_resources will clean up + */ + dep_atom->kds_dep_satisfied = true; + } +#endif + + dep_atom->event_code = katom->event_code; + KBASE_DEBUG_ASSERT(dep_atom->status != + KBASE_JD_ATOM_STATE_UNUSED); + dep_atom->status = KBASE_JD_ATOM_STATE_COMPLETED; + + list_add_tail(&dep_atom->dep_item[0], out_list); + } else if (!kbase_jd_katom_dep_atom(&dep_atom->dep[other_d])) { +#ifdef CONFIG_KDS + if (dep_atom->kds_dep_satisfied) +#endif + list_add_tail(&dep_atom->dep_item[0], out_list); + } + } +} + +KBASE_EXPORT_TEST_API(jd_resolve_dep); + +#if MALI_CUSTOMER_RELEASE == 0 +static void jd_force_failure(struct kbase_device *kbdev, struct kbase_jd_atom *katom) +{ + kbdev->force_replay_count++; + + if (kbdev->force_replay_count >= kbdev->force_replay_limit) { + kbdev->force_replay_count = 0; + katom->event_code = BASE_JD_EVENT_FORCE_REPLAY; + + if (kbdev->force_replay_random) + kbdev->force_replay_limit = + (prandom_u32() % KBASEP_FORCE_REPLAY_RANDOM_LIMIT) + 1; + + dev_info(kbdev->dev, "force_replay : promoting to error\n"); + } +} + +/** Test to see if atom should be forced to fail. + * + * This function will check if an atom has a replay job as a dependent. If so + * then it will be considered for forced failure. */ +static void jd_check_force_failure(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + int i; + + if ((kbdev->force_replay_limit == KBASEP_FORCE_REPLAY_DISABLED) || + (katom->core_req & BASEP_JD_REQ_EVENT_NEVER)) + return; + + for (i = 1; i < BASE_JD_ATOM_COUNT; i++) { + if (kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[0]) == katom || + kbase_jd_katom_dep_atom(&kctx->jctx.atoms[i].dep[1]) == katom) { + struct kbase_jd_atom *dep_atom = &kctx->jctx.atoms[i]; + + if ((dep_atom->core_req & BASEP_JD_REQ_ATOM_TYPE) == + BASE_JD_REQ_SOFT_REPLAY && + (dep_atom->core_req & kbdev->force_replay_core_req) + == kbdev->force_replay_core_req) { + jd_force_failure(kbdev, katom); + return; + } + } + } +} +#endif + +/* + * Perform the necessary handling of an atom that has finished running + * on the GPU. + * + * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller + * is responsible for calling kbase_finish_soft_job *before* calling this function. + * + * The caller must hold the kbase_jd_context.lock. + */ +bool jd_done_nolock(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + struct list_head completed_jobs; + struct list_head runnable_jobs; + bool need_to_try_schedule_context = false; + int i; + + INIT_LIST_HEAD(&completed_jobs); + INIT_LIST_HEAD(&runnable_jobs); + + KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); + +#if MALI_CUSTOMER_RELEASE == 0 + jd_check_force_failure(katom); +#endif + + + /* This is needed in case an atom is failed due to being invalid, this + * can happen *before* the jobs that the atom depends on have completed */ + for (i = 0; i < 2; i++) { + if (kbase_jd_katom_dep_atom(&katom->dep[i])) { + list_del(&katom->dep_item[i]); + kbase_jd_katom_dep_clear(&katom->dep[i]); + } + } + + /* With PRLAM-10817 or PRLAM-10959 the last tile of a fragment job being soft-stopped can fail with + * BASE_JD_EVENT_TILE_RANGE_FAULT. 
+ * + * So here if the fragment job failed with TILE_RANGE_FAULT and it has been soft-stopped, then we promote the + * error code to BASE_JD_EVENT_DONE + */ + + if ((kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10817) || kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_10959)) && + katom->event_code == BASE_JD_EVENT_TILE_RANGE_FAULT) { + if ((katom->core_req & BASE_JD_REQ_FS) && (katom->atom_flags & KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED)) { + /* Promote the failure to job done */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->atom_flags = katom->atom_flags & (~KBASE_KATOM_FLAG_BEEN_SOFT_STOPPPED); + } + } + + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + list_add_tail(&katom->dep_item[0], &completed_jobs); + + while (!list_empty(&completed_jobs)) { + katom = list_entry(completed_jobs.prev, struct kbase_jd_atom, dep_item[0]); + list_del(completed_jobs.prev); + + KBASE_DEBUG_ASSERT(katom->status == KBASE_JD_ATOM_STATE_COMPLETED); + + for (i = 0; i < 2; i++) + jd_resolve_dep(&runnable_jobs, katom, i, + js_kctx_info->ctx.is_dying); + + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) + kbase_jd_post_external_resources(katom); + + while (!list_empty(&runnable_jobs)) { + struct kbase_jd_atom *node; + + node = list_entry(runnable_jobs.next, + struct kbase_jd_atom, dep_item[0]); + + list_del(runnable_jobs.next); + + KBASE_DEBUG_ASSERT(node->status != KBASE_JD_ATOM_STATE_UNUSED); + + if (node->status != KBASE_JD_ATOM_STATE_COMPLETED) { + need_to_try_schedule_context |= jd_run_atom(node); + } else { + node->event_code = katom->event_code; + + if ((node->core_req & BASEP_JD_REQ_ATOM_TYPE) + == BASE_JD_REQ_SOFT_REPLAY) { + if (kbase_replay_process(node)) + /* Don't complete this atom */ + continue; + } else if (node->core_req & + BASE_JD_REQ_SOFT_JOB) { + /* If this is a fence wait then remove it from the list of sync waiters. */ + if (BASE_JD_REQ_SOFT_FENCE_WAIT == node->core_req) + list_del(&node->dep_item[0]); + + kbase_finish_soft_job(node); + } + node->status = KBASE_JD_ATOM_STATE_COMPLETED; + } + + if (node->status == KBASE_JD_ATOM_STATE_COMPLETED) + list_add_tail(&node->dep_item[0], &completed_jobs); + } + + /* Register a completed job as a disjoint event when the GPU + * is in a disjoint state (ie. being reset or replaying jobs). + */ + kbase_disjoint_event_potential(kctx->kbdev); + kbase_event_post(kctx, katom); + + /* Decrement and check the TOTAL number of jobs. This includes + * those not tracked by the scheduler: 'not ready to run' and + * 'dependency-only' jobs. 
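+ * + * job_nr is incremented once per atom in jd_submit_atom(), so it only + * reaches zero when every atom in the context, including those never + * handed to the job scheduler, has completed.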
*/ + if (--kctx->jctx.job_nr == 0) + wake_up(&kctx->jctx.zero_jobs_wait); /* All events are safely queued now, and we can signal any waiter + * that we've got no more jobs (so we can be safely terminated) */ + } + + return need_to_try_schedule_context; +} + +KBASE_EXPORT_TEST_API(jd_done_nolock); + +#ifdef CONFIG_GPU_TRACEPOINTS +enum { + CORE_REQ_DEP_ONLY, + CORE_REQ_SOFT, + CORE_REQ_COMPUTE, + CORE_REQ_FRAGMENT, + CORE_REQ_VERTEX, + CORE_REQ_TILER, + CORE_REQ_FRAGMENT_VERTEX, + CORE_REQ_FRAGMENT_VERTEX_TILER, + CORE_REQ_FRAGMENT_TILER, + CORE_REQ_VERTEX_TILER, + CORE_REQ_UNKNOWN +}; +static const char * const core_req_strings[] = { + "Dependency Only Job", + "Soft Job", + "Compute Shader Job", + "Fragment Shader Job", + "Vertex/Geometry Shader Job", + "Tiler Job", + "Fragment Shader + Vertex/Geometry Shader Job", + "Fragment Shader + Vertex/Geometry Shader Job + Tiler Job", + "Fragment Shader + Tiler Job", + "Vertex/Geometry Shader Job + Tiler Job", + "Unknown Job" +}; +static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) +{ + if (core_req & BASE_JD_REQ_SOFT_JOB) + return core_req_strings[CORE_REQ_SOFT]; + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + return core_req_strings[CORE_REQ_COMPUTE]; + switch (core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) { + case BASE_JD_REQ_DEP: + return core_req_strings[CORE_REQ_DEP_ONLY]; + case BASE_JD_REQ_FS: + return core_req_strings[CORE_REQ_FRAGMENT]; + case BASE_JD_REQ_CS: + return core_req_strings[CORE_REQ_VERTEX]; + case BASE_JD_REQ_T: + return core_req_strings[CORE_REQ_TILER]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_CS): + return core_req_strings[CORE_REQ_FRAGMENT_VERTEX]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_FRAGMENT_TILER]; + case (BASE_JD_REQ_CS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_VERTEX_TILER]; + case (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T): + return core_req_strings[CORE_REQ_FRAGMENT_VERTEX_TILER]; + } + return core_req_strings[CORE_REQ_UNKNOWN]; +} +#endif + +bool jd_submit_atom(struct kbase_context *kctx, + const struct base_jd_atom_v2 *user_atom, + struct kbase_jd_atom *katom) +{ + struct kbase_jd_context *jctx = &kctx->jctx; + base_jd_core_req core_req; + int queued = 0; + int i; + int sched_prio; + bool ret; + + /* Update the TOTAL number of jobs. This includes those not tracked by + * the scheduler: 'not ready to run' and 'dependency-only' jobs. */ + jctx->job_nr++; + + core_req = user_atom->core_req; + + katom->start_timestamp.tv64 = 0; + katom->time_spent_us = 0; + katom->udata = user_atom->udata; + katom->kctx = kctx; + katom->nr_extres = user_atom->nr_extres; + katom->extres = NULL; + katom->device_nr = user_atom->device_nr; + katom->affinity = 0; + katom->jc = user_atom->jc; + katom->coreref_state = KBASE_ATOM_COREREF_STATE_NO_CORES_REQUESTED; + katom->core_req = core_req; + katom->atom_flags = 0; + katom->retry_count = 0; + katom->need_cache_flush_cores_retained = 0; + katom->x_pre_dep = NULL; + katom->x_post_dep = NULL; +#ifdef CONFIG_KDS + /* Start by assuming that the KDS dependencies are satisfied, + * kbase_jd_pre_external_resources will correct this if there are dependencies */ + katom->kds_dep_satisfied = true; + katom->kds_rset = NULL; +#endif /* CONFIG_KDS */ + + /* Don't do anything if there is a mess up with dependencies. 
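+ For example, an atom whose second pre-dep carries a dependency_type + other than BASE_JD_DEP_TYPE_ORDER or BASE_JD_DEP_TYPE_DATA is failed + below with BASE_JD_EVENT_JOB_CONFIG_FAULT before either dependency is + linked.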
+ The check is done in a separate pass so both dependencies are validated + at once; otherwise handling the 1st dependency (just added to the list) + when only the 2nd one has an invalid config would add extra complexity. + */ + for (i = 0; i < 2; i++) { + int dep_atom_number = user_atom->pre_dep[i].atom_id; + base_jd_dep_type dep_atom_type = user_atom->pre_dep[i].dependency_type; + + if (dep_atom_number) { + if (dep_atom_type != BASE_JD_DEP_TYPE_ORDER && + dep_atom_type != BASE_JD_DEP_TYPE_DATA) { + katom->event_code = BASE_JD_EVENT_JOB_CONFIG_FAULT; + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_new_atom( + katom, + kbase_jd_atom_id(kctx, katom)); + kbase_tlstream_tl_ret_atom_ctx( + katom, kctx); +#endif + ret = jd_done_nolock(katom); + goto out; + } + } + } + + /* Add dependencies */ + for (i = 0; i < 2; i++) { + int dep_atom_number = user_atom->pre_dep[i].atom_id; + base_jd_dep_type dep_atom_type; + struct kbase_jd_atom *dep_atom = &jctx->atoms[dep_atom_number]; + + dep_atom_type = user_atom->pre_dep[i].dependency_type; + kbase_jd_katom_dep_clear(&katom->dep[i]); + + if (!dep_atom_number) + continue; + + if (dep_atom->status == KBASE_JD_ATOM_STATE_UNUSED || + dep_atom->status == KBASE_JD_ATOM_STATE_COMPLETED) { + + if (dep_atom->event_code == BASE_JD_EVENT_DONE) + continue; + /* don't stop this atom if it has an order dependency + * only to the failed one, try to submit it through + * the normal path + */ + if (dep_atom_type == BASE_JD_DEP_TYPE_ORDER && + dep_atom->event_code > BASE_JD_EVENT_ACTIVE) { + continue; + } + + if (i == 1 && kbase_jd_katom_dep_atom(&katom->dep[0])) { + /* Remove the previous dependency */ + list_del(&katom->dep_item[0]); + kbase_jd_katom_dep_clear(&katom->dep[0]); + } + + /* Atom has completed, propagate the error code if any */ + katom->event_code = dep_atom->event_code; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_new_atom( + katom, + kbase_jd_atom_id(kctx, katom)); + kbase_tlstream_tl_ret_atom_ctx(katom, kctx); +#endif + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + == BASE_JD_REQ_SOFT_REPLAY) { + if (kbase_replay_process(katom)) { + ret = false; + goto out; + } + } + ret = jd_done_nolock(katom); + + goto out; + } else { + /* Atom is in progress, add this atom to the list */ + list_add_tail(&katom->dep_item[i], &dep_atom->dep_head[i]); + kbase_jd_katom_dep_set(&katom->dep[i], dep_atom, dep_atom_type); + queued = 1; + } + } + + /* These must occur after the above loop to ensure that an atom that + * depends on a previous atom with the same number behaves as expected */ + katom->event_code = BASE_JD_EVENT_DONE; + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_new_atom( + katom, + kbase_jd_atom_id(kctx, katom)); + kbase_tlstream_tl_ret_atom_ctx(katom, kctx); +#endif + + /* Reject atoms with job chain = NULL, as these cause issues with soft-stop */ + if (!katom->jc && (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + dev_warn(kctx->kbdev->dev, "Rejecting atom with jc = NULL"); + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + ret = jd_done_nolock(katom); + goto out; + } + + /* Reject atoms with an invalid device_nr */ + if ((katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) && + (katom->device_nr >= kctx->kbdev->gpu_props.num_core_groups)) { + dev_warn(kctx->kbdev->dev, + "Rejecting atom with invalid device_nr %d", + katom->device_nr); + katom->event_code = 
BASE_JD_EVENT_JOB_INVALID; + ret = jd_done_nolock(katom); + goto out; + } + + /* For invalid priority, be most lenient and choose the default */ + sched_prio = kbasep_js_atom_prio_to_sched_prio(user_atom->prio); + if (sched_prio == KBASE_JS_ATOM_SCHED_PRIO_INVALID) + sched_prio = KBASE_JS_ATOM_SCHED_PRIO_DEFAULT; + katom->sched_priority = sched_prio; + + if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + /* handle what we need to do to access the external resources */ + if (kbase_jd_pre_external_resources(katom, user_atom) != 0) { + /* setup failed (no access, bad resource, unknown resource types, etc.) */ + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + ret = jd_done_nolock(katom); + goto out; + } + } + + /* Validate the atom. Function will return error if the atom is + * malformed. + * + * Soft-jobs never enter the job scheduler but have their own initialize method. + * + * If either fail then we immediately complete the atom with an error. + */ + if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) { + if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + ret = jd_done_nolock(katom); + goto out; + } + } else { + /* Soft-job */ + if (kbase_prepare_soft_job(katom) != 0) { + katom->event_code = BASE_JD_EVENT_JOB_INVALID; + ret = jd_done_nolock(katom); + goto out; + } + } + +#ifdef CONFIG_GPU_TRACEPOINTS + katom->work_id = atomic_inc_return(&jctx->work_id); + trace_gpu_job_enqueue((u32)kctx, katom->work_id, kbasep_map_core_reqs_to_string(katom->core_req)); +#endif + + if (queued && !IS_GPU_ATOM(katom)) { + ret = false; + goto out; + } +#ifdef CONFIG_KDS + if (!katom->kds_dep_satisfied) { + /* Queue atom due to KDS dependency */ + ret = false; + goto out; + } +#endif /* CONFIG_KDS */ + + if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) + == BASE_JD_REQ_SOFT_REPLAY) { + if (kbase_replay_process(katom)) + ret = false; + else + ret = jd_done_nolock(katom); + + goto out; + } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { + if (kbase_process_soft_job(katom) == 0) { + kbase_finish_soft_job(katom); + ret = jd_done_nolock(katom); + goto out; + } + /* The job has not yet completed */ + list_add_tail(&katom->dep_item[0], &kctx->waiting_soft_jobs); + ret = false; + } else if ((katom->core_req & BASEP_JD_REQ_ATOM_TYPE) != BASE_JD_REQ_DEP) { + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + ret = kbasep_js_add_job(kctx, katom); + /* If job was cancelled then resolve immediately */ + if (katom->event_code == BASE_JD_EVENT_JOB_CANCELLED) + ret = jd_done_nolock(katom); + } else { + /* This is a pure dependency. 
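+ * (a BASE_JD_REQ_DEP atom, which carries no job chain for the GPU)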
Resolve it immediately */ + ret = jd_done_nolock(katom); + } + + out: + return ret; +} + +#ifdef BASE_LEGACY_UK6_SUPPORT +int kbase_jd_submit(struct kbase_context *kctx, + const struct kbase_uk_job_submit *submit_data, + int uk6_atom) +#else +int kbase_jd_submit(struct kbase_context *kctx, + const struct kbase_uk_job_submit *submit_data) +#endif /* BASE_LEGACY_UK6_SUPPORT */ +{ + struct kbase_jd_context *jctx = &kctx->jctx; + int err = 0; + int i; + bool need_to_try_schedule_context = false; + struct kbase_device *kbdev; + void __user *user_addr; + + /* + * kbase_jd_submit isn't expected to fail, so any errors with the jobs + * are reported by immediately failing them (through the event system) + */ + kbdev = kctx->kbdev; + + beenthere(kctx, "%s", "Enter"); + + if ((kctx->jctx.sched_info.ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != 0) { + dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it"); + return -EINVAL; + } + +#ifdef BASE_LEGACY_UK6_SUPPORT + if ((uk6_atom && submit_data->stride != + sizeof(struct base_jd_atom_v2_uk6)) || + submit_data->stride != sizeof(base_jd_atom_v2)) { +#else + if (submit_data->stride != sizeof(base_jd_atom_v2)) { +#endif /* BASE_LEGACY_UK6_SUPPORT */ + dev_err(kbdev->dev, "Stride passed to job_submit doesn't match kernel"); + return -EINVAL; + } + + user_addr = get_compat_pointer(kctx, &submit_data->addr); + + KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_add_return(submit_data->nr_atoms, &kctx->timeline.jd_atoms_in_flight)); + + for (i = 0; i < submit_data->nr_atoms; i++) { + struct base_jd_atom_v2 user_atom; + struct kbase_jd_atom *katom; + +#ifdef BASE_LEGACY_UK6_SUPPORT + if (uk6_atom) { + struct base_jd_atom_v2_uk6 user_atom_v6; + base_jd_dep_type dep_types[2] = {BASE_JD_DEP_TYPE_DATA, BASE_JD_DEP_TYPE_DATA}; + + if (copy_from_user(&user_atom_v6, user_addr, + sizeof(user_atom_v6))) { + err = -EINVAL; + KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, + atomic_sub_return( + submit_data->nr_atoms - i, + &kctx->timeline.jd_atoms_in_flight)); + break; + } + /* Convert from UK6 atom format to UK7 format */ + user_atom.jc = user_atom_v6.jc; + user_atom.udata = user_atom_v6.udata; + user_atom.extres_list = user_atom_v6.extres_list; + user_atom.nr_extres = user_atom_v6.nr_extres; + user_atom.core_req = user_atom_v6.core_req; + + /* atom number 0 is used for no dependency atoms */ + if (!user_atom_v6.pre_dep[0]) + dep_types[0] = BASE_JD_DEP_TYPE_INVALID; + + base_jd_atom_dep_set(&user_atom.pre_dep[0], + user_atom_v6.pre_dep[0], + dep_types[0]); + + /* atom number 0 is used for no dependency atoms */ + if (!user_atom_v6.pre_dep[1]) + dep_types[1] = BASE_JD_DEP_TYPE_INVALID; + + base_jd_atom_dep_set(&user_atom.pre_dep[1], + user_atom_v6.pre_dep[1], + dep_types[1]); + + user_atom.atom_number = user_atom_v6.atom_number; + user_atom.prio = user_atom_v6.prio; + user_atom.device_nr = user_atom_v6.device_nr; + } else { +#endif /* BASE_LEGACY_UK6_SUPPORT */ + if (copy_from_user(&user_atom, user_addr, sizeof(user_atom)) != 0) { + err = -EINVAL; + KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, atomic_sub_return(submit_data->nr_atoms - i, &kctx->timeline.jd_atoms_in_flight)); + break; + } +#ifdef BASE_LEGACY_UK6_SUPPORT + } +#endif /* BASE_LEGACY_UK6_SUPPORT */ + + user_addr = (void __user *)((uintptr_t) user_addr + submit_data->stride); + + mutex_lock(&jctx->lock); +#ifndef compiletime_assert +#define compiletime_assert_defined +#define compiletime_assert(x, msg) do { switch (0) { case 0: case (x):; } } \ +while (false) +#endif + compiletime_assert((1 << 
(8*sizeof(user_atom.atom_number))) == + BASE_JD_ATOM_COUNT, + "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); + compiletime_assert(sizeof(user_atom.pre_dep[0].atom_id) == + sizeof(user_atom.atom_number), + "BASE_JD_ATOM_COUNT and base_atom_id type out of sync"); +#ifdef compiletime_assert_defined +#undef compiletime_assert +#undef compiletime_assert_defined +#endif + katom = &jctx->atoms[user_atom.atom_number]; + + while (katom->status != KBASE_JD_ATOM_STATE_UNUSED) { + /* Atom number is already in use, wait for the atom to + * complete + */ + mutex_unlock(&jctx->lock); + + /* This thread will wait for the atom to complete. Due + * to thread scheduling we are not sure that the other + * thread that owns the atom will also schedule the + * context, so we force the scheduler to be active and + * hence eventually schedule this context at some point + * later. + */ + kbase_js_sched_all(kbdev); + + if (wait_event_killable(katom->completed, + katom->status == KBASE_JD_ATOM_STATE_UNUSED)) { + /* We're being killed so the result code + * doesn't really matter + */ + return 0; + } + mutex_lock(&jctx->lock); + } + + need_to_try_schedule_context |= + jd_submit_atom(kctx, &user_atom, katom); + + /* Register a completed job as a disjoint event when the GPU is in a disjoint state + * (ie. being reset or replaying jobs). + */ + kbase_disjoint_event_potential(kbdev); + + mutex_unlock(&jctx->lock); + } + + if (need_to_try_schedule_context) + kbase_js_sched_all(kbdev); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_jd_submit); + +/** + * jd_done_worker - Handle a job completion + * @data: a &struct work_struct + * + * This function requeues the job from the runpool (if it was soft-stopped or + * removed from NEXT registers). + * + * Removes it from the system if it finished/failed/was cancelled. + * + * Resolves dependencies to add dependent jobs to the context, potentially + * starting them if necessary (which may add more references to the context) + * + * Releases the reference to the context from the no-longer-running job. + * + * Handles retrying submission outside of IRQ context if it failed from within + * IRQ context. + */ +static void jd_done_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); + struct kbase_jd_context *jctx; + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + union kbasep_js_policy *js_policy; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + u64 cache_jc = katom->jc; + struct kbasep_js_atom_retained_state katom_retained_state; + bool schedule = false; + + /* Soft jobs should never reach this function */ + KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + + kctx = katom->kctx; + jctx = &kctx->jctx; + kbdev = kctx->kbdev; + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + + KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER, kctx, katom, katom->jc, 0); + + kbase_backend_complete_wq(kbdev, katom); + + /* + * Begin transaction on JD context and JS context + */ + mutex_lock(&jctx->lock); + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + /* This worker only gets called on contexts that are scheduled *in*. This is + * because it only happens in response to an IRQ from a job that was + * running. 
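+ * + * Note the lock order taken above: jctx->lock, then queue_mutex, then + * jsctx_mutex; the paths below release them in the reverse order.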
+ */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); + + if (katom->event_code == BASE_JD_EVENT_STOPPED) { + /* Atom has been promoted to stopped */ + unsigned long flags; + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + mutex_unlock(&jctx->lock); + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + katom->status = KBASE_JD_ATOM_STATE_IN_JS; + kbase_js_unpull(kctx, katom); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return; + } + + if (katom->event_code != BASE_JD_EVENT_DONE) + dev_err(kbdev->dev, + "t6xx: GPU fault 0x%02lx from job slot %d\n", + (unsigned long)katom->event_code, + katom->slot_nr); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) + kbase_as_poking_timer_release_atom(kbdev, kctx, katom); + + /* Retain state before the katom disappears */ + kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + + if (!kbasep_js_has_atom_finished(&katom_retained_state)) { + mutex_lock(&js_devdata->runpool_mutex); + kbasep_js_clear_job_retry_submit(katom); + /* An atom that has been hard-stopped might have previously + * been soft-stopped and has just finished before the hard-stop + * occurred. For this reason, clear the hard-stopped flag */ + katom->atom_flags &= ~(KBASE_KATOM_FLAG_BEEN_HARD_STOPPED); + mutex_unlock(&js_devdata->runpool_mutex); + } + + if (kbasep_js_has_atom_finished(&katom_retained_state)) + schedule = true; + + kbase_js_complete_atom_wq(kctx, katom); + + KBASE_DEBUG_ASSERT(kbasep_js_has_atom_finished(&katom_retained_state)); + + kbasep_js_remove_job(kbdev, kctx, katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; + /* jd_done_nolock() requires the jsctx_mutex lock to be dropped */ + schedule |= jd_done_nolock(katom); + + /* katom may have been freed now, do not use! */ + + /* + * Transaction complete + */ + mutex_unlock(&jctx->lock); + + /* Job is now no longer running, so can now safely release the context + * reference, and handle any actions that were logged against the atom's retained state */ + + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, &katom_retained_state); + + if (schedule) + kbase_js_sched_all(kbdev); + + KBASE_TRACE_ADD(kbdev, JD_DONE_WORKER_END, kctx, NULL, cache_jc, 0); +} + +/** + * jd_cancel_worker - Work queue job cancel function. + * @data: a &struct work_struct + * + * Only called as part of 'Zapping' a context (which occurs on termination). + * Operates serially with the jd_done_worker() on the work queue. + * + * This can only be called on contexts that aren't scheduled. + * + * We don't need to release most of the resources that would occur on + * kbase_jd_done() or jd_done_worker(), because the atoms here must not be + * running (by virtue of only being called on contexts that aren't + * scheduled). 
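+ * + * Unlike jd_done_worker(), no runpool reference is held for the atom, + * so only the scheduler bookkeeping and jd_done_nolock() are required.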
+ */ +static void jd_cancel_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, work); + struct kbase_jd_context *jctx; + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + bool need_to_try_schedule_context; + bool attr_state_changed; + struct kbase_device *kbdev; + + /* Soft jobs should never reach this function */ + KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + + kctx = katom->kctx; + kbdev = kctx->kbdev; + jctx = &kctx->jctx; + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); + + /* This only gets called on contexts that are scheduled out. Hence, we must + * make sure we don't de-ref the number of running jobs (there aren't + * any), nor must we try to schedule out the context (it's already + * scheduled out). + */ + KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); + + /* Scheduler: Remove the job from the system */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + attr_state_changed = kbasep_js_remove_cancelled_job(kbdev, kctx, katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&jctx->lock); + + need_to_try_schedule_context = jd_done_nolock(katom); + /* Because we're zapping, we're not adding any more jobs to this ctx, so no need to + * schedule the context. There's also no need for the jsctx_mutex to have been taken + * around this too. */ + KBASE_DEBUG_ASSERT(!need_to_try_schedule_context); + + /* katom may have been freed now, do not use! */ + mutex_unlock(&jctx->lock); + + if (attr_state_changed) + kbase_js_sched_all(kbdev); +} + +/** + * jd_evict_worker - Work queue job evict function + * @data: a &struct work_struct + * + * Only called as part of evicting failed jobs. This is only called on jobs that + * were never submitted to HW Access. Jobs that were submitted are handled + * through jd_done_worker(). + * Operates serially with the jd_done_worker() on the work queue. + * + * We don't need to release most of the resources that would occur on + * kbase_jd_done() or jd_done_worker(), because the atoms here must not be + * running (by virtue of having not been submitted to HW Access). + */ +static void jd_evict_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, + work); + struct kbase_jd_context *jctx; + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + struct kbase_device *kbdev; + + /* Soft jobs should never reach this function */ + KBASE_DEBUG_ASSERT((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0); + + kctx = katom->kctx; + kbdev = kctx->kbdev; + jctx = &kctx->jctx; + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TRACE_ADD(kbdev, JD_CANCEL_WORKER, kctx, katom, katom->jc, 0); + + /* Scheduler: Remove the job from the system */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + kbasep_js_remove_cancelled_job(kbdev, kctx, katom); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&jctx->lock); + jd_done_nolock(katom); + /* katom may have been freed now, do not use! 
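+ * (jd_done_nolock() posts the completion event, after which the atom + * slot may be reused by a new submission.)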
*/ + mutex_unlock(&jctx->lock); + + kbase_js_sched_all(kbdev); +} + +/** + * kbase_jd_done - Complete a job that has been removed from the Hardware + * @katom: atom which has been completed + * @slot_nr: slot the atom was on + * @end_timestamp: completion time + * @done_code: completion code + * + * This must be used whenever a job has been removed from the Hardware, e.g.: + * An IRQ indicates that the job finished (for both error and 'done' codes), or + * the job was evicted from the JS_HEAD_NEXT registers during a Soft/Hard stop. + * + * Some work is carried out immediately, and the rest is deferred onto a + * workqueue + * + * Context: + * This can be called safely from atomic context. + * The caller must hold kbasep_js_device_data.runpool_irq.lock + */ +void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, + ktime_t *end_timestamp, kbasep_js_atom_done_code done_code) +{ + struct kbase_context *kctx; + struct kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(katom); + kctx = katom->kctx; + KBASE_DEBUG_ASSERT(kctx); + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev); + + if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + + KBASE_TRACE_ADD(kbdev, JD_DONE, kctx, katom, katom->jc, 0); + + kbase_job_check_leave_disjoint(kbdev, katom); + + katom->slot_nr = slot_nr; + + WARN_ON(work_pending(&katom->work)); + + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, jd_done_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +KBASE_EXPORT_TEST_API(kbase_jd_done); + +void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != katom); + kctx = katom->kctx; + KBASE_DEBUG_ASSERT(NULL != kctx); + + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); + + /* This should only be done from a context that is not scheduled */ + KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); + + WARN_ON(work_pending(&katom->work)); + + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, jd_cancel_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +void kbase_jd_evict(struct kbase_device *kbdev, struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(NULL != kbdev); + KBASE_DEBUG_ASSERT(NULL != katom); + kctx = katom->kctx; + KBASE_DEBUG_ASSERT(NULL != kctx); + + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TRACE_ADD(kbdev, JD_CANCEL, kctx, katom, katom->jc, 0); + + /* This should only be done from a context that is currently scheduled + */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); + + WARN_ON(work_pending(&katom->work)); + + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, jd_evict_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +void kbase_jd_zap_context(struct kbase_context *kctx) +{ + struct kbase_jd_atom *katom; + struct list_head *entry, *tmp; + struct kbase_device *kbdev; + + KBASE_DEBUG_ASSERT(kctx); + + kbdev = kctx->kbdev; + + KBASE_TRACE_ADD(kbdev, JD_ZAP_CONTEXT, kctx, NULL, 0u, 0u); + + kbase_js_zap_context(kctx); + kbase_jm_wait_for_zero_jobs(kctx); + + mutex_lock(&kctx->jctx.lock); + + /* + * While holding the struct kbase_jd_context lock clean up 
jobs which are known to kbase but are + * queued outside the job scheduler. + */ + + list_for_each_safe(entry, tmp, &kctx->waiting_soft_jobs) { + katom = list_entry(entry, struct kbase_jd_atom, dep_item[0]); + kbase_cancel_soft_job(katom); + } + + +#ifdef CONFIG_KDS + + /* For each job waiting on a kds resource, cancel the wait and force the job to + * complete early, this is done so that we don't leave jobs outstanding waiting + * on kds resources which may never be released when contexts are zapped, resulting + * in a hang. + * + * Note that we can safely iterate over the list as the struct kbase_jd_context lock is held, + * this prevents items being removed when calling job_done_nolock in kbase_cancel_kds_wait_job. + */ + + list_for_each(entry, &kctx->waiting_kds_resource) { + katom = list_entry(entry, struct kbase_jd_atom, node); + + kbase_cancel_kds_wait_job(katom); + } +#endif + + mutex_unlock(&kctx->jctx.lock); +} + +KBASE_EXPORT_TEST_API(kbase_jd_zap_context); + +int kbase_jd_init(struct kbase_context *kctx) +{ + int i; + int mali_err = 0; +#ifdef CONFIG_KDS + int err; +#endif /* CONFIG_KDS */ + + KBASE_DEBUG_ASSERT(kctx); + + kctx->jctx.job_done_wq = alloc_workqueue("mali_jd", 0, 1); + if (NULL == kctx->jctx.job_done_wq) { + mali_err = -ENOMEM; + goto out1; + } + + for (i = 0; i < BASE_JD_ATOM_COUNT; i++) { + init_waitqueue_head(&kctx->jctx.atoms[i].completed); + + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[0]); + INIT_LIST_HEAD(&kctx->jctx.atoms[i].dep_head[1]); + + /* Catch userspace attempting to use an atom which doesn't exist as a pre-dependency */ + kctx->jctx.atoms[i].event_code = BASE_JD_EVENT_JOB_INVALID; + kctx->jctx.atoms[i].status = KBASE_JD_ATOM_STATE_UNUSED; + } + + mutex_init(&kctx->jctx.lock); + + init_waitqueue_head(&kctx->jctx.zero_jobs_wait); + + spin_lock_init(&kctx->jctx.tb_lock); + +#ifdef CONFIG_KDS + err = kds_callback_init(&kctx->jctx.kds_cb, 0, kds_dep_clear); + if (0 != err) { + mali_err = -EINVAL; + goto out2; + } +#endif /* CONFIG_KDS */ + + kctx->jctx.job_nr = 0; + + return 0; + +#ifdef CONFIG_KDS + out2: + destroy_workqueue(kctx->jctx.job_done_wq); +#endif /* CONFIG_KDS */ + out1: + return mali_err; +} + +KBASE_EXPORT_TEST_API(kbase_jd_init); + +void kbase_jd_exit(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx); + +#ifdef CONFIG_KDS + kds_callback_term(&kctx->jctx.kds_cb); +#endif /* CONFIG_KDS */ + /* Work queue is emptied by this */ + destroy_workqueue(kctx->jctx.job_done_wq); +} + +KBASE_EXPORT_TEST_API(kbase_jd_exit); diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c new file mode 100644 index 00000000000..b37f280a647 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.c @@ -0,0 +1,114 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <linux/seq_file.h> + +#include <mali_kbase_jd_debugfs.h> + +#ifdef CONFIG_DEBUG_FS + +/** + * kbasep_jd_debugfs_atoms_show - Show callback for the JD atoms debugfs file. 
+ * @sfile: The debugfs entry + * @data: Data associated with the entry + * + * This function is called to get the contents of the JD atoms debugfs file. + * This is a report of all atoms managed by kbase_jd_context.atoms + * + * Return: 0 if successfully prints data in debugfs entry file, failure + * otherwise + */ +static int kbasep_jd_debugfs_atoms_show(struct seq_file *sfile, void *data) +{ + struct kbase_context *kctx = sfile->private; + struct kbase_jd_atom *atoms; + unsigned long irq_flags; + int i; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + /* Print table heading */ + seq_puts(sfile, "atom id,core reqs,status,coreref status,predeps,start time,time on gpu\n"); + + atoms = kctx->jctx.atoms; + /* General atom states */ + mutex_lock(&kctx->jctx.lock); + /* JS-related states */ + spin_lock_irqsave(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags); + for (i = 0; i != BASE_JD_ATOM_COUNT; ++i) { + struct kbase_jd_atom *atom = &atoms[i]; + s64 start_timestamp = 0; + + if (atom->status == KBASE_JD_ATOM_STATE_UNUSED) + continue; + + /* start_timestamp is cleared as soon as the atom leaves UNUSED state + * and set before a job is submitted to the h/w, a non-zero value means + * it is valid */ + if (ktime_to_ns(atom->start_timestamp)) + start_timestamp = ktime_to_ns( + ktime_sub(ktime_get(), atom->start_timestamp)); + + seq_printf(sfile, + "%i,%u,%u,%u,%u %u,%lli,%llu\n", + i, atom->core_req, atom->status, atom->coreref_state, + (unsigned)(atom->dep[0].atom ? + atom->dep[0].atom - atoms : 0), + (unsigned)(atom->dep[1].atom ? + atom->dep[1].atom - atoms : 0), + (signed long long)start_timestamp, + (unsigned long long)(atom->time_spent_us ? + atom->time_spent_us * 1000 : start_timestamp) + ); + } + spin_unlock_irqrestore(&kctx->kbdev->js_data.runpool_irq.lock, irq_flags); + mutex_unlock(&kctx->jctx.lock); + + return 0; +} + + +/** + * kbasep_jd_debugfs_atoms_open - open operation for atom debugfs file + * @in: &struct inode pointer + * @file: &struct file pointer + * + * Return: file descriptor + */ +static int kbasep_jd_debugfs_atoms_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_jd_debugfs_atoms_show, in->i_private); +} + +static const struct file_operations kbasep_jd_debugfs_atoms_fops = { + .open = kbasep_jd_debugfs_atoms_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + + /* Expose all atoms */ + debugfs_create_file("atoms", S_IRUGO, kctx->kctx_dentry, kctx, + &kbasep_jd_debugfs_atoms_fops); + +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h new file mode 100644 index 00000000000..703e4cf6a5f --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_jd_debugfs.h @@ -0,0 +1,37 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +/** + * @file mali_kbase_jd_debugfs.h + * Header file for job dispatcher-related entries in debugfs + */ + +#ifndef _KBASE_JD_DEBUGFS_H +#define _KBASE_JD_DEBUGFS_H + +#include <linux/debugfs.h> + +#include <mali_kbase.h> + +/** + * kbasep_jd_debugfs_ctx_add() - Add debugfs entries for JD system + * + * @kctx Pointer to kbase_context + */ +void kbasep_jd_debugfs_ctx_add(struct kbase_context *kctx); + +#endif /*_KBASE_JD_DEBUGFS_H*/ diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.c b/drivers/gpu/arm/midgard/mali_kbase_jm.c new file mode 100644 index 00000000000..bc5ad805818 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_jm.c @@ -0,0 +1,132 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * HW access job manager common APIs + */ + +#include <mali_kbase.h> +#include "mali_kbase_hwaccess_jm.h" +#include "mali_kbase_jm.h" + +/** + * kbase_jm_next_job() - Attempt to run the next @nr_jobs_to_submit jobs on slot + * @js on the active context. + * @kbdev: Device pointer + * @js: Job slot to run on + * @nr_jobs_to_submit: Number of jobs to attempt to submit + * + * Return: true if slot can still be submitted on, false if slot is now full. + */ +static bool kbase_jm_next_job(struct kbase_device *kbdev, int js, + int nr_jobs_to_submit) +{ + struct kbase_context *kctx; + int i; + + kctx = kbdev->hwaccess.active_kctx; + + if (!kctx) + return true; + + for (i = 0; i < nr_jobs_to_submit; i++) { + struct kbase_jd_atom *katom = kbase_js_pull(kctx, js); + + if (!katom) + return true; /* Context has no jobs on this slot */ + + kbase_backend_run_atom(kbdev, katom); + } + + return false; /* Slot ringbuffer should now be full */ +} + +u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask) +{ + u32 ret_mask = 0; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + while (js_mask) { + int js = ffs(js_mask) - 1; + int nr_jobs_to_submit = kbase_backend_slot_free(kbdev, js); + + if (kbase_jm_next_job(kbdev, js, nr_jobs_to_submit)) + ret_mask |= (1 << js); + + js_mask &= ~(1 << js); + } + + return ret_mask; +} + +void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + if (!down_trylock(&js_devdata->schedule_sem)) { + kbase_jm_kick(kbdev, js_mask); + up(&js_devdata->schedule_sem); + } +} + +void kbase_jm_try_kick_all(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + if (!down_trylock(&js_devdata->schedule_sem)) { + kbase_jm_kick_all(kbdev); + up(&js_devdata->schedule_sem); + } +} + +void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + WARN_ON(atomic_read(&kctx->atoms_pulled)); + + if (kbdev->hwaccess.active_kctx == kctx) + kbdev->hwaccess.active_kctx = NULL; +} + +void kbase_jm_return_atom_to_js(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + 
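/* Atoms that come back with BASE_JD_EVENT_STOPPED or + * BASE_JD_EVENT_REMOVED_FROM_NEXT are handed back to the scheduler via + * kbase_js_unpull(); any other event code means the atom is complete + * and is retired through kbase_js_complete_atom(). */ + 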
lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + if (katom->event_code != BASE_JD_EVENT_STOPPED && + katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) { + kbase_js_complete_atom(katom, NULL); + } else { + kbase_js_unpull(katom->kctx, katom); + } +} + +void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + ktime_t *end_timestamp) +{ + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + kbase_js_complete_atom(katom, end_timestamp); +} + diff --git a/drivers/gpu/arm/midgard/mali_kbase_jm.h b/drivers/gpu/arm/midgard/mali_kbase_jm.h new file mode 100644 index 00000000000..27aca3a699f --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_jm.h @@ -0,0 +1,106 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +/* + * Job manager common APIs + */ + +#ifndef _KBASE_JM_H_ +#define _KBASE_JM_H_ + +/** + * kbase_jm_kick() - Indicate that there are jobs ready to run. + * @kbdev: Device pointer + * @js_mask: Mask of the job slots that can be pulled from. + * + * Caller must hold the runpool_irq lock and schedule_sem semaphore + * + * Return: Mask of the job slots that can still be submitted to. + */ +u32 kbase_jm_kick(struct kbase_device *kbdev, u32 js_mask); + +/** + * kbase_jm_kick_all() - Indicate that there are jobs ready to run on all job + * slots. + * @kbdev: Device pointer + * + * Caller must hold the runpool_irq lock and schedule_sem semaphore + * + * Return: Mask of the job slots that can still be submitted to. + */ +static inline u32 kbase_jm_kick_all(struct kbase_device *kbdev) +{ + return kbase_jm_kick(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1); +} + +/** + * kbase_jm_try_kick - Attempt to call kbase_jm_kick + * @kbdev: Device pointer + * @js_mask: Mask of the job slots that can be pulled from + * Context: Caller must hold runpool_irq lock + * + * If schedule_sem can be immediately obtained then this function will call + * kbase_jm_kick() otherwise it will do nothing. + */ +void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask); + +/** + * kbase_jm_try_kick_all() - Attempt to call kbase_jm_kick_all + * @kbdev: Device pointer + * Context: Caller must hold runpool_irq lock + * + * If schedule_sem can be immediately obtained then this function will call + * kbase_jm_kick_all() otherwise it will do nothing. + */ +void kbase_jm_try_kick_all(struct kbase_device *kbdev); + +/** + * kbase_jm_idle_ctx() - Mark a context as idle. + * @kbdev: Device pointer + * @kctx: Context to mark as idle + * + * No more atoms will be pulled from this context until it is marked as active + * by kbase_js_use_ctx(). + * + * The context should have no atoms currently pulled from it + * (kctx->atoms_pulled == 0). 
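+ * The implementation enforces this with a WARN_ON on atoms_pulled.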
+ * + * Caller must hold the runpool_irq lock + */ +void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has + * been soft-stopped or will fail due to a + * dependency + * @kbdev: Device pointer + * @katom: Atom that has been stopped or will be failed + */ +void kbase_jm_return_atom_to_js(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/** + * kbase_jm_complete() - Complete an atom + * @kbdev: Device pointer + * @katom: Atom that has completed + * @end_timestamp: Timestamp of atom completion + */ +void kbase_jm_complete(struct kbase_device *kbdev, struct kbase_jd_atom *katom, + ktime_t *end_timestamp); + +#endif /* _KBASE_JM_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.c b/drivers/gpu/arm/midgard/mali_kbase_js.c new file mode 100644 index 00000000000..c49d622b761 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js.c @@ -0,0 +1,3177 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/* + * Job Scheduler Implementation + */ +#include <mali_kbase.h> +#include <mali_kbase_js.h> +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#include <mali_kbase_gator.h> +#endif +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include <mali_kbase_tlstream.h> +#endif +#include <mali_kbase_hw.h> + +#include <mali_kbase_defs.h> +#include <mali_kbase_config_defaults.h> + +#include "mali_kbase_jm.h" +#include "mali_kbase_hwaccess_jm.h" + +/* + * Private types + */ + +/* Bitpattern indicating the result of releasing a context */ +enum { + /* The context was descheduled - caller should try scheduling in a new + * one to keep the runpool full */ + KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED = (1u << 0), + /* Ctx attributes were changed - caller should try scheduling all + * contexts */ + KBASEP_JS_RELEASE_RESULT_SCHED_ALL = (1u << 1) +}; + +typedef u32 kbasep_js_release_result; + +const int kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS] = { + KBASE_JS_ATOM_SCHED_PRIO_MED, /* BASE_JD_PRIO_MEDIUM */ + KBASE_JS_ATOM_SCHED_PRIO_HIGH, /* BASE_JD_PRIO_HIGH */ + KBASE_JS_ATOM_SCHED_PRIO_LOW /* BASE_JD_PRIO_LOW */ +}; + +const base_jd_prio +kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT] = { + BASE_JD_PRIO_HIGH, /* KBASE_JS_ATOM_SCHED_PRIO_HIGH */ + BASE_JD_PRIO_MEDIUM, /* KBASE_JS_ATOM_SCHED_PRIO_MED */ + BASE_JD_PRIO_LOW /* KBASE_JS_ATOM_SCHED_PRIO_LOW */ +}; + + +/* + * Private function prototypes + */ +static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( + struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state); + +static int kbase_js_get_slot(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, + kbasep_js_policy_ctx_job_cb callback); + +static bool kbase_js_evict_atom(struct kbase_context *kctx, + struct kbase_jd_atom *katom_evict, + struct kbase_jd_atom *start_katom, + struct kbase_jd_atom *head_katom, + struct list_head 
*evict_list, + struct jsctx_rb *rb, int idx); + +/* Helper for trace subcodes */ +#if KBASE_TRACE_ENABLE +static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + int as_nr; + int refcnt = 0; + + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + as_nr = kctx->as_nr; + if (as_nr != KBASEP_AS_NR_INVALID) { + struct kbasep_js_per_as_data *js_per_as_data; + + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + refcnt = js_per_as_data->as_busy_refcount; + } + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return refcnt; +} + +static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + int as_nr; + int refcnt = 0; + + js_devdata = &kbdev->js_data; + + as_nr = kctx->as_nr; + if (as_nr != KBASEP_AS_NR_INVALID) { + struct kbasep_js_per_as_data *js_per_as_data; + + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + refcnt = js_per_as_data->as_busy_refcount; + } + + return refcnt; +} +#else /* KBASE_TRACE_ENABLE */ +static int kbasep_js_trace_get_refcnt(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + CSTD_UNUSED(kbdev); + CSTD_UNUSED(kctx); + return 0; +} +static int kbasep_js_trace_get_refcnt_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + CSTD_UNUSED(kbdev); + CSTD_UNUSED(kctx); + return 0; +} +#endif /* KBASE_TRACE_ENABLE */ + +/* + * Private types + */ +enum { + JS_DEVDATA_INIT_NONE = 0, + JS_DEVDATA_INIT_CONSTANTS = (1 << 0), + JS_DEVDATA_INIT_POLICY = (1 << 1), + JS_DEVDATA_INIT_ALL = ((1 << 2) - 1) +}; + +enum { + JS_KCTX_INIT_NONE = 0, + JS_KCTX_INIT_CONSTANTS = (1 << 0), + JS_KCTX_INIT_POLICY = (1 << 1), + JS_KCTX_INIT_ALL = ((1 << 2) - 1) +}; + +/* + * Private functions + */ + +/** + * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements + * @features: JSn_FEATURE register value + * + * Given a JSn_FEATURE register value returns the core requirements that match + * + * Return: Core requirement bit mask + */ +static base_jd_core_req core_reqs_from_jsn_features(u16 features) +{ + base_jd_core_req core_req = 0u; + + if ((features & JS_FEATURE_SET_VALUE_JOB) != 0) + core_req |= BASE_JD_REQ_V; + + if ((features & JS_FEATURE_CACHE_FLUSH_JOB) != 0) + core_req |= BASE_JD_REQ_CF; + + if ((features & JS_FEATURE_COMPUTE_JOB) != 0) + core_req |= BASE_JD_REQ_CS; + + if ((features & JS_FEATURE_TILER_JOB) != 0) + core_req |= BASE_JD_REQ_T; + + if ((features & JS_FEATURE_FRAGMENT_JOB) != 0) + core_req |= BASE_JD_REQ_FS; + + return core_req; +} + +static void kbase_js_sync_timers(struct kbase_device *kbdev) +{ + mutex_lock(&kbdev->js_data.runpool_mutex); + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&kbdev->js_data.runpool_mutex); +} + +/* Hold the kbasep_js_device_data::runpool_irq::lock for this */ +bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_per_as_data *js_per_as_data; + bool result = false; + int as_nr; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_devdata = &kbdev->js_data; + + as_nr = kctx->as_nr; + if (as_nr != KBASEP_AS_NR_INVALID) { + int new_refcnt; + + KBASE_DEBUG_ASSERT(as_nr >= 0); + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + KBASE_DEBUG_ASSERT(js_per_as_data->kctx 
!= NULL); + + new_refcnt = ++(js_per_as_data->as_busy_refcount); + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RETAIN_CTX_NOLOCK, kctx, + NULL, 0u, new_refcnt); + result = true; + } + + return result; +} + +/** + * jsctx_rb_is_empty_prio(): - Check if ring buffer is empty + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * @prio: Priority to check. + * + * Return true if there are no atoms to pull. There may be running atoms in the + * ring buffer even if there are no atoms to pull. It is also possible for the + * ring buffer to be full (with running atoms) when this functions returns + * true. + * + * Caller must hold runpool_irq.lock + * + * Return: true if there are no atoms to pull, false otherwise. + */ +static inline bool +jsctx_rb_is_empty_prio(struct kbase_context *kctx, int js, int prio) +{ + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + return rb->read_idx == rb->write_idx; +} + +/** + * jsctx_rb_is_empty(): - Check if all priority ring buffers are empty + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * + * Caller must hold runpool_irq.lock + * + * Return: true if the ring buffers for all priorities are empty, false + * otherwise. + */ +static inline bool +jsctx_rb_is_empty(struct kbase_context *kctx, int js) +{ + int prio; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + if (!jsctx_rb_is_empty_prio(kctx, js, prio)) + return false; + } + + return true; +} + +/** + * jsctx_rb_compact_prio(): - Compact a ring buffer + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to compact. + * @prio: Priority id to compact. + */ +static inline void +jsctx_rb_compact_prio(struct kbase_context *kctx, int js, int prio) +{ + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + u16 compact_idx = rb->write_idx - 1; + u16 end_idx = rb->running_idx - 1; + u16 i; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); + + for (i = compact_idx; i != end_idx; i--) { + if (rb->entries[i & JSCTX_RB_MASK].atom_id != + KBASEP_ATOM_ID_INVALID) { + WARN_ON(compact_idx < rb->running_idx); + rb->entries[compact_idx & JSCTX_RB_MASK].atom_id = + rb->entries[i & JSCTX_RB_MASK].atom_id; + + compact_idx--; + } + if (rb->read_idx == i) + rb->read_idx = compact_idx + 1; + } + + rb->running_idx = compact_idx + 1; +} + +/** + * jsctx_rb_compact(): - Compact all priority ring buffers + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to compact. + */ +static inline void +jsctx_rb_compact(struct kbase_context *kctx, int js) +{ + int prio; + + for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) + jsctx_rb_compact_prio(kctx, js, prio); +} + +/** + * jsctx_rb_foreach_prio(): - Execute callback for each entry in ring buffer + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to iterate. + * @prio: Priority id to iterate. + * @callback: Function pointer to callback. + * + * Iterate over a ring buffer and invoke @callback for each entry in buffer, and + * remove the entry from the buffer. + * + * If entries are added to the ring buffer while this is running those entries + * may, or may not be covered. To ensure that all entries in the buffer have + * been enumerated when this function returns jsctx->lock must be held when + * calling this function. 
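+ * (write_idx is sampled once with ACCESS_ONCE() at the start of the + * walk, so entries added after that point may not be visited.)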
+ * + * The HW access lock, js_data.runpool_irq.lock, must always be held when + * calling this function. + */ +static void +jsctx_rb_foreach_prio(struct kbase_context *kctx, int js, int prio, + kbasep_js_policy_ctx_job_cb callback) +{ + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + struct kbase_jd_atom *katom; + u16 write_idx = ACCESS_ONCE(rb->write_idx); + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + /* There must be no jobs currently in HW access */ + WARN_ON(rb->read_idx != rb->running_idx); + + /* Invoke callback on all kbase_jd_atoms in the ring buffer, and + * removes them from the buffer */ + while (rb->read_idx != write_idx) { + int id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; + + katom = kbase_jd_atom_from_id(kctx, id); + + rb->read_idx++; + rb->running_idx++; + + callback(kctx->kbdev, katom); + } +} + +/** + * jsctx_rb_foreach(): - Execute callback for each entry in all priority rb + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to iterate. + * @callback: Function pointer to callback. + * + * Iterate over all the different priorities, and for each call + * jsctx_rb_foreach_prio() to iterate over the ring buffer and invoke @callback + * for each entry in buffer, and remove the entry from the buffer. + */ +static inline void +jsctx_rb_foreach(struct kbase_context *kctx, int js, + kbasep_js_policy_ctx_job_cb callback) +{ + int prio; + + for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) + jsctx_rb_foreach_prio(kctx, js, prio, callback); +} + +/** + * jsctx_rb_peek_prio(): - Check buffer and get next atom + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * @prio: Priority id to check. + * + * Check the ring buffer for the specified @js and @prio and return a pointer to + * the next atom, unless the ring buffer is empty. + * + * Return: Pointer to next atom in buffer, or NULL if there is no atom. + */ +static inline struct kbase_jd_atom * +jsctx_rb_peek_prio(struct kbase_context *kctx, int js, int prio) +{ + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + int id; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + if (jsctx_rb_is_empty_prio(kctx, js, prio)) + return NULL; + + id = rb->entries[rb->read_idx & JSCTX_RB_MASK].atom_id; + return kbase_jd_atom_from_id(kctx, id); +} + +/** + * jsctx_rb_peek(): - Check all priority buffers and get next atom + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * + * Check the ring buffers for all priorities, starting from + * KBASE_JS_ATOM_SCHED_PRIO_HIGH, for the specified @js and @prio and return a + * pointer to the next atom, unless all the priority's ring buffers are empty. + * + * Return: Pointer to next atom in buffer, or NULL if there is no atom. + */ +static inline struct kbase_jd_atom * +jsctx_rb_peek(struct kbase_context *kctx, int js) +{ + int prio; + + for (prio = 0; prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT; prio++) { + struct kbase_jd_atom *katom; + + katom = jsctx_rb_peek_prio(kctx, js, prio); + if (katom) + return katom; + } + + return NULL; +} + +/** + * jsctx_rb_peek_last(): - Check a ring buffer and get the last atom + * @kctx: Pointer to kbase context with ring buffer. + * @js: Job slot id to check. + * @prio: Priority id to check. + * + * Check the ring buffer for the specified @js and @prio and return a + * pointer to the last atom, unless all the priority's ring buffers are empty. 
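+ * Unlike jsctx_rb_peek(), this reads from the write end of a single + * priority's buffer rather than from the read end.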
+ * + * The last atom is the atom that was added using jsctx_rb_add_atom() most + * recently. + * + * Return: Pointer to last atom in buffer, or NULL if there is no atom. + */ +static inline struct kbase_jd_atom * +jsctx_rb_peek_last(struct kbase_context *kctx, int js, int prio) +{ + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + int id; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); + + if (jsctx_rb_is_empty_prio(kctx, js, prio)) + return NULL; + + id = rb->entries[(rb->write_idx - 1) & JSCTX_RB_MASK].atom_id; + return kbase_jd_atom_from_id(kctx, id); +} + +/** + * jsctx_rb_pull(): - Mark atom in list as running + * @kctx: Pointer to kbase context with ring buffer. + * @katom: Pointer to katom to pull. + * + * Mark an atom previously obtained from jsctx_rb_peek() as running. + * + * @katom must currently be at the head of the ring buffer. + */ +static inline void +jsctx_rb_pull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + /* Atoms must be pulled in the correct order. */ + WARN_ON(katom != jsctx_rb_peek_prio(kctx, js, prio)); + + rb->read_idx++; +} + +/** + * jsctx_rb_unpull(): - Undo marking of atom in list as running + * @kctx: Pointer to kbase context with ring buffer. + * @katom: Pointer to katom to unpull. + * + * Undo jsctx_rb_pull() and put @katom back in the queue. + * + * jsctx_rb_unpull() must be called on atoms in the same order the atoms were + * pulled. + */ +static inline void +jsctx_rb_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + /* Atoms must be unpulled in correct order. */ + WARN_ON(rb->entries[(rb->read_idx - 1) & JSCTX_RB_MASK].atom_id != + kbase_jd_atom_id(kctx, katom)); + + rb->read_idx--; +} + +/** + * jsctx_rb_add_atom(): - Add atom to ring buffer + * @kctx: Pointer to kbase context with ring buffer. + * @katom: Pointer to katom to add. + * + * Add @katom to the ring buffer determined by the atom's priority and job slot + * number. + * + * If the ring buffer is full, -EBUSY will be returned. + * + * Return: On success 0 is returned, on failure a negative error code. + */ +static int +jsctx_rb_add_atom(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + + lockdep_assert_held(&kctx->jctx.lock); + + /* Check if the ring buffer is full */ + if ((rb->write_idx - rb->running_idx) >= JSCTX_RB_SIZE) + return -EBUSY; + + rb->entries[rb->write_idx & JSCTX_RB_MASK].atom_id = + kbase_jd_atom_id(kctx, katom); + rb->write_idx++; + + return 0; +} + +/** + * jsctx_rb_remove(): - Remove atom from ring buffer + * @kctx: Pointer to kbase context with ring buffer. + * @katom: Pointer to katom to remove. + * + * Remove @katom from the ring buffer. + * + * @katom must have been pulled from the buffer earlier by jsctx_rb_pull(), and + * atoms must be removed in the same order they were pulled from the ring + * buffer.
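+ * + * As a worked example (the atom ids are hypothetical): if atoms 3, 7 and 9 + * were pulled in that order, they must be removed in the order 3, then 7, + * then 9. Each removal advances running_idx by one, so running_idx always + * trails read_idx while any pulled atoms remain in HW access.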
+ */ +static inline void +jsctx_rb_remove(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + int prio = katom->sched_priority; + int js = katom->slot_nr; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + /* Atoms must be completed in order. */ + WARN_ON(rb->entries[rb->running_idx & JSCTX_RB_MASK].atom_id != + kbase_jd_atom_id(kctx, katom)); + + rb->running_idx++; +} + +/** + * jsctx_rb_evict(): - Evict atom, and dependents, from ring buffer + * @kctx: Pointer to kbase context with ring buffer. + * @start_katom: Pointer to the first katom to evict. + * @head_katom: Pointer to head katom. + * @evict_list: Pointer to head of list where evicted atoms are added. + * + * Iterate over the ring buffer starting at @start_katom and evict @start_katom + * and its dependent atoms from the ring buffer. + * + * @evict_list and @head_katom are passed on to kbase_js_evict_atom() which will + * examine the atom dependencies. + * + * jsctx_rb_evict() is only called by kbase_js_evict_deps(). + */ +static void +jsctx_rb_evict(struct kbase_context *kctx, + struct kbase_jd_atom *start_katom, + struct kbase_jd_atom *head_katom, + struct list_head *evict_list) +{ + int prio = start_katom->sched_priority; + int js = start_katom->slot_nr; + struct jsctx_rb *rb = &kctx->jsctx_rb[prio][js]; + bool atom_in_rb = false; + u16 i, start_idx; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + lockdep_assert_held(&kctx->jctx.lock); + + for (i = rb->running_idx; i != rb->write_idx; i++) { + if (rb->entries[i & JSCTX_RB_MASK].atom_id == + kbase_jd_atom_id(kctx, start_katom)) { + start_idx = i; + atom_in_rb = true; + break; + } + } + + /* start_katom must still be in ring buffer. */ + if (i == rb->write_idx || !atom_in_rb) + return; + + /* Evict all dependencies on same slot. */ + for (i = start_idx; i != rb->write_idx; i++) { + u8 katom_evict; + + katom_evict = rb->entries[i & JSCTX_RB_MASK].atom_id; + if (katom_evict != KBASEP_ATOM_ID_INVALID) { + if (!kbase_js_evict_atom(kctx, + &kctx->jctx.atoms[katom_evict], + start_katom, head_katom, + evict_list, rb, i)) + break; + } + } +} + +/* + * Functions private to KBase ('Protected' functions) + */ +int kbasep_js_devdata_init(struct kbase_device * const kbdev) +{ + struct kbasep_js_device_data *jsdd; + int err; + int i; + u16 as_present; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + jsdd = &kbdev->js_data; + + KBASE_DEBUG_ASSERT(jsdd->init_status == JS_DEVDATA_INIT_NONE); + + /* These two must be recalculated if nr_hw_address_spaces changes + * (e.g.
for HW workarounds) */ + as_present = (1U << kbdev->nr_hw_address_spaces) - 1; + kbdev->nr_user_address_spaces = kbdev->nr_hw_address_spaces; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) { + bool use_workaround; + + use_workaround = DEFAULT_SECURE_BUT_LOSS_OF_PERFORMANCE; + if (use_workaround) { + dev_dbg(kbdev->dev, "GPU has HW ISSUE 8987, and driver configured for security workaround: 1 address space only"); + kbdev->nr_user_address_spaces = 1; + } + } +#ifdef CONFIG_MALI_DEBUG + /* Soft-stop will be disabled on a single context by default unless + * softstop_always is set */ + jsdd->softstop_always = false; +#endif /* CONFIG_MALI_DEBUG */ + jsdd->nr_all_contexts_running = 0; + jsdd->nr_user_contexts_running = 0; + jsdd->nr_contexts_pullable = 0; + atomic_set(&jsdd->nr_contexts_runnable, 0); + /* All ASs initially free */ + jsdd->as_free = as_present; + /* No ctx allowed to submit */ + jsdd->runpool_irq.submit_allowed = 0u; + memset(jsdd->runpool_irq.ctx_attr_ref_count, 0, + sizeof(jsdd->runpool_irq.ctx_attr_ref_count)); + memset(jsdd->runpool_irq.slot_affinities, 0, + sizeof(jsdd->runpool_irq.slot_affinities)); + memset(jsdd->runpool_irq.slot_affinity_refcount, 0, + sizeof(jsdd->runpool_irq.slot_affinity_refcount)); + INIT_LIST_HEAD(&jsdd->suspended_soft_jobs_list); + + /* Config attributes */ + jsdd->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; + jsdd->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; + jsdd->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) + jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS_8408; + else + jsdd->hard_stop_ticks_ss = DEFAULT_JS_HARD_STOP_TICKS_SS; + jsdd->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; + jsdd->hard_stop_ticks_dumping = DEFAULT_JS_HARD_STOP_TICKS_DUMPING; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) + jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS_8408; + else + jsdd->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; + jsdd->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; + jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING; + jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS; + jsdd->cfs_ctx_runtime_init_slices = + DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES; + jsdd->cfs_ctx_runtime_min_slices = + DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES; + + dev_dbg(kbdev->dev, "JS Config Attribs: "); + dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u", + jsdd->scheduling_period_ns); + dev_dbg(kbdev->dev, "\tsoft_stop_ticks:%u", + jsdd->soft_stop_ticks); + dev_dbg(kbdev->dev, "\tsoft_stop_ticks_cl:%u", + jsdd->soft_stop_ticks_cl); + dev_dbg(kbdev->dev, "\thard_stop_ticks_ss:%u", + jsdd->hard_stop_ticks_ss); + dev_dbg(kbdev->dev, "\thard_stop_ticks_cl:%u", + jsdd->hard_stop_ticks_cl); + dev_dbg(kbdev->dev, "\thard_stop_ticks_dumping:%u", + jsdd->hard_stop_ticks_dumping); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_ss:%u", + jsdd->gpu_reset_ticks_ss); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_cl:%u", + jsdd->gpu_reset_ticks_cl); + dev_dbg(kbdev->dev, "\tgpu_reset_ticks_dumping:%u", + jsdd->gpu_reset_ticks_dumping); + dev_dbg(kbdev->dev, "\tctx_timeslice_ns:%u", + jsdd->ctx_timeslice_ns); + dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_init_slices:%u", + jsdd->cfs_ctx_runtime_init_slices); + dev_dbg(kbdev->dev, "\tcfs_ctx_runtime_min_slices:%u", + jsdd->cfs_ctx_runtime_min_slices); + + if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss && + jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss && + jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_dumping && + 
jsdd->hard_stop_ticks_dumping < + jsdd->gpu_reset_ticks_dumping)) { + dev_err(kbdev->dev, "Job scheduler timeouts invalid; soft/hard/reset tick counts should be in increasing order\n"); + return -EINVAL; + } + +#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS + dev_dbg(kbdev->dev, "Job Scheduling Policy Soft-stops disabled, ignoring value for soft_stop_ticks==%u at %uns per tick. Other soft-stops may still occur.", + jsdd->soft_stop_ticks, + jsdd->scheduling_period_ns); +#endif +#if KBASE_DISABLE_SCHEDULING_HARD_STOPS + dev_dbg(kbdev->dev, "Job Scheduling Policy Hard-stops disabled, ignoring values for hard_stop_ticks_ss==%d and hard_stop_ticks_dumping==%u at %uns per tick. Other hard-stops may still occur.", + jsdd->hard_stop_ticks_ss, + jsdd->hard_stop_ticks_dumping, + jsdd->scheduling_period_ns); +#endif +#if KBASE_DISABLE_SCHEDULING_SOFT_STOPS && KBASE_DISABLE_SCHEDULING_HARD_STOPS + dev_dbg(kbdev->dev, "Note: The JS policy's tick timer (if coded) will still be run, but will do nothing."); +#endif + + /* Set up the number of irq throttle cycles based on the given time */ + { + int time_us = kbdev->gpu_props.irq_throttle_time_us; + int cycles = kbasep_js_convert_us_to_gpu_ticks_max_freq(kbdev, + time_us); + + atomic_set(&kbdev->irq_throttle_cycles, cycles); + } + + /* Clear the AS data, including setting NULL pointers */ + memset(&jsdd->runpool_irq.per_as_data[0], 0, + sizeof(jsdd->runpool_irq.per_as_data)); + + for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) + jsdd->js_reqs[i] = core_reqs_from_jsn_features( + kbdev->gpu_props.props.raw_props.js_features[i]); + + jsdd->init_status |= JS_DEVDATA_INIT_CONSTANTS; + + /* On error, we could continue on: providing none of the below resources + * rely on the ones above */ + + mutex_init(&jsdd->runpool_mutex); + mutex_init(&jsdd->queue_mutex); + spin_lock_init(&jsdd->runpool_irq.lock); + sema_init(&jsdd->schedule_sem, 1); + + err = kbasep_js_policy_init(kbdev); + if (!err) + jsdd->init_status |= JS_DEVDATA_INIT_POLICY; + + for (i = 0; i < kbdev->gpu_props.num_job_slots; ++i) { + INIT_LIST_HEAD(&jsdd->ctx_list_pullable[i]); + INIT_LIST_HEAD(&jsdd->ctx_list_unpullable[i]); + } + + jsdd->runpool_irq.secure_mode = false; + if (kbdev->secure_ops) { + /* Make sure secure mode is disabled */ + kbdev->secure_ops->secure_mode_disable(kbdev); + } + + /* On error, do no cleanup; this will be handled by the caller(s), since + * we've designed this resource to be safe to terminate on init-fail */ + if (jsdd->init_status != JS_DEVDATA_INIT_ALL) + return -EINVAL; + + return 0; +} + +void kbasep_js_devdata_halt(struct kbase_device *kbdev) +{ + CSTD_UNUSED(kbdev); +} + +void kbasep_js_devdata_term(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_devdata = &kbdev->js_data; + + if ((js_devdata->init_status & JS_DEVDATA_INIT_CONSTANTS)) { + s8 zero_ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT] = { 0, }; + /* The caller must de-register all contexts before calling this + */ + KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running == 0); + KBASE_DEBUG_ASSERT(memcmp( + js_devdata->runpool_irq.ctx_attr_ref_count, + zero_ctx_attr_ref_count, + sizeof(zero_ctx_attr_ref_count)) == 0); + CSTD_UNUSED(zero_ctx_attr_ref_count); + } + if ((js_devdata->init_status & JS_DEVDATA_INIT_POLICY)) + kbasep_js_policy_term(&js_devdata->policy); + + js_devdata->init_status = JS_DEVDATA_INIT_NONE; +} + +int kbasep_js_kctx_init(struct kbase_context * const kctx) +{ + struct kbase_device *kbdev; + struct kbasep_js_kctx_info
*js_kctx_info; + int err; + int i; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) + INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); + + js_kctx_info = &kctx->jctx.sched_info; + KBASE_DEBUG_ASSERT(js_kctx_info->init_status == JS_KCTX_INIT_NONE); + + js_kctx_info->ctx.nr_jobs = 0; + atomic_set(&js_kctx_info->ctx.fault_count, 0); + js_kctx_info->ctx.is_scheduled = false; + js_kctx_info->ctx.is_dying = false; + memset(js_kctx_info->ctx.ctx_attr_ref_count, 0, + sizeof(js_kctx_info->ctx.ctx_attr_ref_count)); + + /* Initially, the context is disabled from submission until the create + * flags are set */ + js_kctx_info->ctx.flags = KBASE_CTX_FLAG_SUBMIT_DISABLED; + + js_kctx_info->init_status |= JS_KCTX_INIT_CONSTANTS; + + /* On error, we could continue on: providing none of the below resources + * rely on the ones above */ + mutex_init(&js_kctx_info->ctx.jsctx_mutex); + + init_waitqueue_head(&js_kctx_info->ctx.is_scheduled_wait); + + err = kbasep_js_policy_init_ctx(kbdev, kctx); + if (!err) + js_kctx_info->init_status |= JS_KCTX_INIT_POLICY; + + /* On error, do no cleanup; this will be handled by the caller(s), since + * we've designed this resource to be safe to terminate on init-fail */ + if (js_kctx_info->init_status != JS_KCTX_INIT_ALL) + return -EINVAL; + + return 0; +} + +void kbasep_js_kctx_term(struct kbase_context *kctx) +{ + struct kbase_device *kbdev; + struct kbasep_js_kctx_info *js_kctx_info; + union kbasep_js_policy *js_policy; + int js; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbdev = kctx->kbdev; + KBASE_DEBUG_ASSERT(kbdev != NULL); + + js_policy = &kbdev->js_data.policy; + js_kctx_info = &kctx->jctx.sched_info; + + if ((js_kctx_info->init_status & JS_KCTX_INIT_CONSTANTS)) { + /* The caller must de-register all jobs before calling this */ + KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs == 0); + } + + mutex_lock(&kbdev->js_data.queue_mutex); + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + mutex_unlock(&kbdev->js_data.queue_mutex); + + if ((js_kctx_info->init_status & JS_KCTX_INIT_POLICY)) + kbasep_js_policy_term_ctx(js_policy, kctx); + + js_kctx_info->init_status = JS_KCTX_INIT_NONE; +} + +/** + * kbase_js_ctx_list_add_pullable - Add context to the tail of the per-slot + * pullable context queue + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * If the context is on either the pullable or unpullable queues, then it is + * removed before being added to the tail. + * + * This function should be used when queueing a context for the first time, or + * re-queueing a context that has been pulled from. 
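+ * + * A minimal sketch of the intended calling pattern (locking elided here; + * see the requirements below): + * + * if (kbase_js_ctx_list_add_pullable(kbdev, kctx, js)) + * kbase_backend_ctx_count_changed(kbdev);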
+ * + * Caller must hold kbasep_jd_device_data.queue_mutex + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_pullable(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->js_data.queue_mutex); + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + + if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + list_add_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_pullable[js]); + + if (!kctx->slots_pullable) { + kbdev->js_data.nr_contexts_pullable++; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) + atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } + kctx->slots_pullable |= (1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_add_pullable_head - Add context to the head of the + * per-slot pullable context queue + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * If the context is on either the pullable or unpullable queues, then it is + * removed before being added to the head. + * + * This function should be used when a context has been scheduled, but no jobs + * can currently be pulled from it. + * + * Caller must hold kbasep_jd_device_data.queue_mutex + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_pullable_head(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->js_data.queue_mutex); + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + + if (!list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])) + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + list_add(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_pullable[js]); + + if (!kctx->slots_pullable) { + kbdev->js_data.nr_contexts_pullable++; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) + atomic_inc(&kbdev->js_data.nr_contexts_runnable); + } + kctx->slots_pullable |= (1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_add_unpullable - Add context to the tail of the per-slot + * unpullable context queue + * @kbdev: Device pointer + * @kctx: Context to add to queue + * @js: Job slot to use + * + * The context must already be on the per-slot pullable queue. It will be + * removed from the pullable queue before being added to the unpullable queue. + * + * This function should be used when a context has been pulled from, and there + * are no jobs remaining on the specified slot. 
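+ * + * Note: kctx->slots_pullable is a per-slot bitmask; as an illustration, a + * context that is pullable on slots 0 and 2 only has slots_pullable == 0x5, + * and this function clears bit @js, updating the pullable/runnable counters + * only when that was the last set bit.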
+ * + * Caller must hold kbasep_jd_device_data.queue_mutex + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_add_unpullable(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->js_data.queue_mutex); + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + + list_move_tail(&kctx->jctx.sched_info.ctx.ctx_list_entry[js], + &kbdev->js_data.ctx_list_unpullable[js]); + + if (kctx->slots_pullable == (1 << js)) { + kbdev->js_data.nr_contexts_pullable--; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } + kctx->slots_pullable &= ~(1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_remove - Remove context from the per-slot pullable or + * unpullable context queues + * @kbdev: Device pointer + * @kctx: Context to remove from queue + * @js: Job slot to use + * + * The context must already be on one of the queues. + * + * This function should be used when a context has no jobs on the GPU, and no + * jobs remaining for the specified slot. + * + * Caller must hold kbasep_jd_device_data.queue_mutex + * + * Return: true if caller should call kbase_backend_ctx_count_changed() + */ +static bool kbase_js_ctx_list_remove(struct kbase_device *kbdev, + struct kbase_context *kctx, + int js) +{ + bool ret = false; + + lockdep_assert_held(&kbdev->js_data.queue_mutex); + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + + WARN_ON(list_empty(&kctx->jctx.sched_info.ctx.ctx_list_entry[js])); + + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + if (kctx->slots_pullable == (1 << js)) { + kbdev->js_data.nr_contexts_pullable--; + ret = true; + if (!atomic_read(&kctx->atoms_pulled)) + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + } + kctx->slots_pullable &= ~(1 << js); + + return ret; +} + +/** + * kbase_js_ctx_list_pop_head - Pop the head context off the per-slot pullable + * queue. + * @kbdev: Device pointer + * @js: Job slot to use + * + * Caller must hold kbasep_jd_device_data::queue_mutex + * + * Return: Context to use for specified slot. 
+ * NULL if no contexts present for specified slot + */ +static struct kbase_context *kbase_js_ctx_list_pop_head( + struct kbase_device *kbdev, + int js) +{ + struct kbase_context *kctx; + + lockdep_assert_held(&kbdev->js_data.queue_mutex); + + if (list_empty(&kbdev->js_data.ctx_list_pullable[js])) + return NULL; + + kctx = list_entry(kbdev->js_data.ctx_list_pullable[js].next, + struct kbase_context, + jctx.sched_info.ctx.ctx_list_entry[js]); + + list_del_init(&kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + + return kctx; +} + +/** + * kbase_js_ctx_pullable - Return if a context can be pulled from on the + * specified slot + * @kctx: Context pointer + * @js: Job slot to use + * @is_scheduled: true if the context is currently scheduled + * + * Caller must hold runpool_irq.lock + * + * Return: true if context can be pulled from on specified slot + * false otherwise + */ +static bool kbase_js_ctx_pullable(struct kbase_context *kctx, int js, + bool is_scheduled) +{ + struct kbasep_js_device_data *js_devdata; + struct kbase_jd_atom *katom; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + js_devdata = &kctx->kbdev->js_data; + + if (is_scheduled) { + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) + return false; + } + katom = jsctx_rb_peek(kctx, js); + if (!katom) + return false; /* ringbuffer empty */ + if (atomic_read(&katom->blocked)) + return false; /* next atom blocked */ + if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { + if (katom->x_pre_dep->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) + return false; + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && + kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) + return false; + } + + return true; +} + +static bool kbase_js_dep_validate(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool ret = true; + bool has_dep = false, has_x_dep = false; + int js = kbase_js_get_slot(kbdev, katom); + int prio = katom->sched_priority; + int i; + + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep_atom = katom->dep[i].atom; + + if (dep_atom) { + int dep_js = kbase_js_get_slot(kbdev, dep_atom); + int dep_prio = dep_atom->sched_priority; + + /* Dependent atom must already have been submitted */ + if (!(dep_atom->atom_flags & + KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED)) { + ret = false; + break; + } + + /* Dependencies with different priorities can't + be represented in the ringbuffer */ + if (prio != dep_prio) { + ret = false; + break; + } + + if (js == dep_js) { + /* Only one same-slot dependency can be + * represented in the ringbuffer */ + if (has_dep) { + ret = false; + break; + } + has_dep = true; + } else { + /* Only one cross-slot dependency can be + * represented in the ringbuffer */ + if (has_x_dep) { + ret = false; + break; + } + /* Each dependee atom can only have one + * cross-slot dependency */ + if (dep_atom->x_post_dep) { + ret = false; + break; + } + /* The dependee atom can not already be in the + * HW access ringbuffer */ + if (dep_atom->gpu_rb_state != + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + ret = false; + break; + } + /* The dependee atom can not already have + * completed */ + if (dep_atom->status != + KBASE_JD_ATOM_STATE_IN_JS) { + ret = false; + break; + } + /* Cross-slot dependencies must not violate + * PRLAM-8987 affinity restrictions */ + if (kbase_hw_has_issue(kbdev, + BASE_HW_ISSUE_8987) && + (js == 2 || dep_js == 2)) { + ret = false; + break; + } + has_x_dep = true; + } + + if (kbase_jd_katom_dep_type(&katom->dep[i]) == + 
BASE_JD_DEP_TYPE_DATA && + js == dep_js) { + struct kbase_jd_atom *last_atom = + jsctx_rb_peek_last(kctx, js, + prio); + + /* Last atom on slot must be pre-dep for this + * atom */ + if (last_atom != dep_atom) { + ret = false; + break; + } + } + + /* Dependency can be represented in ringbuffers */ + } + } + + /* If dependencies can be represented by ringbuffer then clear them from + * atom structure */ + if (ret) { + for (i = 0; i < 2; i++) { + struct kbase_jd_atom *dep_atom = katom->dep[i].atom; + + if (dep_atom) { + int dep_js = kbase_js_get_slot(kbdev, dep_atom); + + if ((js != dep_js) && + (dep_atom->status != + KBASE_JD_ATOM_STATE_COMPLETED) + && (dep_atom->status != + KBASE_JD_ATOM_STATE_HW_COMPLETED) + && (dep_atom->status != + KBASE_JD_ATOM_STATE_UNUSED)) { + + katom->atom_flags |= + KBASE_KATOM_FLAG_X_DEP_BLOCKED; + katom->x_pre_dep = dep_atom; + dep_atom->x_post_dep = katom; + if (kbase_jd_katom_dep_type( + &katom->dep[i]) == + BASE_JD_DEP_TYPE_DATA) + katom->atom_flags |= + KBASE_KATOM_FLAG_FAIL_BLOCKER; + } + if ((kbase_jd_katom_dep_type(&katom->dep[i]) + == BASE_JD_DEP_TYPE_DATA) && + (js == dep_js)) + katom->atom_flags |= + KBASE_KATOM_FLAG_FAIL_PREV; + + list_del(&katom->dep_item[i]); + kbase_jd_katom_dep_clear(&katom->dep[i]); + } + } + } + + return ret; +} + +bool kbasep_js_add_job(struct kbase_context *kctx, + struct kbase_jd_atom *atom) +{ + unsigned long flags; + struct kbasep_js_kctx_info *js_kctx_info; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + union kbasep_js_policy *js_policy; + + bool enqueue_required = false; + bool timer_sync = false; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(atom != NULL); + lockdep_assert_held(&kctx->jctx.lock); + + kbdev = kctx->kbdev; + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + /* + * Begin Runpool transaction + */ + mutex_lock(&js_devdata->runpool_mutex); + + /* Refcount ctx.nr_jobs */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs < U32_MAX); + ++(js_kctx_info->ctx.nr_jobs); + + /* Setup any scheduling information */ + kbasep_js_clear_job_retry_submit(atom); + + /* Lock for state available during IRQ */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + if (!kbase_js_dep_validate(kctx, atom)) { + /* Dependencies could not be represented */ + --(js_kctx_info->ctx.nr_jobs); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + + goto out_unlock; + } + + KBASE_TIMELINE_ATOM_READY(kctx, kbase_jd_atom_id(kctx, atom)); + + if (kbase_js_dep_resolved_submit(kctx, atom, &enqueue_required) != 0) { + /* Ringbuffer was full (should be impossible) - fail the job */ + --(js_kctx_info->ctx.nr_jobs); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&js_devdata->runpool_mutex); + + atom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + goto out_unlock; + } + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_ADD_JOB, kctx, atom, atom->jc, + kbasep_js_trace_get_refcnt_nolock(kbdev, kctx)); + + /* Context Attribute Refcounting */ + kbasep_js_ctx_attr_ctx_retain_atom(kbdev, kctx, atom); + + if (enqueue_required) + timer_sync = kbase_js_ctx_list_add_pullable(kbdev, kctx, + atom->slot_nr); + + /* If this context is active and the atom is the first on its slot, + * kick the job manager to attempt to fast-start the atom */ + if (enqueue_required && kctx == 
kbdev->hwaccess.active_kctx) + kbase_jm_try_kick(kbdev, 1 << atom->slot_nr); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + /* End runpool transaction */ + + if (!js_kctx_info->ctx.is_scheduled) { + if (js_kctx_info->ctx.is_dying) { + /* A job got added while/after kbase_job_zap_context() + * was called on a non-scheduled context (e.g. KDS + * dependency resolved). Kill that job by killing the + * context. */ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, + false); + } else if (js_kctx_info->ctx.nr_jobs == 1) { + /* Handle Refcount going from 0 to 1: schedule the + * context on the Policy Queue */ + KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); + dev_dbg(kbdev->dev, "JS: Enqueue Context %p", kctx); + + /* Policy Queue was updated - caller must try to + * schedule the head context */ + WARN_ON(!enqueue_required); + } + } +out_unlock: + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + mutex_unlock(&js_devdata->queue_mutex); + + return enqueue_required; +} + +void kbasep_js_remove_job(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_jd_atom *atom) +{ + struct kbasep_js_kctx_info *js_kctx_info; + struct kbasep_js_device_data *js_devdata; + union kbasep_js_policy *js_policy; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(atom != NULL); + + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + js_kctx_info = &kctx->jctx.sched_info; + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_REMOVE_JOB, kctx, atom, atom->jc, + kbasep_js_trace_get_refcnt(kbdev, kctx)); + + /* De-refcount ctx.nr_jobs */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.nr_jobs > 0); + --(js_kctx_info->ctx.nr_jobs); +} + +bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + unsigned long flags; + struct kbasep_js_atom_retained_state katom_retained_state; + struct kbasep_js_device_data *js_devdata; + bool attr_state_changed; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom != NULL); + + js_devdata = &kbdev->js_data; + + kbasep_js_atom_retained_state_copy(&katom_retained_state, katom); + kbasep_js_remove_job(kbdev, kctx, katom); + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* The atom has 'finished' (will not be re-run), so no need to call + * kbasep_js_has_atom_finished(). 
+ * + * This is because it returns false for soft-stopped atoms, but we + * want to override that, because we're cancelling an atom regardless of + * whether it was soft-stopped or not */ + attr_state_changed = kbasep_js_ctx_attr_ctx_release_atom(kbdev, kctx, + &katom_retained_state); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return attr_state_changed; +} + +bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + bool result; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + + /* KBASE_TRACE_ADD_REFCOUNT( kbdev, JS_RETAIN_CTX, kctx, NULL, 0, + kbasep_js_trace_get_refcnt(kbdev, kctx)); */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + result = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return result; +} + +struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, + int as_nr) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + struct kbase_context *found_kctx = NULL; + struct kbasep_js_per_as_data *js_per_as_data; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); + js_devdata = &kbdev->js_data; + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + found_kctx = js_per_as_data->kctx; + + if (found_kctx != NULL) + ++(js_per_as_data->as_busy_refcount); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return found_kctx; +} + +struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock( + struct kbase_device *kbdev, int as_nr) +{ + struct kbasep_js_device_data *js_devdata; + struct kbase_context *found_kctx = NULL; + struct kbasep_js_per_as_data *js_per_as_data; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS); + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + js_devdata = &kbdev->js_data; + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + found_kctx = js_per_as_data->kctx; + + if (found_kctx != NULL) + ++(js_per_as_data->as_busy_refcount); + + return found_kctx; +} + +/** + * kbasep_js_release_result - Try running more jobs after releasing a context + * and/or atom + * + * @kbdev: The kbase_device to operate on + * @kctx: The kbase_context to operate on + * @katom_retained_state: Retained state from the atom + * @runpool_ctx_attr_change: True if the runpool context attributes have changed + * + * This collates a set of actions that must happen whilst + * kbasep_js_device_data.runpool_irq.lock is held. + * + * This includes running more jobs when: + * - The previously released kctx caused a ctx attribute change, + * - The released atom caused a ctx attribute change, + * - Slots were previously blocked due to affinity restrictions, + * - Submission during IRQ handling failed. + * + * Return: %KBASEP_JS_RELEASE_RESULT_SCHED_ALL if context attributes were + * changed. 
The caller should try scheduling all contexts + */ +static kbasep_js_release_result kbasep_js_run_jobs_after_ctx_and_atom_release( + struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state, + bool runpool_ctx_attr_change) +{ + struct kbasep_js_device_data *js_devdata; + kbasep_js_release_result result = 0; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(katom_retained_state != NULL); + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&js_devdata->runpool_mutex); + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + if (js_devdata->nr_user_contexts_running != 0) { + bool retry_submit = false; + int retry_jobslot; + + if (katom_retained_state) + retry_submit = kbasep_js_get_atom_retry_submit_slot( + katom_retained_state, &retry_jobslot); + + if (runpool_ctx_attr_change || retry_submit) { + /* A change in runpool ctx attributes might mean we can + * run more jobs than before */ + result = KBASEP_JS_RELEASE_RESULT_SCHED_ALL; + + KBASE_TRACE_ADD_SLOT(kbdev, JD_DONE_TRY_RUN_NEXT_JOB, + kctx, NULL, 0u, retry_jobslot); + } + } + return result; +} + +/* + * Internal function to release the reference on a ctx and an atom's "retained + * state", only taking the runpool and as transaction mutexes + * + * This also starts more jobs running in the case of a ctx-attribute state + * change + * + * This does none of the follow-up actions for scheduling: + * - It does not schedule in a new context + * - It does not requeue or handle dying contexts + * + * For those tasks, just call kbasep_js_runpool_release_ctx() instead + * + * Requires: + * - Context is scheduled in, and kctx->as_nr matches kctx_as_nr + * - Context has a non-zero refcount + * - Caller holds js_kctx_info->ctx.jsctx_mutex + * - Caller holds js_devdata->runpool_mutex + */ +static kbasep_js_release_result kbasep_js_runpool_release_ctx_internal( + struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + union kbasep_js_policy *js_policy; + struct kbasep_js_per_as_data *js_per_as_data; + + kbasep_js_release_result release_result = 0u; + bool runpool_ctx_attr_change = false; + int kctx_as_nr; + struct kbase_as *current_as; + int new_ref_count; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled); + + /* kctx->as_nr and js_per_as_data are only read from here. The caller's + * js_ctx_mutex provides a barrier that ensures they are up-to-date. + * + * They will not change whilst we're reading them, because the refcount + * is non-zero (and we ASSERT on that last fact).
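+ * + * In other words, the non-zero refcount acts as a read-side guard: the + * address space cannot be reassigned until the refcount that this function + * is about to drop reaches zero under runpool_irq.lock, below.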
+ */ + kctx_as_nr = kctx->as_nr; + KBASE_DEBUG_ASSERT(kctx_as_nr != KBASEP_AS_NR_INVALID); + js_per_as_data = &js_devdata->runpool_irq.per_as_data[kctx_as_nr]; + KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0); + + /* + * Transaction begins on AS and runpool_irq + * + * Assert about our calling contract + */ + current_as = &kbdev->as[kctx_as_nr]; + mutex_lock(&kbdev->pm.lock); + mutex_lock(&current_as->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + KBASE_DEBUG_ASSERT(kctx_as_nr == kctx->as_nr); + KBASE_DEBUG_ASSERT(js_per_as_data->as_busy_refcount > 0); + + /* Update refcount */ + new_ref_count = --(js_per_as_data->as_busy_refcount); + + /* Release the atom if it finished (i.e. wasn't soft-stopped) */ + if (kbasep_js_has_atom_finished(katom_retained_state)) + runpool_ctx_attr_change |= kbasep_js_ctx_attr_ctx_release_atom( + kbdev, kctx, katom_retained_state); + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_RELEASE_CTX, kctx, NULL, 0u, + new_ref_count); + + if (new_ref_count == 1 && kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_PRIVILEGED && + !kbase_pm_is_suspending(kbdev)) { + /* Context is kept scheduled into an address space even when + * there are no jobs, in this case we have to handle the + * situation where all jobs have been evicted from the GPU and + * submission is disabled. + * + * At this point we re-enable submission to allow further jobs + * to be executed + */ + kbasep_js_set_submit_allowed(js_devdata, kctx); + } + + /* Make a set of checks to see if the context should be scheduled out */ + if (new_ref_count == 0 && + (!kbasep_js_is_submit_allowed(js_devdata, kctx) || + kbdev->pm.suspending)) { + /* Last reference, and we've been told to remove this context + * from the Run Pool */ + dev_dbg(kbdev->dev, "JS: RunPool Remove Context %p because as_busy_refcount=%d, jobs=%d, allowed=%d", + kctx, new_ref_count, js_kctx_info->ctx.nr_jobs, + kbasep_js_is_submit_allowed(js_devdata, kctx)); + +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_mmu_as_released(kctx->as_nr); +#endif +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_nret_gpu_ctx(kbdev, kctx); +#endif + + kbase_backend_release_ctx_irq(kbdev, kctx); + + if (kbdev->hwaccess.active_kctx == kctx) + kbdev->hwaccess.active_kctx = NULL; + + /* Ctx Attribute handling + * + * Releasing atom attributes must happen either before this, or + * after 'is_scheduled' is changed, otherwise we double-count + * the attributes */ + runpool_ctx_attr_change |= + kbasep_js_ctx_attr_runpool_release_ctx(kbdev, kctx); + + /* Releasing the context and katom retained state can allow + * more jobs to run */ + release_result |= + kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, + kctx, katom_retained_state, + runpool_ctx_attr_change); + + /* + * Transaction ends on AS and runpool_irq: + * + * By this point, the AS-related data is now clear and ready + * for re-use.
+ * + * Since releases only occur once for each previous successful + * retain, and no more retains are allowed on this context, no + * other thread will be operating in this + * code whilst we are + */ + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + kbase_backend_release_ctx_noirq(kbdev, kctx); + + mutex_unlock(&current_as->transaction_mutex); + mutex_unlock(&kbdev->pm.lock); + + /* Note: Don't reuse kctx_as_nr now */ + + /* Synchronize with any policy timers */ + kbase_backend_ctx_count_changed(kbdev); + + /* update book-keeping info */ + js_kctx_info->ctx.is_scheduled = false; + /* Signal any waiter that the context is not scheduled, so is + * safe for termination - once the jsctx_mutex is also dropped, + * and jobs have finished. */ + wake_up(&js_kctx_info->ctx.is_scheduled_wait); + + /* Queue an action to occur after we've dropped the lock */ + release_result |= KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED; + } else { + kbasep_js_run_jobs_after_ctx_and_atom_release(kbdev, kctx, + katom_retained_state, runpool_ctx_attr_change); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&current_as->transaction_mutex); + mutex_unlock(&kbdev->pm.lock); + } + + return release_result; +} + +void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_atom_retained_state katom_retained_state; + + /* Set up a dummy katom_retained_state */ + kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + + kbasep_js_runpool_release_ctx_internal(kbdev, kctx, + &katom_retained_state); +} + +void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx, bool has_pm_ref) +{ + struct kbasep_js_device_data *js_devdata; + union kbasep_js_policy *js_policy; + struct kbasep_js_kctx_info *js_kctx_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_policy = &kbdev->js_data.policy; + js_devdata = &kbdev->js_data; + + /* This is called if and only if you've detached the context from + * the Runpool or the Policy Queue, and not added it back to the Runpool + */ + KBASE_DEBUG_ASSERT(!js_kctx_info->ctx.is_scheduled); + + if (js_kctx_info->ctx.is_dying) { + /* Dying: don't requeue, but kill all jobs on the context. This + * happens asynchronously */ + dev_dbg(kbdev->dev, + "JS: ** Killing Context %p on RunPool Remove **", kctx); + kbase_js_foreach_ctx_job(kctx, &kbase_jd_cancel); + } +} + +void kbasep_js_runpool_release_ctx_and_katom_retained_state( + struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbasep_js_atom_retained_state *katom_retained_state) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + base_jd_event_code event_code; + kbasep_js_release_result release_result; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + event_code = katom_retained_state->event_code; + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + +#if 2 == MALI_INSTRUMENTATION_LEVEL + /* When any fault is detected, stop the context from submitting more + * and add a refcount to prevent the context from being removed while + * the job core dump is in progress in user space. Once the dump + * finishes, it will release the refcount of the context and allow it + * to be removed.
The test conditions are to ensure this mechanism + * will be triggered only in the cases that cmarp_event_handler + * handles. + * + * Currently, the cmar event handler only handles job exceptions and + * asserts on the cases where the event code of the atom does not + * belong to the MMU exceptions or GPU exceptions class. In order to + * perform dump on error in those cases, changes in the cmar event + * handler need to be made. + */ + if ((BASE_JD_EVENT_NOT_STARTED != event_code) && + (BASE_JD_EVENT_STOPPED != event_code) && + (BASE_JD_EVENT_ACTIVE != event_code) && + (!((event_code >= BASE_JD_EVENT_RANGE_KERNEL_ONLY_START) && + (event_code <= BASE_JD_EVENT_RANGE_KERNEL_ONLY_END))) && + ((event_code & BASE_JD_SW_EVENT) || + event_code <= BASE_JD_EVENT_UNKNOWN) && + (BASE_JD_EVENT_DONE != event_code)) { + unsigned long flags; + + atomic_inc(&js_kctx_info->ctx.fault_count); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbasep_js_clear_submit_allowed(js_devdata, kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + kbasep_js_runpool_retain_ctx(kbdev, kctx); + } +#endif /* 2 == MALI_INSTRUMENTATION_LEVEL */ + + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, + katom_retained_state); + + /* Drop the runpool mutex to allow requeueing kctx */ + mutex_unlock(&js_devdata->runpool_mutex); + + if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + + /* Drop the jsctx_mutex to allow scheduling in a new context */ + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + if (release_result & KBASEP_JS_RELEASE_RESULT_SCHED_ALL) + kbase_js_sched_all(kbdev); +} + +void kbasep_js_dump_fault_term(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbasep_js_set_submit_allowed(js_devdata, kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + kbasep_js_runpool_release_ctx(kbdev, kctx); + atomic_dec(&kctx->jctx.sched_info.ctx.fault_count); +} + + +void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_atom_retained_state katom_retained_state; + + kbasep_js_atom_retained_state_init_invalid(&katom_retained_state); + + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, + &katom_retained_state); +} + +/* Variant of kbasep_js_runpool_release_ctx() that doesn't call into + * kbase_js_sched_all() */ +static void kbasep_js_runpool_release_ctx_no_schedule( + struct kbase_device *kbdev, struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + kbasep_js_release_result release_result; + struct kbasep_js_atom_retained_state katom_retained_state_struct; + struct kbasep_js_atom_retained_state *katom_retained_state = + &katom_retained_state_struct; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + kbasep_js_atom_retained_state_init_invalid(katom_retained_state); + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + release_result = kbasep_js_runpool_release_ctx_internal(kbdev, kctx, + katom_retained_state); + + /* Drop the runpool mutex to allow requeueing kctx */ +
mutex_unlock(&js_devdata->runpool_mutex); + if ((release_result & KBASEP_JS_RELEASE_RESULT_WAS_DESCHEDULED) != 0u) + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, true); + + /* Drop the jsctx_mutex to allow scheduling in a new context */ + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* NOTE: could return release_result if the caller would like to know + * whether it should schedule a new context, but currently no callers do + */ +} + +/** + * kbase_js_set_timeouts - update all JS timeouts with user-specified data + * @kbdev: Device pointer + * + * Timeouts are specified through the 'js_timeouts' sysfs file. If a timeout is + * set to a positive number then that becomes the new value used; if a timeout + * is negative then the default is set. + */ +static void kbase_js_set_timeouts(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_data = &kbdev->js_data; + + if (kbdev->js_scheduling_period_ns < 0) + js_data->scheduling_period_ns = DEFAULT_JS_SCHEDULING_PERIOD_NS; + else if (kbdev->js_scheduling_period_ns > 0) + js_data->scheduling_period_ns = kbdev->js_scheduling_period_ns; + + if (kbdev->js_soft_stop_ticks < 0) + js_data->soft_stop_ticks = DEFAULT_JS_SOFT_STOP_TICKS; + else if (kbdev->js_soft_stop_ticks > 0) + js_data->soft_stop_ticks = kbdev->js_soft_stop_ticks; + + if (kbdev->js_soft_stop_ticks_cl < 0) + js_data->soft_stop_ticks_cl = DEFAULT_JS_SOFT_STOP_TICKS_CL; + else if (kbdev->js_soft_stop_ticks_cl > 0) + js_data->soft_stop_ticks_cl = kbdev->js_soft_stop_ticks_cl; + + if (kbdev->js_hard_stop_ticks_ss < 0) { + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) + js_data->hard_stop_ticks_ss = + DEFAULT_JS_HARD_STOP_TICKS_SS_8408; + else + js_data->hard_stop_ticks_ss = + DEFAULT_JS_HARD_STOP_TICKS_SS; + } else if (kbdev->js_hard_stop_ticks_ss > 0) { + js_data->hard_stop_ticks_ss = kbdev->js_hard_stop_ticks_ss; + } + + if (kbdev->js_hard_stop_ticks_cl < 0) + js_data->hard_stop_ticks_cl = DEFAULT_JS_HARD_STOP_TICKS_CL; + else if (kbdev->js_hard_stop_ticks_cl > 0) + js_data->hard_stop_ticks_cl = kbdev->js_hard_stop_ticks_cl; + + if (kbdev->js_hard_stop_ticks_dumping < 0) + js_data->hard_stop_ticks_dumping = + DEFAULT_JS_HARD_STOP_TICKS_DUMPING; + else if (kbdev->js_hard_stop_ticks_dumping > 0) + js_data->hard_stop_ticks_dumping = + kbdev->js_hard_stop_ticks_dumping; + + if (kbdev->js_reset_ticks_ss < 0) { + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8408)) + js_data->gpu_reset_ticks_ss = + DEFAULT_JS_RESET_TICKS_SS_8408; + else + js_data->gpu_reset_ticks_ss = DEFAULT_JS_RESET_TICKS_SS; + } else if (kbdev->js_reset_ticks_ss > 0) { + js_data->gpu_reset_ticks_ss = kbdev->js_reset_ticks_ss; + } + + if (kbdev->js_reset_ticks_cl < 0) + js_data->gpu_reset_ticks_cl = DEFAULT_JS_RESET_TICKS_CL; + else if (kbdev->js_reset_ticks_cl > 0) + js_data->gpu_reset_ticks_cl = kbdev->js_reset_ticks_cl; + + if (kbdev->js_reset_ticks_dumping < 0) + js_data->gpu_reset_ticks_dumping = + DEFAULT_JS_RESET_TICKS_DUMPING; + else if (kbdev->js_reset_ticks_dumping > 0) + js_data->gpu_reset_ticks_dumping = + kbdev->js_reset_ticks_dumping; +} + +static bool kbasep_js_schedule_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + union kbasep_js_policy *js_policy; + struct kbase_as *new_address_space = NULL; + unsigned long flags; + bool kctx_suspended = false; + int as_nr; + + js_devdata = &kbdev->js_data; + js_policy = &kbdev->js_data.policy; + js_kctx_info = &kctx->jctx.sched_info; + + /*
Pick available address space for this context */ + as_nr = kbase_backend_find_free_address_space(kbdev, kctx); + + if (as_nr == KBASEP_AS_NR_INVALID) + return false; /* No address spaces currently available */ + + new_address_space = &kbdev->as[as_nr]; + + /* + * Atomic transaction on the Context and Run Pool begins + */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + + /* Check to see if context is dying due to kbase_job_zap_context() */ + if (js_kctx_info->ctx.is_dying) { + /* Roll back the transaction so far and return */ + kbase_backend_release_free_address_space(kbdev, as_nr); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + return false; + } + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_TRY_SCHEDULE_HEAD_CTX, kctx, NULL, + 0u, + kbasep_js_trace_get_refcnt(kbdev, kctx)); + + if (js_devdata->nr_user_contexts_running == 0 && + kbdev->js_timeouts_updated) { + /* Only when there are no other contexts submitting jobs: + * Latch in run-time job scheduler timeouts that were set + * through js_timeouts sysfs file */ + kbase_js_set_timeouts(kbdev); + + kbdev->js_timeouts_updated = false; + } + + js_kctx_info->ctx.is_scheduled = true; + + mutex_lock(&new_address_space->transaction_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* Assign context to previously chosen address space */ + if (!kbase_backend_use_ctx(kbdev, kctx, as_nr)) { + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&new_address_space->transaction_mutex); + /* If address space is not pending, then kbase_backend_use_ctx() + * failed. Roll back the transaction so far and return */ + if (!kctx->as_pending) { + js_kctx_info->ctx.is_scheduled = false; + + kbase_backend_release_free_address_space(kbdev, as_nr); + } + + mutex_unlock(&js_devdata->runpool_mutex); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + return false; + } + + kbdev->hwaccess.active_kctx = kctx; + +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_mmu_as_in_use(kctx->as_nr); +#endif +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_ret_gpu_ctx(kbdev, kctx); +#endif + + /* Cause any future waiter-on-termination to wait until the context is + * descheduled */ + wake_up(&js_kctx_info->ctx.is_scheduled_wait); + + /* Re-check for suspending: a suspend could've occurred, and all the + * contexts could've been removed from the runpool before we took this + * lock. In this case, we don't want to allow this context to run jobs, + * we just want it out immediately. + * + * The DMB required to read the suspend flag was issued recently as part + * of the runpool_irq locking. If a suspend occurs *after* that lock was + * taken (i.e. 
this condition doesn't execute), then the + * kbasep_js_suspend() code will cleanup this context instead (by virtue + * of it being called strictly after the suspend flag is set, and will + * wait for this lock to drop) */ + if (kbase_pm_is_suspending(kbdev)) { + /* Cause it to leave at some later point */ + bool retained; + + retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + KBASE_DEBUG_ASSERT(retained); + + kbasep_js_clear_submit_allowed(js_devdata, kctx); + kctx_suspended = true; + } + + /* Transaction complete */ + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&new_address_space->transaction_mutex); + + /* Synchronize with any policy timers */ + kbase_backend_ctx_count_changed(kbdev); + + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + /* Note: after this point, the context could potentially get scheduled + * out immediately */ + + if (kctx_suspended) { + /* Finishing forcing out the context due to a suspend. Use a + * variant of kbasep_js_runpool_release_ctx() that doesn't + * schedule a new context, to prevent a risk of recursion back + * into this function */ + kbasep_js_runpool_release_ctx_no_schedule(kbdev, kctx); + return false; + } + return true; +} + +static bool kbase_js_use_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + unsigned long flags; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + if (kctx->as_pending) { + /* Context waiting for AS to be assigned */ + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + return false; + } + if (kbase_backend_use_ctx_sched(kbdev, kctx)) { + /* Context already has ASID - mark as active */ + kbdev->hwaccess.active_kctx = kctx; + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + return true; /* Context already scheduled */ + } + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return kbasep_js_schedule_ctx(kbdev, kctx); +} + +void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_kctx_info *js_kctx_info; + struct kbasep_js_device_data *js_devdata; + bool is_scheduled; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + /* This must never be attempted whilst suspending - i.e. 
it should only + * happen in response to a syscall from a user-space thread */ + BUG_ON(kbase_pm_is_suspending(kbdev)); + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + /* Mark the context as privileged */ + js_kctx_info->ctx.flags |= KBASE_CTX_FLAG_PRIVILEGED; + + is_scheduled = js_kctx_info->ctx.is_scheduled; + if (!is_scheduled) { + /* Add the context to the pullable list */ + if (kbase_js_ctx_list_add_pullable(kbdev, kctx, 0)) + kbase_js_sync_timers(kbdev); + + /* Fast-starting requires the jsctx_mutex to be dropped, + * because it works on multiple ctxs */ + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + /* Try to schedule the context in */ + kbase_js_sched_all(kbdev); + + /* Wait for the context to be scheduled in */ + wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait, + kctx->jctx.sched_info.ctx.is_scheduled); + } else { + /* Already scheduled in - We need to retain it to keep the + * corresponding address space */ + kbasep_js_runpool_retain_ctx(kbdev, kctx); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + } +} + +void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_kctx_info *js_kctx_info; + bool pending; + + KBASE_DEBUG_ASSERT(kctx != NULL); + js_kctx_info = &kctx->jctx.sched_info; + + /* We don't need to use the address space anymore */ + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + js_kctx_info->ctx.flags &= (~KBASE_CTX_FLAG_PRIVILEGED); + pending = kctx->as_pending; + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + + /* Release the context - it will be scheduled out if there is no + * pending job */ + if (!pending) + kbasep_js_runpool_release_ctx(kbdev, kctx); + + kbase_js_sched_all(kbdev); +} + +void kbasep_js_suspend(struct kbase_device *kbdev) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + int i; + u16 retained = 0u; + int nr_privileged_ctx = 0; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(kbase_pm_is_suspending(kbdev)); + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + /* Prevent all contexts from submitting */ + js_devdata->runpool_irq.submit_allowed = 0; + + /* Retain each of the contexts, so we can cause it to leave even if it + * had no refcount to begin with */ + for (i = BASE_MAX_NR_AS - 1; i >= 0; --i) { + struct kbasep_js_per_as_data *js_per_as_data = + &js_devdata->runpool_irq.per_as_data[i]; + struct kbase_context *kctx = js_per_as_data->kctx; + + retained = retained << 1; + + if (kctx) { + ++(js_per_as_data->as_busy_refcount); + retained |= 1u; + /* We can only cope with up to 1 privileged context - + * the instrumented context. It'll be suspended by + * disabling instrumentation */ + if (kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_PRIVILEGED) + KBASE_DEBUG_ASSERT(++nr_privileged_ctx == 1); + } + } + CSTD_UNUSED(nr_privileged_ctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + /* De-ref the previous retain to ensure each context gets pulled out + * sometime later. 
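+ * + * As a worked example (assuming BASE_MAX_NR_AS == 4 purely for + * illustration, with contexts held in AS1 and AS3 only): the retain loop + * above leaves 'retained' == 0b1010, i.e. bit i set for each address space + * i that held a context, and the loop below shifts the mask right once per + * address space so that (retained & 1u) tests exactly that bit.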
*/ + for (i = 0; + i < BASE_MAX_NR_AS; + ++i, retained = retained >> 1) { + struct kbasep_js_per_as_data *js_per_as_data = + &js_devdata->runpool_irq.per_as_data[i]; + struct kbase_context *kctx = js_per_as_data->kctx; + + if (retained & 1u) + kbasep_js_runpool_release_ctx(kbdev, kctx); + } + + /* Caller must wait for all Power Manager active references to be + * dropped */ +} + +void kbasep_js_resume(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + int js; + + KBASE_DEBUG_ASSERT(kbdev); + js_devdata = &kbdev->js_data; + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + + mutex_lock(&js_devdata->queue_mutex); + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + struct kbase_context *kctx, *n; + + list_for_each_entry_safe(kctx, n, + &kbdev->js_data.ctx_list_unpullable[js], + jctx.sched_info.ctx.ctx_list_entry[js]) { + struct kbasep_js_kctx_info *js_kctx_info; + unsigned long flags; + bool timer_sync = false; + + js_kctx_info = &kctx->jctx.sched_info; + + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + if (!js_kctx_info->ctx.is_scheduled && + kbase_js_ctx_pullable(kctx, js, false)) + timer_sync = kbase_js_ctx_list_add_pullable( + kbdev, kctx, js); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + } + } + mutex_unlock(&js_devdata->queue_mutex); + + /* Restart atom processing */ + kbase_js_sched_all(kbdev); + + /* JS Resume complete */ +} + +bool kbase_js_is_atom_valid(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + if ((katom->core_req & BASE_JD_REQ_FS) && + (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | + BASE_JD_REQ_T))) + return false; + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987) && + (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) && + (katom->core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_T))) + return false; + + return true; +} + +static int kbase_js_get_slot(struct kbase_device *kbdev, + struct kbase_jd_atom *katom) +{ + if (katom->core_req & BASE_JD_REQ_FS) + return 0; + + if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) { + if (katom->device_nr == 1 && + kbdev->gpu_props.num_core_groups == 2) + return 2; + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987)) + return 2; + } + + return 1; +} + +int kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom, + bool *enqueue_required) +{ + katom->slot_nr = kbase_js_get_slot(kctx->kbdev, katom); + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + /* If slot will transition from unpullable to pullable then add to + * pullable list */ + if (jsctx_rb_is_empty(kctx, katom->slot_nr)) { + *enqueue_required = true; + } else { + *enqueue_required = false; + /* Check if there are lower priority jobs to soft stop */ + kbase_job_slot_ctx_priority_check_locked(kctx, katom); + } + + /* Add atom to ring buffer. */ + if (unlikely(jsctx_rb_add_atom(kctx, katom))) { + /* The ring buffer is full. This should be impossible as the + * job dispatcher can not submit enough atoms to exceed the + * ring buffer size. Fail the job. 
+ */ + WARN(1, "Job submit while JSCTX ringbuffer already full\n"); + return -EINVAL; + } + + katom->atom_flags |= KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; + + return 0; +} + +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js) +{ + struct kbase_jd_atom *katom; + struct kbasep_js_device_data *js_devdata; + int pulled; + + KBASE_DEBUG_ASSERT(kctx); + + js_devdata = &kctx->kbdev->js_data; + lockdep_assert_held(&js_devdata->runpool_irq.lock); + + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) + return NULL; + if (kbase_pm_is_suspending(kctx->kbdev)) + return NULL; + + katom = jsctx_rb_peek(kctx, js); + if (!katom) + return NULL; + + if (atomic_read(&katom->blocked)) + return NULL; + + /* Due to ordering restrictions when unpulling atoms on failure, we do + * not allow multiple runs of fail-dep atoms from the same context to be + * present on the same slot */ + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) && + atomic_read(&kctx->atoms_pulled_slot[js])) { + struct kbase_jd_atom *prev_atom = + kbase_backend_inspect_tail(kctx->kbdev, js); + + if (prev_atom && prev_atom->kctx != kctx) + return NULL; + } + + if (katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) { + if (katom->x_pre_dep->gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) + return NULL; + if ((katom->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) && + kbase_backend_nr_atoms_on_slot(kctx->kbdev, js)) + return NULL; + } + + kctx->pulled = true; + pulled = atomic_inc_return(&kctx->atoms_pulled); + if (pulled == 1 && !kctx->slots_pullable) + atomic_inc(&kctx->kbdev->js_data.nr_contexts_runnable); + atomic_inc(&kctx->atoms_pulled_slot[katom->slot_nr]); + jsctx_rb_pull(kctx, katom); + + kbasep_js_runpool_retain_ctx_nolock(kctx->kbdev, kctx); + katom->atom_flags |= KBASE_KATOM_FLAG_HOLDING_CTX_REF; + + katom->sched_info.cfs.ticks = 0; + + return katom; +} + + +static void js_return_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom = container_of(data, struct kbase_jd_atom, + work); + struct kbase_context *kctx = katom->kctx; + struct kbase_device *kbdev = kctx->kbdev; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + struct kbasep_js_atom_retained_state retained_state; + int js = katom->slot_nr; + bool timer_sync = false; + bool context_idle = false; + unsigned long flags; + + kbase_backend_complete_wq(kbdev, katom); + + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8316)) + kbase_as_poking_timer_release_atom(kbdev, kctx, katom); + + kbasep_js_atom_retained_state_copy(&retained_state, katom); + + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + + atomic_dec(&kctx->atoms_pulled); + atomic_dec(&kctx->atoms_pulled_slot[js]); + + atomic_dec(&katom->blocked); + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + if (!atomic_read(&kctx->atoms_pulled_slot[js]) && + jsctx_rb_is_empty(kctx, js)) + timer_sync |= kbase_js_ctx_list_remove(kbdev, kctx, js); + + if (!atomic_read(&kctx->atoms_pulled)) { + if (!kctx->slots_pullable) + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + + if (kctx->as_nr != KBASEP_AS_NR_INVALID && + !js_kctx_info->ctx.is_dying) { + int num_slots = kbdev->gpu_props.num_job_slots; + int slot; + + if (!kbasep_js_is_submit_allowed(js_devdata, kctx)) + kbasep_js_set_submit_allowed(js_devdata, kctx); + + for (slot = 0; slot < num_slots; slot++) { + if (kbase_js_ctx_pullable(kctx, slot, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable( + kbdev, kctx, 
slot); + } + } + + kbase_jm_idle_ctx(kbdev, kctx); + + context_idle = true; + } + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + if (context_idle) + kbase_pm_context_idle(kbdev); + + if (timer_sync) + kbase_js_sync_timers(kbdev); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + katom->atom_flags &= ~KBASE_KATOM_FLAG_HOLDING_CTX_REF; + kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx, + &retained_state); + + kbase_js_sched_all(kbdev); +} + +void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + jsctx_rb_unpull(kctx, katom); + + WARN_ON(work_pending(&katom->work)); + + /* Block re-submission until workqueue has run */ + atomic_inc(&katom->blocked); + + kbase_job_check_leave_disjoint(kctx->kbdev, katom); + + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, js_return_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +static bool kbase_js_evict_atom(struct kbase_context *kctx, + struct kbase_jd_atom *katom_evict, + struct kbase_jd_atom *start_katom, + struct kbase_jd_atom *head_katom, + struct list_head *evict_list, + struct jsctx_rb *rb, int idx) +{ + struct kbase_jd_atom *x_dep = katom_evict->x_post_dep; + + if (!(katom_evict->atom_flags & KBASE_KATOM_FLAG_FAIL_PREV) && + katom_evict != start_katom) + return false; + + if (katom_evict->gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB) { + WARN_ON(katom_evict->gpu_rb_state != + KBASE_ATOM_GPU_RB_RETURN_TO_JS); + WARN_ON(katom_evict->event_code != head_katom->event_code); + + return false; + } + + if (katom_evict->status == KBASE_JD_ATOM_STATE_HW_COMPLETED && + katom_evict != head_katom) + return false; + + /* Evict cross dependency if present */ + if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) + && (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) + list_add_tail(&x_dep->dep_item[0], evict_list); + + /* If cross dependency is present and does not have a data dependency + * then unblock */ + if (x_dep && (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) + && !(x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)) + x_dep->atom_flags &= ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; + + if (katom_evict != head_katom) { + rb->entries[idx & JSCTX_RB_MASK].atom_id = + KBASEP_ATOM_ID_INVALID; + + katom_evict->event_code = head_katom->event_code; + katom_evict->atom_flags &= + ~KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED; + + if (katom_evict->atom_flags & KBASE_KATOM_FLAG_HOLDING_CTX_REF) + kbase_jd_done(katom_evict, katom_evict->slot_nr, NULL, + 0); + else + kbase_jd_evict(kctx->kbdev, katom_evict); + } + + return true; +} + +/** + * kbase_js_evict_deps - Evict dependencies + * @kctx: Context pointer + * @head_katom: Pointer to the atom to evict + * + * Remove all post dependencies of an atom from the context ringbuffers. + * + * The original atom's event_code will be propagated to all dependent atoms.
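kbase_js_evict_deps() below drives this helper with a struct list_head used as a LIFO work stack: entries are pushed with list_add_tail() and popped from ->prev until the list drains. A minimal sketch of that traversal pattern, with a hypothetical toy_node standing in for kbase_jd_atom:

#include <linux/list.h>

struct toy_node {
	struct list_head link;
	/* ... payload ... */
};

static void toy_drain_stack(struct list_head *stack)
{
	while (!list_empty(stack)) {
		struct toy_node *n = list_entry(stack->prev,
						struct toy_node, link);

		list_del(stack->prev);

		/* Processing 'n' may push further nodes onto 'stack' (as
		 * jsctx_rb_evict() does with dependent atoms), which is why
		 * emptiness is re-tested on every iteration. */
		(void)n;
	}
}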
+ * + * Context: Caller must hold both jctx and HW access locks + */ +static void kbase_js_evict_deps(struct kbase_context *kctx, + struct kbase_jd_atom *head_katom) +{ + struct list_head evict_list; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + INIT_LIST_HEAD(&evict_list); + + list_add_tail(&head_katom->dep_item[0], &evict_list); + + while (!list_empty(&evict_list)) { + struct kbase_jd_atom *start_katom; + + start_katom = list_entry(evict_list.prev, struct kbase_jd_atom, + dep_item[0]); + list_del(evict_list.prev); + + jsctx_rb_evict(kctx, start_katom, head_katom, &evict_list); + } +} + +/** + * kbase_js_compact - Compact JSCTX ringbuffers + * @kctx: Context pointer + * + * Compact the JSCTX ringbuffers, removing any NULL entries + * + * Context: Caller must hold both jctx and HW access locks + */ +static void kbase_js_compact(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + int js; + + lockdep_assert_held(&kctx->jctx.sched_info.ctx.jsctx_mutex); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) + jsctx_rb_compact(kctx, js); +} + +void kbase_js_complete_atom_wq(struct kbase_context *kctx, + struct kbase_jd_atom *katom) +{ + struct kbasep_js_kctx_info *js_kctx_info; + struct kbasep_js_device_data *js_devdata; + struct kbase_device *kbdev; + unsigned long flags; + bool timer_sync = false; + int atom_slot; + bool context_idle = false; + + kbdev = kctx->kbdev; + atom_slot = katom->slot_nr; + + js_kctx_info = &kctx->jctx.sched_info; + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex); + + mutex_lock(&js_devdata->runpool_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + if (katom->atom_flags & KBASE_KATOM_FLAG_JSCTX_RB_SUBMITTED) { + if (katom->event_code != BASE_JD_EVENT_DONE) + kbase_js_evict_deps(kctx, katom); + + jsctx_rb_remove(kctx, katom); + + context_idle = !atomic_dec_return(&kctx->atoms_pulled); + atomic_dec(&kctx->atoms_pulled_slot[atom_slot]); + + if (!atomic_read(&kctx->atoms_pulled) && !kctx->slots_pullable) + atomic_dec(&kbdev->js_data.nr_contexts_runnable); + + if (katom->event_code != BASE_JD_EVENT_DONE) + kbase_js_compact(kctx); + } + + if (!atomic_read(&kctx->atoms_pulled_slot[atom_slot]) && + jsctx_rb_is_empty(kctx, atom_slot)) + timer_sync |= kbase_js_ctx_list_remove(kctx->kbdev, kctx, + atom_slot); + + /* + * If submission is disabled on this context (most likely due to an + * atom failure) and there are now no atoms left in the system then + * re-enable submission so that context can be scheduled again. 
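jsctx_rb_compact() is defined elsewhere in the driver; assuming a simple index-addressed ring whose evicted slots are marked with an invalid ID (as the eviction path above does via KBASEP_ATOM_ID_INVALID), compaction amounts to a stable copy-down of the surviving entries. A hypothetical sketch, not the actual implementation:

#define TOY_RB_SIZE 256
#define TOY_RB_MASK (TOY_RB_SIZE - 1)
#define TOY_ID_INVALID (-1)

struct toy_rb {
	int entries[TOY_RB_SIZE];
	unsigned int read_idx;	/* first live entry */
	unsigned int write_idx;	/* one past the last live entry */
};

static void toy_rb_compact(struct toy_rb *rb)
{
	unsigned int src, dst = rb->read_idx;

	/* Stable copy-down: valid entries keep their relative order. */
	for (src = rb->read_idx; src != rb->write_idx; src++) {
		int id = rb->entries[src & TOY_RB_MASK];

		if (id != TOY_ID_INVALID)
			rb->entries[dst++ & TOY_RB_MASK] = id;
	}
	rb->write_idx = dst;
}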
+ */ + if (!kbasep_js_is_submit_allowed(js_devdata, kctx) && + !atomic_read(&kctx->atoms_pulled) && + !js_kctx_info->ctx.is_dying) { + int js; + + kbasep_js_set_submit_allowed(js_devdata, kctx); + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= kbase_js_ctx_list_add_pullable( + kbdev, kctx, js); + } + } else if (katom->x_post_dep && + kbasep_js_is_submit_allowed(js_devdata, kctx)) { + int js; + + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= kbase_js_ctx_list_add_pullable( + kbdev, kctx, js); + } + } + + if (context_idle) + kbase_jm_idle_ctx(kbdev, kctx); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + if (timer_sync) + kbase_backend_ctx_count_changed(kbdev); + mutex_unlock(&js_devdata->runpool_mutex); + + if (context_idle) + kbase_pm_context_idle(kbdev); +} + +void kbase_js_complete_atom(struct kbase_jd_atom *katom, ktime_t *end_timestamp) +{ + u64 microseconds_spent = 0; + struct kbase_device *kbdev; + struct kbase_context *kctx = katom->kctx; + union kbasep_js_policy *js_policy; + struct kbasep_js_device_data *js_devdata; + + kbdev = kctx->kbdev; + + js_policy = &kbdev->js_data.policy; + js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + + katom->status = KBASE_JD_ATOM_STATE_HW_COMPLETED; + +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_job_slots_event(GATOR_MAKE_EVENT(GATOR_JOB_SLOT_STOP, + katom->slot_nr), NULL, 0); +#endif +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_tl_nret_atom_lpu( + katom, + &kbdev->gpu_props.props.raw_props.js_features[ + katom->slot_nr]); +#endif + /* Calculate the job's time used */ + if (end_timestamp != NULL) { + /* Only calculating it for jobs that really run on the HW (e.g. + * removed from next jobs never actually ran, so really did take + * zero time) */ + ktime_t tick_diff = ktime_sub(*end_timestamp, + katom->start_timestamp); + + microseconds_spent = ktime_to_ns(tick_diff); + + do_div(microseconds_spent, 1000); + + /* Round up time spent to the minimum timer resolution */ + if (microseconds_spent < KBASEP_JS_TICK_RESOLUTION_US) + microseconds_spent = KBASEP_JS_TICK_RESOLUTION_US; + } + + /* Log the result of the job (completion status, and time spent). 
*/ + kbasep_js_policy_log_job_result(js_policy, katom, microseconds_spent); + + kbase_jd_done(katom, katom->slot_nr, end_timestamp, 0); + + /* Unblock cross dependency if present */ + if (katom->x_post_dep && (katom->event_code == BASE_JD_EVENT_DONE || + !(katom->x_post_dep->atom_flags & + KBASE_KATOM_FLAG_FAIL_BLOCKER))) + katom->x_post_dep->atom_flags &= + ~KBASE_KATOM_FLAG_X_DEP_BLOCKED; +} + +void kbase_js_sched(struct kbase_device *kbdev, int js_mask) +{ + struct kbasep_js_device_data *js_devdata; + union kbasep_js_policy *js_policy; + bool timer_sync = false; + + js_devdata = &kbdev->js_data; + js_policy = &js_devdata->policy; + + down(&js_devdata->schedule_sem); + mutex_lock(&js_devdata->queue_mutex); + + while (js_mask) { + int js; + + js = ffs(js_mask) - 1; + + while (1) { + struct kbase_context *kctx; + unsigned long flags; + bool context_idle = false; + + kctx = kbase_js_ctx_list_pop_head(kbdev, js); + + if (!kctx) { + js_mask &= ~(1 << js); + break; /* No contexts on pullable list */ + } + + if (!atomic_read(&kctx->atoms_pulled)) { + context_idle = true; + + if (kbase_pm_context_active_handle_suspend( + kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) { + /* Suspend pending - return context to + * queue and stop scheduling */ + mutex_lock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + if (kbase_js_ctx_list_add_pullable_head( + kctx->kbdev, kctx, js)) + kbase_js_sync_timers(kbdev); + mutex_unlock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + up(&js_devdata->schedule_sem); + return; + } + } + + if (!kbase_js_use_ctx(kbdev, kctx)) { + mutex_lock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + /* Context can not be used at this time */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, + flags); + if (kctx->as_pending || + kbase_js_ctx_pullable(kctx, js, false) + || (kctx->jctx.sched_info.ctx.flags & + KBASE_CTX_FLAG_PRIVILEGED)) + timer_sync |= + kbase_js_ctx_list_add_pullable_head( + kctx->kbdev, kctx, js); + else + timer_sync |= + kbase_js_ctx_list_add_unpullable( + kctx->kbdev, kctx, js); + spin_unlock_irqrestore( + &js_devdata->runpool_irq.lock, flags); + mutex_unlock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + if (context_idle) + kbase_pm_context_idle(kbdev); + + /* No more jobs can be submitted on this slot */ + js_mask &= ~(1 << js); + break; + } + mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + kctx->pulled = false; + + if (!kbase_jm_kick(kbdev, 1 << js)) + /* No more jobs can be submitted on this slot */ + js_mask &= ~(1 << js); + + if (!kctx->pulled) { + /* Failed to pull jobs - push to head of list */ + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= + kbase_js_ctx_list_add_pullable_head( + kctx->kbdev, + kctx, js); + else + timer_sync |= + kbase_js_ctx_list_add_unpullable( + kctx->kbdev, + kctx, js); + + if (context_idle) { + kbase_jm_idle_ctx(kbdev, kctx); + spin_unlock_irqrestore( + &js_devdata->runpool_irq.lock, + flags); + kbase_pm_context_idle(kbdev); + } else { + spin_unlock_irqrestore( + &js_devdata->runpool_irq.lock, + flags); + } + mutex_unlock( + &kctx->jctx.sched_info.ctx.jsctx_mutex); + + js_mask &= ~(1 << js); + break; /* Could not run atoms on this slot */ + } + + /* Push to back of list */ + if (kbase_js_ctx_pullable(kctx, js, true)) + timer_sync |= kbase_js_ctx_list_add_pullable( + kctx->kbdev, kctx, js); + else + timer_sync |= kbase_js_ctx_list_add_unpullable( + kctx->kbdev, kctx, js); + 
spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, + flags); + mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex); + } + } + + if (timer_sync) + kbase_js_sync_timers(kbdev); + + mutex_unlock(&js_devdata->queue_mutex); + up(&js_devdata->schedule_sem); +} + +void kbase_js_zap_context(struct kbase_context *kctx) +{ + struct kbase_device *kbdev = kctx->kbdev; + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + struct kbasep_js_kctx_info *js_kctx_info = &kctx->jctx.sched_info; + int js; + + /* + * Critical assumption: No more submission is possible outside of the + * workqueue. This is because the OS *must* prevent U/K calls (IOCTLs) + * whilst the struct kbase_context is terminating. + */ + + /* First, atomically do the following: + * - mark the context as dying + * - try to evict it from the policy queue */ + mutex_lock(&js_devdata->queue_mutex); + mutex_lock(&js_kctx_info->ctx.jsctx_mutex); + js_kctx_info->ctx.is_dying = true; + + dev_dbg(kbdev->dev, "Zap: Try Evict Ctx %p", kctx); + + /* + * At this point we know: + * - If eviction succeeded, it was in the policy queue, but now no + * longer is + * - We must cancel the jobs here. No Power Manager active reference to + * release. + * - This happens asynchronously - kbase_jd_zap_context() will wait for + * those jobs to be killed. + * - If eviction failed, then it wasn't in the policy queue. It is one + * of the following: + * - a. it didn't have any jobs, and so is not in the Policy Queue or + * the Run Pool (not scheduled) + * - Hence, no more work required to cancel jobs. No Power Manager + * active reference to release. + * - b. it was in the middle of a scheduling transaction (and thus must + * have at least 1 job). This can happen from a syscall or a + * kernel thread. We still hold the jsctx_mutex, and so the thread + * must be waiting inside kbasep_js_try_schedule_head_ctx(), + * before checking whether the runpool is full. That thread will + * continue after we drop the mutex, and will notice the context + * is dying. It will rollback the transaction, killing all jobs at + * the same time. kbase_jd_zap_context() will wait for those jobs + * to be killed. + * - Hence, no more work required to cancel jobs, or to release the + * Power Manager active reference. + * - c. it is scheduled, and may or may not be running jobs + * - We must cause it to leave the runpool by stopping it from + * submitting any more jobs. When it finally does leave, + * kbasep_js_runpool_requeue_or_kill_ctx() will kill all remaining jobs + * (because it is dying), release the Power Manager active reference, + * and will not requeue the context in the policy queue. + * kbase_jd_zap_context() will wait for those jobs to be killed. + * - Hence, work required just to make it leave the runpool. Cancelling + * jobs and releasing the Power manager active reference will be + * handled when it leaves the runpool. 
+ */ + if (!js_kctx_info->ctx.is_scheduled) { + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) { + if (!list_empty( + &kctx->jctx.sched_info.ctx.ctx_list_entry[js])) + list_del_init( + &kctx->jctx.sched_info.ctx.ctx_list_entry[js]); + } + + /* The following events require us to kill off remaining jobs + * and update PM book-keeping: + * - we evicted it correctly (it must have jobs to be in the + * Policy Queue) + * + * These events need no action, but take this path anyway: + * - Case a: it didn't have any jobs, and was never in the Queue + * - Case b: scheduling transaction will be partially rolled- + * back (this already cancels the jobs) + */ + + KBASE_TRACE_ADD(kbdev, JM_ZAP_NON_SCHEDULED, kctx, NULL, 0u, + js_kctx_info->ctx.is_scheduled); + + dev_dbg(kbdev->dev, "Zap: Ctx %p scheduled=0", kctx); + + /* Only cancel jobs when we evicted from the policy + * queue. No Power Manager active reference was held. + * + * Having is_dying set ensures that this kills, and + * doesn't requeue */ + kbasep_js_runpool_requeue_or_kill_ctx(kbdev, kctx, false); + + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + } else { + unsigned long flags; + bool was_retained; + + /* Case c: didn't evict, but it is scheduled - it's in the Run + * Pool */ + KBASE_TRACE_ADD(kbdev, JM_ZAP_SCHEDULED, kctx, NULL, 0u, + js_kctx_info->ctx.is_scheduled); + dev_dbg(kbdev->dev, "Zap: Ctx %p is in RunPool", kctx); + + /* Disable the ctx from submitting any more jobs */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + /* Retain and (later) release the context whilst it is now + * disallowed from submitting jobs - ensures that someone + * somewhere will be removing the context later on */ + was_retained = kbasep_js_runpool_retain_ctx_nolock(kbdev, kctx); + + /* Since it's scheduled and we have the jsctx_mutex, it must be + * retained successfully */ + KBASE_DEBUG_ASSERT(was_retained); + + dev_dbg(kbdev->dev, "Zap: Ctx %p Kill Any Running jobs", kctx); + + /* Cancel any remaining running jobs for this kctx - if any. + * Submit is disallowed which takes effect immediately, so no + * more new jobs will appear after we do this. */ + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) + kbase_job_slot_hardstop(kctx, js, NULL); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + mutex_unlock(&js_kctx_info->ctx.jsctx_mutex); + mutex_unlock(&js_devdata->queue_mutex); + + dev_dbg(kbdev->dev, "Zap: Ctx %p Release (may or may not schedule out immediately)", + kctx); + + kbasep_js_runpool_release_ctx(kbdev, kctx); + } + + KBASE_TRACE_ADD(kbdev, JM_ZAP_DONE, kctx, NULL, 0u, 0u); + + /* After this, you must wait on both the + * kbase_jd_context::zero_jobs_wait and the + * kbasep_js_kctx_info::ctx::is_scheduled_waitq - to wait for the jobs + * to be destroyed, and the context to be de-scheduled (if it was on the + * runpool). + * + * kbase_jd_zap_context() will do this.
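For illustration, the tear-down ordering this comment prescribes, as a sketch only: kbase_jd_zap_context() is the function that really performs these waits, and the job-count field name (jctx.job_nr) is a hypothetical stand-in, since only the waitqueues are named here.

static void toy_destroy_ctx_sync(struct kbase_context *kctx)
{
	kbase_js_zap_context(kctx);

	/* Wait for the cancelled jobs to be destroyed...
	 * (jctx.job_nr is a hypothetical field name) */
	wait_event(kctx->jctx.zero_jobs_wait,
		   atomic_read(&kctx->jctx.job_nr) == 0);

	/* ...and for the context to leave the runpool. */
	wait_event(kctx->jctx.sched_info.ctx.is_scheduled_wait,
		   !kctx->jctx.sched_info.ctx.is_scheduled);
}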
*/ +} + +static inline int trace_get_refcnt(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + int as_nr; + int refcnt = 0; + + js_devdata = &kbdev->js_data; + + as_nr = kctx->as_nr; + if (as_nr != KBASEP_AS_NR_INVALID) { + struct kbasep_js_per_as_data *js_per_as_data; + + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + refcnt = js_per_as_data->as_busy_refcount; + } + + return refcnt; +} + +/** + * kbase_js_foreach_ctx_job(): - Call a function on all jobs in context + * @kctx: Pointer to context. + * @callback: Pointer to function to call for each job. + * + * Call a function on all jobs belonging to a non-queued, non-running + * context, and detach the jobs from the context as it goes. + * + * Due to the locks that might be held at the time of the call, the callback + * may need to defer work on a workqueue to complete its actions (e.g. when + * cancelling jobs) + * + * Atoms will be removed from the queue, so this must only be called when + * cancelling jobs (which occurs as part of context destruction). + * + * The locking conditions on the caller are as follows: + * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex. + */ +static void kbase_js_foreach_ctx_job(struct kbase_context *kctx, + kbasep_js_policy_ctx_job_cb callback) +{ + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + unsigned long flags; + u32 js; + + kbdev = kctx->kbdev; + + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_FOREACH_CTX_JOBS, kctx, NULL, + 0u, trace_get_refcnt(kbdev, kctx)); + + /* Invoke callback on jobs on each slot in turn */ + for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) + jsctx_rb_foreach(kctx, js, callback); + + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_js.h b/drivers/gpu/arm/midgard/mali_kbase_js.h new file mode 100644 index 00000000000..79e7705932f --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js.h @@ -0,0 +1,1054 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js.h + * Job Scheduler APIs. + */ + +#ifndef _KBASE_JS_H_ +#define _KBASE_JS_H_ + +#include "mali_kbase_js_defs.h" +#include "mali_kbase_js_policy.h" +#include "mali_kbase_defs.h" +#include "mali_kbase_debug.h" + +#include "mali_kbase_js_ctx_attr.h" + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js Job Scheduler Internal APIs + * @{ + * + * These APIs are Internal to KBase and are available for use by the + * @ref kbase_js_policy "Job Scheduler Policy APIs" + */ + +/** + * @brief Initialize the Job Scheduler + * + * The struct kbasep_js_device_data sub-structure of \a kbdev must be zero + * initialized before passing to the kbasep_js_devdata_init() function. This is + * to give efficient error path code. 
+ */ +int kbasep_js_devdata_init(struct kbase_device * const kbdev); + +/** + * @brief Halt the Job Scheduler. + * + * It is safe to call this on \a kbdev even if the kbasep_js_device_data + * sub-structure was never initialized/failed initialization, to give efficient + * error-path code. + * + * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must + * be zero initialized before passing to the kbasep_js_devdata_init() + * function. This is to give efficient error path code. + * + * It is a Programming Error to call this whilst there are still kbase_context + * structures registered with this scheduler. + * + */ +void kbasep_js_devdata_halt(struct kbase_device *kbdev); + +/** + * @brief Terminate the Job Scheduler + * + * It is safe to call this on \a kbdev even if the kbasep_js_device_data + * sub-structure was never initialized/failed initialization, to give efficient + * error-path code. + * + * For this to work, the struct kbasep_js_device_data sub-structure of \a kbdev must + * be zero initialized before passing to the kbasep_js_devdata_init() + * function. This is to give efficient error path code. + * + * It is a Programming Error to call this whilst there are still kbase_context + * structures registered with this scheduler. + */ +void kbasep_js_devdata_term(struct kbase_device *kbdev); + +/** + * @brief Initialize the Scheduling Component of a struct kbase_context on the Job Scheduler. + * + * This effectively registers a struct kbase_context with a Job Scheduler. + * + * It does not register any jobs owned by the struct kbase_context with the scheduler. + * Those must be separately registered by kbasep_js_add_job(). + * + * The struct kbase_context must be zero initialized before passing to the + * kbase_js_init() function. This is to give efficient error path code. + */ +int kbasep_js_kctx_init(struct kbase_context * const kctx); + +/** + * @brief Terminate the Scheduling Component of a struct kbase_context on the Job Scheduler + * + * This effectively de-registers a struct kbase_context from its Job Scheduler + * + * It is safe to call this on a struct kbase_context that has never had or failed + * initialization of its jctx.sched_info member, to give efficient error-path + * code. + * + * For this to work, the struct kbase_context must be zero initialized before passing + * to the kbase_js_init() function. + * + * It is a Programming Error to call this whilst there are still jobs + * registered with this context. + */ +void kbasep_js_kctx_term(struct kbase_context *kctx); + +/** + * @brief Add a job chain to the Job Scheduler, and take necessary actions to + * schedule the context/run the job. + * + * This atomically does the following: + * - Update the numbers of jobs information + * - Add the job to the run pool if necessary (part of init_job) + * + * Once this is done, then an appropriate action is taken: + * - If the ctx is scheduled, it attempts to start the next job (which might be + * this added job) + * - Otherwise, and if this is the first job on the context, it enqueues it on + * the Policy Queue + * + * The Policy's Queue can be updated by this in the following ways: + * - In the above case that this is the first job on the context + * - If the context is high priority and the context is not scheduled, then it + * could cause the Policy to schedule out a low-priority context, allowing + * this context to be scheduled in.
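Given the return-value contract spelled out below, a caller is expected to kick the scheduler only when the Policy Queue changed. A sketch of that pattern; the dispatch code around it is outside this hunk, so the wrapper name is illustrative:

static void toy_submit(struct kbase_context *kctx, struct kbase_jd_atom *atom)
{
	/* true means the Policy Queue was updated, so try to schedule a
	 * context onto the Run Pool; false needs no further action. */
	if (kbasep_js_add_job(kctx, atom))
		kbase_js_sched_all(kctx->kbdev);
}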
+ * + * If the context is already scheduled on the RunPool, then adding a job to it + * is guaranteed not to update the Policy Queue. And so, the caller is + * guaranteed not to need to try scheduling a context from the Run Pool - it + * can safely assert that the result is false. + * + * It is a programming error to have more than U32_MAX jobs in flight at a time. + * + * The following locking conditions are made on the caller: + * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it must \em not hold kbasep_js_device_data::runpool_irq::lock (as this will be + * obtained internally) + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally). + * + * @return true indicates that the Policy Queue was updated, and so the + * caller will need to try scheduling a context onto the Run Pool. + * @return false indicates that no updates were made to the Policy Queue, + * so no further action is required from the caller. This is \b always returned + * when the context is currently scheduled. + */ +bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** + * @brief Remove a job chain from the Job Scheduler, except for its 'retained state'. + * + * Completely removing a job requires several calls: + * - kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of + * the atom + * - kbasep_js_remove_job(), to partially remove the atom from the Job Scheduler + * - kbasep_js_runpool_release_ctx_and_katom_retained_state(), to release the + * remaining state held as part of the job having been run. + * + * In the common case of atoms completing normally, this set of actions is more efficient + * in terms of spinlock usage than having kbasep_js_remove_job() handle all of the actions. + * + * In the case of cancelling atoms, it is easier to call kbasep_js_remove_cancelled_job(), which handles all the necessary actions. + * + * It is a programming error to call this when: + * - \a atom is not a job belonging to kctx. + * - \a atom has already been removed from the Job Scheduler. + * - \a atom is still in the runpool: + * - it has not been removed with kbasep_js_policy_dequeue_job() + * - or, it has not been removed with kbasep_js_policy_dequeue_job_irq() + * + * Do not use this for removing jobs being killed by kbase_jd_cancel() - use + * kbasep_js_remove_cancelled_job() instead. + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * + */ +void kbasep_js_remove_job(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *atom); + +/** + * @brief Completely remove a job chain from the Job Scheduler, in the case + * where the job chain was cancelled. + * + * This is a variant of kbasep_js_remove_job() that takes care of removing all + * of the retained state too. This is generally useful for cancelled atoms, + * which need not be handled in an optimal way. + * + * It is a programming error to call this when: + * - \a atom is not a job belonging to kctx. + * - \a atom has already been removed from the Job Scheduler.
+ * - \a atom is still in the runpool: + * - it is not being killed with kbasep_jd_cancel() + * - or, it has not been removed with kbasep_js_policy_dequeue_job() + * - or, it has not been removed with kbasep_js_policy_dequeue_job_irq() + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock (as this will be + * obtained internally) + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this could be + * obtained internally) + * + * @return true indicates that ctx attributes have changed and the caller + * should call kbase_js_sched_all() to try to run more jobs + * @return false otherwise + */ +bool kbasep_js_remove_cancelled_job(struct kbase_device *kbdev, + struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * @brief Refcount a context as being busy, preventing it from being scheduled + * out. + * + * @note This function can safely be called from IRQ context. + * + * The following locking conditions are made on the caller: + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. + * + * @return value != false if the retain succeeded, and the context will not be scheduled out. + * @return false if the retain failed (because the context is being/has been scheduled out). + */ +bool kbasep_js_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * @brief Refcount a context as being busy, preventing it from being scheduled + * out. + * + * @note This function can safely be called from IRQ context. + * + * The following locks must be held by the caller: + * - kbasep_js_device_data::runpool_irq::lock + * + * @return value != false if the retain succeeded, and the context will not be scheduled out. + * @return false if the retain failed (because the context is being/has been scheduled out). + */ +bool kbasep_js_runpool_retain_ctx_nolock(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * @brief Lookup a context in the Run Pool based upon its current address space + * and ensure that it stays scheduled in. + * + * The context is refcounted as being busy to prevent it from scheduling + * out. It must be released with kbasep_js_runpool_release_ctx() when it is no + * longer required to stay scheduled in. + * + * @note This function can safely be called from IRQ context. + * + * The following locking conditions are made on the caller: + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. If the runpool_irq::lock is already held, then + * the caller should use kbasep_js_runpool_lookup_ctx_nolock() instead. + * + * @return a valid struct kbase_context on success, which has been refcounted as being busy. + * @return NULL on failure, indicating that no context was found in \a as_nr + */ +struct kbase_context *kbasep_js_runpool_lookup_ctx(struct kbase_device *kbdev, int as_nr); + +/** + * kbasep_js_runpool_lookup_ctx_nolock - Lookup a context in the Run Pool based + * upon its current address space and ensure that it stays scheduled in. + * @kbdev: Device pointer + * @as_nr: Address space to lookup + * + * The context is refcounted as being busy to prevent it from scheduling + * out. It must be released with kbasep_js_runpool_release_ctx() when it is no + * longer required to stay scheduled in. + * + * Note: This function can safely be called from IRQ context.
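A typical use of the lookup call is mapping an address space reported by the GPU (e.g. on a fault) back to its context from IRQ context, then deferring the matching release to process context, since kbasep_js_runpool_release_ctx() may not be called from IRQ. A sketch under those assumptions; toy_fault and its worker are illustrative names, not driver code:

#include <linux/workqueue.h>

struct toy_fault {
	struct work_struct work;
	struct kbase_context *kctx;
};

static void toy_fault_worker(struct work_struct *work)
{
	struct toy_fault *f = container_of(work, struct toy_fault, work);

	/* ... handle the fault against f->kctx ... */

	kbasep_js_runpool_release_ctx(f->kctx->kbdev, f->kctx);
}

static void toy_as_fault_irq(struct kbase_device *kbdev, int as_nr,
			     struct toy_fault *f)
{
	/* IRQ-safe; refcounts the context so it cannot schedule out. */
	f->kctx = kbasep_js_runpool_lookup_ctx(kbdev, as_nr);
	if (f->kctx) {
		INIT_WORK(&f->work, toy_fault_worker);
		schedule_work(&f->work);
	}
}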
+ * + * The following locking conditions are made on the caller: + * - it must hold the kbasep_js_device_data::runpool_irq::lock. + * + * Return: a valid struct kbase_context on success, which has been refcounted as + * being busy. + * NULL on failure, indicating that no context was found in \a as_nr + */ +struct kbase_context *kbasep_js_runpool_lookup_ctx_nolock( + struct kbase_device *kbdev, int as_nr); + +/** + * @brief Handle the requeuing/killing of a context that was evicted from the + * policy queue or runpool. + * + * This should be used whenever handing off a context that has been evicted + * from the policy queue or the runpool: + * - If the context is not dying and has jobs, it gets re-added to the policy + * queue + * - Otherwise, it is not added + * + * In addition, if the context is dying the jobs are killed asynchronously. + * + * In all cases, the Power Manager active reference is released + * (kbase_pm_context_idle()) whenever the has_pm_ref parameter is true. \a + * has_pm_ref must be set to false whenever the context was not previously in + * the runpool and does not hold a Power Manager active refcount. Note that + * contexts in a rollback of kbasep_js_try_schedule_head_ctx() might have an + * active refcount even though they weren't in the runpool. + * + * The following locking conditions are made on the caller: + * - it must hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be + * obtained internally) + */ +void kbasep_js_runpool_requeue_or_kill_ctx(struct kbase_device *kbdev, struct kbase_context *kctx, bool has_pm_ref); + +/** + * @brief Release a refcount of a context being busy, allowing it to be + * scheduled out. + * + * When the refcount reaches zero, the context \em might be scheduled out + * (depending on whether the Scheduling Policy has deemed it so, or if it has run + * out of jobs). + * + * If the context does get scheduled out, then the following actions will be + * taken as part of descheduling the context: + * - For the context being descheduled: + * - If the context is in the process of dying (all the jobs are being + * removed from it), then descheduling also kills off any jobs remaining in the + * context. + * - If the context is not dying, and any jobs remain after descheduling the + * context then it is re-enqueued to the Policy's Queue. + * - Otherwise, the context is still known to the scheduler, but remains absent + * from the Policy Queue until a job is next added to it. + * - In all descheduling cases, the Power Manager active reference (obtained + * during kbasep_js_try_schedule_head_ctx()) is released (kbase_pm_context_idle()). + * + * Whilst the context is being descheduled, this also handles actions that + * cause more atoms to be run: + * - Attempt submitting atoms when the Context Attributes on the Runpool have + * changed. This is because the context being scheduled out could mean that + * there are more opportunities to run atoms. + * - Attempt submitting to a slot that was previously blocked due to affinity + * restrictions. This is usually only necessary when releasing a context + * happens as part of completing a previous job, but is harmless nonetheless. + * - Attempt scheduling in a new context (if one is available), and if necessary, + * running a job from that new context. + * + * Unlike retaining a context in the runpool, this function \b cannot be called + * from IRQ context.
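The retain/release pair brackets any process-context use of a context that must not be scheduled out underneath the caller; a hedged sketch of the pairing (the wrapper name is illustrative):

static void toy_with_pinned_ctx(struct kbase_device *kbdev,
				struct kbase_context *kctx)
{
	/* Must not be called with runpool_irq.lock held: retain takes it. */
	if (!kbasep_js_runpool_retain_ctx(kbdev, kctx))
		return;	/* context is being/has been scheduled out */

	/* ... safe to rely on kctx staying scheduled in here ... */

	kbasep_js_runpool_release_ctx(kbdev, kctx);
}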
+ * + * It is a programming error to call this on a \a kctx that is not currently + * scheduled, or that already has a zero refcount. + * + * The following locking conditions are made on the caller: + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. + * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) + * - it must \em not hold kbasep_jd_device_data::queue_mutex (as this will be + * obtained internally) + * + */ +void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * @brief Variant of kbasep_js_runpool_release_ctx() that handles additional + * actions from completing an atom. + * + * This is usually called as part of completing an atom and releasing the + * refcount on the context held by the atom. + * + * Therefore, the extra actions carried out are part of handling actions queued + * on a completed atom, namely: + * - Releasing the atom's context attributes + * - Retrying the submission on a particular slot, because we couldn't submit + * on that slot from an IRQ handler. + * + * The locking conditions of this function are the same as those for + * kbasep_js_runpool_release_ctx() + */ +void kbasep_js_runpool_release_ctx_and_katom_retained_state(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state); + + +/** + * @brief Release the refcount of the context and allow further submission + * from the context after the dump on error in user space terminates. + * + * Before this function is called, when a fault happens the kernel should + * have disallowed the context from further submission of jobs and + * retained the context to prevent it from being removed. This function + * releases the refcount of the context and allows further submission of + * jobs. + * + * This function should only be called in builds compiled with "instr=2". + */ +void kbasep_js_dump_fault_term(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * @brief Variant of kbasep_js_runpool_release_ctx() that assumes that + * kbasep_js_device_data::runpool_mutex and + * kbasep_js_kctx_info::ctx::jsctx_mutex are held by the caller, and does not + * attempt to schedule new contexts. + */ +void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev, + struct kbase_context *kctx); + +/** + * @brief Schedule in a privileged context + * + * This schedules a context in regardless of the context priority. + * If the runpool is full, a context will be forced out of the runpool and the function will wait + * for the new context to be scheduled in. + * The context will be kept scheduled in (and the corresponding address space reserved) until + * kbasep_js_release_privileged_ctx() is called. + * + * The following locking conditions are made on the caller: + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) + * - it must \em not hold kbasep_jd_device_data::queue_mutex (again, it's used internally).
+ * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex, because it will + * be used internally. + * + */ +void kbasep_js_schedule_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * @brief Release a privileged context, allowing it to be scheduled out. + * + * See kbasep_js_runpool_release_ctx for potential side effects. + * + * The following locking conditions are made on the caller: + * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because + * it will be used internally. + * - it must \em not hold kbasep_js_kctx_info::ctx::jsctx_mutex. + * - it must \em not hold kbasep_js_device_data::runpool_mutex (as this will be + * obtained internally) + * - it must \em not hold the kbase_device::as[n].transaction_mutex (as this will be obtained internally) + * + */ +void kbasep_js_release_privileged_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * @brief Try to submit the next job on each slot + * + * The following locks may be used: + * - kbasep_js_device_data::runpool_mutex + * - kbasep_js_device_data::runpool_irq::lock + */ +void kbase_js_try_run_jobs(struct kbase_device *kbdev); + +/** + * @brief Suspend the job scheduler during a Power Management Suspend event. + * + * Causes all contexts to be removed from the runpool, and prevents any + * contexts from (re)entering the runpool. + * + * This does not handle suspending the one privileged context: the caller must + * instead do this by suspending the GPU HW Counter Instrumentation. + * + * This will eventually cause all Power Management active references held by + * contexts on the runpool to be released, without running any more atoms. + * + * The caller must then wait for all Power Management active refcounts to become + * zero before completing the suspend. + * + * The emptying mechanism may take some time to complete, since it can wait for + * jobs to complete naturally instead of forcing them to end quickly. However, + * this is bounded by the Job Scheduling Policy's Job Timeouts. Hence, this + * function is guaranteed to complete in a finite time whenever the Job + * Scheduling Policy implements Job Timeouts (such as those done by CFS). + */ +void kbasep_js_suspend(struct kbase_device *kbdev); + +/** + * @brief Resume the Job Scheduler after a Power Management Resume event. + * + * This restores the actions from kbasep_js_suspend(): + * - Schedules contexts back into the runpool + * - Resumes running atoms on the GPU + */ +void kbasep_js_resume(struct kbase_device *kbdev); + +/** + * @brief Submit an atom to the job scheduler. + * + * The atom is enqueued on the context's ringbuffer. The caller must have + * ensured that all dependencies can be represented in the ringbuffer. + * + * Caller must hold jctx->lock + * + * @param[in] kctx Context pointer + * @param[in] katom Pointer to the atom to submit + * @param[out] enqueue_required Set to true if the slot transitions from + * unpullable to pullable, so the context should be added to + * the pullable list + * + * @return 0 if submit succeeded + * error code if the atom cannot be submitted at this + * time, due to insufficient space in the ringbuffer, or dependencies + * that cannot be represented. + */ +int kbase_js_dep_resolved_submit(struct kbase_context *kctx, + struct kbase_jd_atom *katom, + bool *enqueue_required); + +/** + * @brief Pull an atom from a context in the job scheduler for execution. + * + * The atom will not be removed from the ringbuffer at this stage. + * + * The HW access lock must be held when calling this function.
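A sketch of a backend consumer honouring the pull contract just described: the HW access lock (runpool_irq.lock in this version) is held, and a NULL return ends the drain. toy_backend_submit() is an illustrative stand-in for the slot ringbuffer hand-off:

static void toy_backend_submit(struct kbase_jd_atom *katom)
{
	/* ... enqueue on the hardware slot ringbuffer ... */
}

/* Caller holds runpool_irq.lock, as kbase_js_pull() requires. */
static void toy_fill_slot(struct kbase_context *kctx, int js)
{
	struct kbase_jd_atom *katom;

	while ((katom = kbase_js_pull(kctx, js)) != NULL)
		toy_backend_submit(katom);
}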
+ * + * @param[in] kctx Context to pull from + * @param[in] js Job slot to pull from + * @return Pointer to an atom, or NULL if there are no atoms for this + * slot that can currently be run. + */ +struct kbase_jd_atom *kbase_js_pull(struct kbase_context *kctx, int js); + +/** + * @brief Return an atom to the job scheduler ringbuffer. + * + * An atom is 'unpulled' if execution is stopped but intended to be returned to + * later. The most common reason for this is that the atom has been + * soft-stopped. + * + * Note that if multiple atoms are to be 'unpulled', they must be returned in + * the reverse order to which they were originally pulled. It is a programming + * error to return atoms in any other order. + * + * The HW access lock must be held when calling this function. + * + * @param[in] kctx Context pointer + * @param[in] katom Pointer to the atom to unpull + */ +void kbase_js_unpull(struct kbase_context *kctx, struct kbase_jd_atom *katom); + +/** + * @brief Complete an atom from jd_done_worker(), removing it from the job + * scheduler ringbuffer. + * + * If the atom failed then all dependee atoms marked for failure propagation + * will also fail. + * + * @param[in] kctx Context pointer + * @param[in] katom Pointer to the atom to complete + */ +void kbase_js_complete_atom_wq(struct kbase_context *kctx, + struct kbase_jd_atom *katom); + +/** + * @brief Complete an atom. + * + * Most of the work required to complete an atom will be performed by + * jd_done_worker(). + * + * The HW access lock must be held when calling this function. + * + * @param[in] katom Pointer to the atom to complete + * @param[in] end_timestamp The time that the atom completed (may be NULL) + */ +void kbase_js_complete_atom(struct kbase_jd_atom *katom, + ktime_t *end_timestamp); + +/** + * @brief Submit atoms from all available contexts. + * + * This will attempt to submit as many jobs as possible to the provided job + * slots. It will exit when either all job slots are full, or all contexts have + * been used. + * + * @param[in] kbdev Device pointer + * @param[in] js_mask Mask of job slots to submit to + */ +void kbase_js_sched(struct kbase_device *kbdev, int js_mask); + +/** + * kbase_js_zap_context - Attempt to deschedule a context that is being + * destroyed + * @kctx: Context pointer + * + * This will attempt to remove a context from any internal job scheduler queues + * and perform any other actions to ensure that no further jobs will be + * submitted from the context. + * + * If the context is currently scheduled, then the caller must wait for all + * pending jobs to complete before taking any further action. + */ +void kbase_js_zap_context(struct kbase_context *kctx); + +/** + * @brief Validate an atom + * + * This will determine whether the atom can be scheduled onto the GPU. Atoms + * with invalid combinations of core requirements will be rejected. + * + * @param[in] kbdev Device pointer + * @param[in] katom Atom to validate + * @return true if atom is valid + * false otherwise + */ +bool kbase_js_is_atom_valid(struct kbase_device *kbdev, + struct kbase_jd_atom *katom); + +/* + * Helpers follow + */ + +/** + * @brief Check that a context is allowed to submit jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, and wrap up + * the long repeated line of code. + * + * As with any bool, never test the return value with true. + * + * The caller must hold kbasep_js_device_data::runpool_irq::lock.
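The three helpers that follow are single-bit operations on a u16 keyed by address-space number. The same arithmetic standalone, on a toy mask variable:

static u16 toy_submit_allowed;

static inline void toy_set_allowed(int as_nr)
{
	toy_submit_allowed |= (u16)(1u << as_nr);
}

static inline void toy_clear_allowed(int as_nr)
{
	toy_submit_allowed &= (u16)~(1u << as_nr);
}

static inline bool toy_is_allowed(int as_nr)
{
	return (toy_submit_allowed & (u16)(1u << as_nr)) != 0;
}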
+ */ +static inline bool kbasep_js_is_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) +{ + u16 test_bit; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled); + + test_bit = (u16) (1u << kctx->as_nr); + + return (bool) (js_devdata->runpool_irq.submit_allowed & test_bit); +} + +/** + * @brief Allow a context to submit jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, and wrap up + * the long repeated line of code. + * + * The caller must hold kbasep_js_device_data::runpool_irq::lock. + */ +static inline void kbasep_js_set_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) +{ + u16 set_bit; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled); + + set_bit = (u16) (1u << kctx->as_nr); + + dev_dbg(kctx->kbdev->dev, "JS: Setting Submit Allowed on %p (as=%d)", kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed |= set_bit; +} + +/** + * @brief Prevent a context from submitting more jobs on this policy + * + * The purpose of this abstraction is to hide the underlying data size, and wrap up + * the long repeated line of code. + * + * The caller must hold kbasep_js_device_data::runpool_irq::lock. + */ +static inline void kbasep_js_clear_submit_allowed(struct kbasep_js_device_data *js_devdata, struct kbase_context *kctx) +{ + u16 clear_bit; + u16 clear_mask; + + /* Ensure context really is scheduled in */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + KBASE_DEBUG_ASSERT(kctx->jctx.sched_info.ctx.is_scheduled); + + clear_bit = (u16) (1u << kctx->as_nr); + clear_mask = ~clear_bit; + + dev_dbg(kctx->kbdev->dev, "JS: Clearing Submit Allowed on %p (as=%d)", kctx, kctx->as_nr); + + js_devdata->runpool_irq.submit_allowed &= clear_mask; +} + +/** + * @brief Manage the 'retry_submit_on_slot' part of a kbase_jd_atom + */ +static inline void kbasep_js_clear_job_retry_submit(struct kbase_jd_atom *atom) +{ + atom->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID; +} + +/** + * Mark a slot as requiring resubmission by carrying that information on a + * completing atom. + * + * @note This can ASSERT in debug builds if the submit slot has been set to + * something other than the current value for @a js. This is because you might + * be unintentionally stopping more jobs being submitted on the old submit + * slot, and that might cause a scheduling-hang. + * + * @note If you can guarantee that the atoms for the original slot will be + * submitted on some other slot, then call kbasep_js_clear_job_retry_submit() + * first to silence the ASSERT. 
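As the note above says, retargeting the retry hint to a different slot requires clearing it first, so that the debug ASSERT in the setter (defined next) stays quiet. A minimal sketch of that pattern:

static inline void toy_retarget_retry(struct kbase_jd_atom *atom, int new_js)
{
	kbasep_js_clear_job_retry_submit(atom);
	kbasep_js_set_job_retry_submit_slot(atom, new_js);
}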
+ */ +static inline void kbasep_js_set_job_retry_submit_slot(struct kbase_jd_atom *atom, int js) +{ + KBASE_DEBUG_ASSERT(0 <= js && js <= BASE_JM_MAX_NR_SLOTS); + KBASE_DEBUG_ASSERT((atom->retry_submit_on_slot == + KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID) + || (atom->retry_submit_on_slot == js)); + + atom->retry_submit_on_slot = js; +} + +/** + * Create an initial 'invalid' atom retained state, that requires no + * atom-related work to be done on releasing with + * kbasep_js_runpool_release_ctx_and_katom_retained_state() + */ +static inline void kbasep_js_atom_retained_state_init_invalid(struct kbasep_js_atom_retained_state *retained_state) +{ + retained_state->event_code = BASE_JD_EVENT_NOT_STARTED; + retained_state->core_req = KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID; + retained_state->retry_submit_on_slot = KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID; +} + +/** + * Copy atom state that can be made available after jd_done_nolock() is called + * on that atom. + */ +static inline void kbasep_js_atom_retained_state_copy(struct kbasep_js_atom_retained_state *retained_state, const struct kbase_jd_atom *katom) +{ + retained_state->event_code = katom->event_code; + retained_state->core_req = katom->core_req; + retained_state->retry_submit_on_slot = katom->retry_submit_on_slot; + retained_state->sched_priority = katom->sched_priority; + retained_state->device_nr = katom->device_nr; +} + +/** + * @brief Determine whether an atom has finished (given its retained state), + * and so should be given back to userspace/removed from the system. + * + * Reasons for an atom not finishing include: + * - Being soft-stopped (and so, the atom should be resubmitted sometime later) + * + * @param[in] katom_retained_state the retained state of the atom to check + * @return false if the atom has not finished + * @return !=false if the atom has finished + */ +static inline bool kbasep_js_has_atom_finished(const struct kbasep_js_atom_retained_state *katom_retained_state) +{ + return (bool) (katom_retained_state->event_code != BASE_JD_EVENT_STOPPED && katom_retained_state->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT); +} + +/** + * @brief Determine whether a struct kbasep_js_atom_retained_state is valid + * + * An invalid struct kbasep_js_atom_retained_state is allowed, and indicates that the + * code should just ignore it. + * + * @param[in] katom_retained_state the atom's retained state to check + * @return false if the retained state is invalid, and can be ignored + * @return !=false if the retained state is valid + */ +static inline bool kbasep_js_atom_retained_state_is_valid(const struct kbasep_js_atom_retained_state *katom_retained_state) +{ + return (bool) (katom_retained_state->core_req != KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID); +} + +static inline bool kbasep_js_get_atom_retry_submit_slot(const struct kbasep_js_atom_retained_state *katom_retained_state, int *res) +{ + int js = katom_retained_state->retry_submit_on_slot; + + *res = js; + return (bool) (js >= 0); +} + +#if KBASE_DEBUG_DISABLE_ASSERTS == 0 +/** + * Debug check the refcount of a context. Only use within ASSERTs + * + * Obtains kbasep_js_device_data::runpool_irq::lock + * + * @return negative value if the context is not scheduled in + * @return current refcount of the context if it is scheduled in. The refcount + * is not guaranteed to be kept constant.
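The retained-state helpers above exist so that completion can finish after the atom itself has been handed back; js_return_worker() in mali_kbase_js.c follows the same shape. A sketch of the copy, complete, release sequence (the wrapper name is illustrative):

static void toy_complete_and_release(struct kbase_device *kbdev,
				     struct kbase_context *kctx,
				     struct kbase_jd_atom *katom)
{
	struct kbasep_js_atom_retained_state retained;

	/* Snapshot what release will need before the atom is recycled. */
	kbasep_js_atom_retained_state_copy(&retained, katom);

	/* ... completion processing may free or reuse katom from here ... */

	kbasep_js_runpool_release_ctx_and_katom_retained_state(kbdev, kctx,
							       &retained);
}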
+ */
+static inline int kbasep_js_debug_check_ctx_refcount(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	int result = -1;
+	int as_nr;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_devdata = &kbdev->js_data;
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+	as_nr = kctx->as_nr;
+	if (as_nr != KBASEP_AS_NR_INVALID)
+		result = js_devdata->runpool_irq.per_as_data[as_nr].as_busy_refcount;
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return result;
+}
+#endif				/* KBASE_DEBUG_DISABLE_ASSERTS == 0 */
+
+/**
+ * @brief Variant of kbasep_js_runpool_lookup_ctx() that can be used when the
+ * context is guaranteed to have been previously retained.
+ *
+ * It is a programming error to supply the \a as_nr of a context that has not
+ * been previously retained/has a busy refcount of zero. The only exception is
+ * when there is no ctx in \a as_nr (NULL returned).
+ *
+ * The following locking conditions are placed on the caller:
+ * - it must \em not hold the kbasep_js_device_data::runpool_irq::lock, because
+ * it will be used internally.
+ *
+ * @return a valid struct kbase_context on success, with a refcount that is guaranteed
+ * to be non-zero and unmodified by this function.
+ * @return NULL on failure, indicating that no context was found in \a as_nr
+ */
+static inline struct kbase_context *kbasep_js_runpool_lookup_ctx_noretain(struct kbase_device *kbdev, int as_nr)
+{
+	unsigned long flags;
+	struct kbasep_js_device_data *js_devdata;
+	struct kbase_context *found_kctx;
+	struct kbasep_js_per_as_data *js_per_as_data;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(0 <= as_nr && as_nr < BASE_MAX_NR_AS);
+	js_devdata = &kbdev->js_data;
+	js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr];
+
+	spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags);
+
+	found_kctx = js_per_as_data->kctx;
+	KBASE_DEBUG_ASSERT(found_kctx == NULL || js_per_as_data->as_busy_refcount > 0);
+
+	spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags);
+
+	return found_kctx;
+}
+
+/**
+ * Convert a time in microseconds (us) to ticks of the GPU clock, based on the
+ * minimum available GPU frequency.
+ * This is useful for best/worst-case estimates, where the current frequency
+ * cannot be relied upon due to DVFS.
+ * e.g.: when you need the number of cycles to guarantee you won't wait for
+ * longer than 'us' time (you might have a shorter wait).
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_min_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (gpu_freq / 1000);
+}
+
+/**
+ * Convert a time in microseconds (us) to ticks of the GPU clock, based on the
+ * maximum available GPU frequency.
+ * This is useful for best/worst-case estimates, where the current frequency
+ * cannot be relied upon due to DVFS.
+ * e.g.: When you need the number of cycles to guarantee you'll wait at least
+ * 'us' amount of time (but you might wait longer).
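+ *
+ * Worked example (with assumed figures): if gpu_freq_khz_max == 500000
+ * (i.e. 500 MHz), then a 100 us budget converts to
+ * 100 * (500000 / 1000) == 50000 ticks.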
+ */
+static inline u32 kbasep_js_convert_us_to_gpu_ticks_max_freq(struct kbase_device *kbdev, u32 us)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	return us * (u32) (gpu_freq / 1000);
+}
+
+/**
+ * Convert ticks of the GPU clock to a time in microseconds (us), based on the
+ * minimum available GPU frequency.
+ * This is useful for best/worst-case estimates, where the current frequency
+ * cannot be relied upon due to DVFS.
+ * e.g.: When you need to know the worst-case wait that 'ticks' cycles will
+ * take (you guarantee that you won't wait any longer than this, but it may
+ * be shorter).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_min_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_min;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	/* Dividing first avoids u32 overflow, at the cost of sub-ms precision */
+	return ticks / gpu_freq * 1000;
+}
+
+/**
+ * Convert ticks of the GPU clock to a time in microseconds (us), based on the
+ * maximum available GPU frequency.
+ * This is useful for best/worst-case estimates, where the current frequency
+ * cannot be relied upon due to DVFS.
+ * e.g.: When you need to know the best-case wait for 'tick' cycles (you
+ * guarantee to be waiting for at least this long, but it may be longer).
+ */
+static inline u32 kbasep_js_convert_gpu_ticks_to_us_max_freq(struct kbase_device *kbdev, u32 ticks)
+{
+	u32 gpu_freq = kbdev->gpu_props.props.core_props.gpu_freq_khz_max;
+
+	KBASE_DEBUG_ASSERT(0 != gpu_freq);
+	/* Dividing first avoids u32 overflow, at the cost of sub-ms precision */
+	return ticks / gpu_freq * 1000;
+}
+
+/*
+ * The following locking conditions are placed on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_inc_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running < S8_MAX);
+	++(js_devdata->nr_all_contexts_running);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		/* Track contexts that can submit jobs */
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running <
+									S8_MAX);
+		++(js_devdata->nr_user_contexts_running);
+	}
+}
+
+/*
+ * The following locking conditions are placed on the caller:
+ * - The caller must hold the kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - The caller must hold the kbasep_js_device_data::runpool_mutex
+ */
+static inline void kbase_js_runpool_dec_context_count(
+						struct kbase_device *kbdev,
+						struct kbase_context *kctx)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&js_kctx_info->ctx.jsctx_mutex);
+	lockdep_assert_held(&js_devdata->runpool_mutex);
+
+	/* Track total contexts */
+	--(js_devdata->nr_all_contexts_running);
+	KBASE_DEBUG_ASSERT(js_devdata->nr_all_contexts_running >= 0);
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) == 0) {
+		/* Track contexts that can submit jobs */
+		--(js_devdata->nr_user_contexts_running);
+		KBASE_DEBUG_ASSERT(js_devdata->nr_user_contexts_running >= 0);
+	}
+}
+
+
+/**
+ * @brief Submit atoms from all available contexts to all job slots.
+ *
+ * This will attempt to submit as many jobs as possible. It will exit when
+ * either all job slots are full, or all contexts have been used.
+ *
+ * @param[in] kbdev    Device pointer
+ */
+static inline void kbase_js_sched_all(struct kbase_device *kbdev)
+{
+	kbase_js_sched(kbdev, (1 << kbdev->gpu_props.num_job_slots) - 1);
+}
+
+extern const int
+kbasep_js_atom_priority_to_relative[BASE_JD_NR_PRIO_LEVELS];
+
+extern const base_jd_prio
+kbasep_js_relative_priority_to_atom[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
+
+/**
+ * kbasep_js_atom_prio_to_sched_prio(): - Convert atom priority (base_jd_prio)
+ *                                        to relative ordering
+ * @atom_prio: Priority ID to translate.
+ *
+ * Atom priority values for @ref base_jd_prio cannot be compared directly to
+ * find out which are higher or lower.
+ *
+ * This function will convert base_jd_prio values for successively lower
+ * priorities into a monotonically increasing sequence. That is, the lower the
+ * base_jd_prio priority, the higher the value produced by this function. This
+ * is in accordance with how the rest of the kernel treats priority.
+ *
+ * The mapping is 1:1 and the size of the valid input range is the same as the
+ * size of the valid output range, i.e.
+ * KBASE_JS_ATOM_SCHED_PRIO_COUNT == BASE_JD_NR_PRIO_LEVELS
+ *
+ * Note: This must be kept in sync with BASE_JD_PRIO_<...> definitions
+ *
+ * Return: On success: a value in the inclusive range
+ *         0..KBASE_JS_ATOM_SCHED_PRIO_COUNT-1. On failure:
+ *         KBASE_JS_ATOM_SCHED_PRIO_INVALID
+ */
+static inline int kbasep_js_atom_prio_to_sched_prio(base_jd_prio atom_prio)
+{
+	if (atom_prio >= BASE_JD_NR_PRIO_LEVELS)
+		return KBASE_JS_ATOM_SCHED_PRIO_INVALID;
+
+	return kbasep_js_atom_priority_to_relative[atom_prio];
+}
+
+static inline base_jd_prio kbasep_js_sched_prio_to_atom_prio(int sched_prio)
+{
+	unsigned int prio_idx;
+
+	KBASE_DEBUG_ASSERT(0 <= sched_prio
+			&& sched_prio < KBASE_JS_ATOM_SCHED_PRIO_COUNT);
+
+	prio_idx = (unsigned int)sched_prio;
+
+	return kbasep_js_relative_priority_to_atom[prio_idx];
+}
+
+	/** @} *//* end group kbase_js */
+	/** @} *//* end group base_kbase_api */
+	/** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
new file mode 100644
index 00000000000..49266007935
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.c
@@ -0,0 +1,310 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + +#include <mali_kbase.h> +#include <mali_kbase_config.h> + +/* + * Private functions follow + */ + +/** + * @brief Check whether a ctx has a certain attribute, and if so, retain that + * attribute on the runpool. + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx is scheduled on the runpool + * + * @return true indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return false indicates no change in ctx attributes state of the runpool. + */ +static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_kctx_info *js_kctx_info; + bool runpool_state_changed = false; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT); + js_devdata = &kbdev->js_data; + js_kctx_info = &kctx->jctx.sched_info; + + BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex)); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false); + + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) { + KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] < S8_MAX); + ++(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]); + + if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 1) { + /* First refcount indicates a state change */ + runpool_state_changed = true; + KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_RUNPOOL, kctx, NULL, 0u, attribute); + } + } + + return runpool_state_changed; +} + +/** + * @brief Check whether a ctx has a certain attribute, and if so, release that + * attribute on the runpool. + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx is scheduled on the runpool + * + * @return true indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return false indicates no change in ctx attributes state of the runpool. 
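+ *
+ * Illustrative call pattern (a sketch mirroring the callers later in this
+ * file, not a quotation of them):
+ * @code
+ * if (kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute))
+ *	runpool_state_changed = true; // the caller later kicks the scheduler
+ * @endcode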
+ */
+static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.is_scheduled != false);
+
+	if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, attribute) != false) {
+		KBASE_DEBUG_ASSERT(js_devdata->runpool_irq.ctx_attr_ref_count[attribute] > 0);
+		--(js_devdata->runpool_irq.ctx_attr_ref_count[attribute]);
+
+		if (js_devdata->runpool_irq.ctx_attr_ref_count[attribute] == 0) {
+			/* Last de-refcount indicates a state change */
+			runpool_state_changed = true;
+			KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_RUNPOOL, kctx, NULL, 0u, attribute);
+		}
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Retain a certain attribute on a ctx, also retaining it on the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+	BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] < U32_MAX);
+
+	++(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		/* Only ref-count the attribute on the runpool for the first time this context sees this attribute */
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_ON_CTX, kctx, NULL, 0u, attribute);
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, attribute);
+	}
+
+	return runpool_state_changed;
+}
+
+/**
+ * @brief Release a certain attribute on a ctx, also releasing it from the runpool
+ * if the context is scheduled.
+ *
+ * Requires:
+ * - jsctx mutex
+ * - If the context is scheduled, then runpool_irq spinlock must also be held
+ *
+ * @return true indicates a change in ctx attributes state of the runpool.
+ * This may allow the scheduler to submit more jobs than previously.
+ * @return false indicates no change in ctx attributes state of the runpool.
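+ *
+ * Illustrative pairing with the retain variant (a sketch; the real call
+ * sites are the atom retain/release paths later in this file):
+ * @code
+ * kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, attribute);  // on atom add
+ * // ... the atom runs and completes ...
+ * kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, attribute); // on atom removal
+ * @endcode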
+ */
+static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	BUG_ON(!mutex_is_locked(&js_kctx_info->ctx.jsctx_mutex));
+	KBASE_DEBUG_ASSERT(js_kctx_info->ctx.ctx_attr_ref_count[attribute] > 0);
+
+	if (js_kctx_info->ctx.is_scheduled != false && js_kctx_info->ctx.ctx_attr_ref_count[attribute] == 1) {
+		lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+		/* Only de-ref-count the attribute on the runpool when this is the last ctx-reference to it */
+		runpool_state_changed = kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, attribute);
+		KBASE_TRACE_ADD(kbdev, JS_CTX_ATTR_NOW_OFF_CTX, kctx, NULL, 0u, attribute);
+	}
+
+	/* De-ref must happen afterwards, because kbasep_js_ctx_attr_runpool_release_attr() needs to check it too */
+	--(js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+
+	return runpool_state_changed;
+}
+
+/*
+ * More commonly used public functions
+ */
+
+void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+	bool runpool_state_changed = false;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_SUBMIT_DISABLED) != false) {
+		/* This context never submits, so don't track any scheduling attributes */
+		return;
+	}
+
+	/* Transfer attributes held in the context flags for contexts that have submit enabled */
+
+	if ((js_kctx_info->ctx.flags & KBASE_CTX_FLAG_HINT_ONLY_COMPUTE) != false) {
+		/* Compute context */
+		runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE);
+	}
+	/* NOTE: Whether this is a non-compute context depends on the jobs being
+	 * run, e.g. it might be submitting jobs with BASE_JD_REQ_ONLY_COMPUTE */
+
+	/* ... More attributes can be added here ...
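	 * (e.g. a hypothetical attribute tracking atoms that request a specific
	 * coherent group via BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)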
*/ + + /* The context should not have been scheduled yet, so ASSERT if this caused + * runpool state changes (note that other threads *can't* affect the value + * of runpool_state_changed, due to how it's calculated) */ + KBASE_DEBUG_ASSERT(runpool_state_changed == false); + CSTD_UNUSED(runpool_state_changed); +} + +void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + bool runpool_state_changed; + int i; + + /* Retain any existing attributes */ + for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { + /* The context is being scheduled in, so update the runpool with the new attributes */ + runpool_state_changed = kbasep_js_ctx_attr_runpool_retain_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + + /* We don't need to know about state changed, because retaining a + * context occurs on scheduling it, and that itself will also try + * to run new atoms */ + CSTD_UNUSED(runpool_state_changed); + } + } +} + +bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + bool runpool_state_changed = false; + int i; + + /* Release any existing attributes */ + for (i = 0; i < KBASEP_JS_CTX_ATTR_COUNT; ++i) { + if (kbasep_js_ctx_attr_is_attr_on_ctx(kctx, (enum kbasep_js_ctx_attr) i) != false) { + /* The context is being scheduled out, so update the runpool on the removed attributes */ + runpool_state_changed |= kbasep_js_ctx_attr_runpool_release_attr(kbdev, kctx, (enum kbasep_js_ctx_attr) i); + } + } + + return runpool_state_changed; +} + +void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + bool runpool_state_changed = false; + base_jd_core_req core_req; + + KBASE_DEBUG_ASSERT(katom); + core_req = katom->core_req; + + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + else + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + + if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { + /* Atom that can run on slot1 or slot2, and can use all cores */ + runpool_state_changed |= kbasep_js_ctx_attr_ctx_retain_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + } + + /* We don't need to know about state changed, because retaining an + * atom occurs on adding it, and that itself will also try to run + * new atoms */ + CSTD_UNUSED(runpool_state_changed); +} + +bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state) +{ + bool runpool_state_changed = false; + base_jd_core_req core_req; + + KBASE_DEBUG_ASSERT(katom_retained_state); + core_req = katom_retained_state->core_req; + + /* No-op for invalid atoms */ + if (kbasep_js_atom_retained_state_is_valid(katom_retained_state) == false) + return false; + + if (core_req & BASE_JD_REQ_ONLY_COMPUTE) + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE); + else + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_NON_COMPUTE); + + if ((core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) != 0 && (core_req & (BASE_JD_REQ_COHERENT_GROUP | 
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)) == 0) { + /* Atom that can run on slot1 or slot2, and can use all cores */ + runpool_state_changed |= kbasep_js_ctx_attr_ctx_release_attr(kbdev, kctx, KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES); + } + + return runpool_state_changed; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h new file mode 100644 index 00000000000..ce9183326a5 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_js_ctx_attr.h @@ -0,0 +1,158 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js_ctx_attr.h + * Job Scheduler Context Attribute APIs + */ + +#ifndef _KBASE_JS_CTX_ATTR_H_ +#define _KBASE_JS_CTX_ATTR_H_ + +/** + * @addtogroup base_api + * @{ + */ + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup kbase_js + * @{ + */ + +/** + * Set the initial attributes of a context (when context create flags are set) + * + * Requires: + * - Hold the jsctx_mutex + */ +void kbasep_js_ctx_attr_set_initial_attrs(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * Retain all attributes of a context + * + * This occurs on scheduling in the context on the runpool (but after + * is_scheduled is set) + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx->is_scheduled is true + */ +void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * Release all attributes of a context + * + * This occurs on scheduling out the context from the runpool (but before + * is_scheduled is cleared) + * + * Requires: + * - jsctx mutex + * - runpool_irq spinlock + * - ctx->is_scheduled is true + * + * @return true indicates a change in ctx attributes state of the runpool. + * In this state, the scheduler might be able to submit more jobs than + * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock() + * or similar is called sometime later. + * @return false indicates no change in ctx attributes state of the runpool. + */ +bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx); + +/** + * Retain all attributes of an atom + * + * This occurs on adding an atom to a context + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + */ +void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); + +/** + * Release all attributes of an atom, given its retained state. + * + * This occurs after (permanently) removing an atom from a context + * + * Requires: + * - jsctx mutex + * - If the context is scheduled, then runpool_irq spinlock must also be held + * + * This is a no-op when \a katom_retained_state is invalid. + * + * @return true indicates a change in ctx attributes state of the runpool. 
+ * In this state, the scheduler might be able to submit more jobs than
+ * previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
+ * or similar is called sometime later.
+ * @return false indicates no change in ctx attributes state of the runpool.
+ */
+bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline s8 kbasep_js_ctx_attr_count_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_device_data *js_devdata;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_devdata = &kbdev->js_data;
+
+	return js_devdata->runpool_irq.ctx_attr_ref_count[attribute];
+}
+
+/**
+ * Requires:
+ * - runpool_irq spinlock
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_runpool(struct kbase_device *kbdev, enum kbasep_js_ctx_attr attribute)
+{
+	/* In general, attributes are 'on' when they have a non-zero refcount (note: the refcount will never be < 0) */
+	return (bool) kbasep_js_ctx_attr_count_on_runpool(kbdev, attribute);
+}
+
+/**
+ * Requires:
+ * - jsctx mutex
+ */
+static inline bool kbasep_js_ctx_attr_is_attr_on_ctx(struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
+{
+	struct kbasep_js_kctx_info *js_kctx_info;
+
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+	KBASE_DEBUG_ASSERT(attribute < KBASEP_JS_CTX_ATTR_COUNT);
+	js_kctx_info = &kctx->jctx.sched_info;
+
+	/* In general, attributes are 'on' when they have a refcount (which should never be < 0) */
+	return (bool) (js_kctx_info->ctx.ctx_attr_ref_count[attribute]);
+}
+
+	/** @} *//* end group kbase_js */
+	/** @} *//* end group base_kbase_api */
+	/** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_CTX_ATTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_defs.h b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
new file mode 100644
index 00000000000..7532f9c9e71
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_defs.h
@@ -0,0 +1,517 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_js_defs.h
+ * Job Scheduler Type Definitions
+ */
+
+#ifndef _KBASE_JS_DEFS_H_
+#define _KBASE_JS_DEFS_H_
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js
+ * @{
+ */
+/* Forward decls */
+struct kbase_device;
+struct kbase_jd_atom;
+
+
+/* Types used by the policies must go here */
+enum {
+	/** Context will not submit any jobs */
+	KBASE_CTX_FLAG_SUBMIT_DISABLED = (1u << 0),
+
+	/** Set if the context uses an address space and should be kept scheduled in */
+	KBASE_CTX_FLAG_PRIVILEGED = (1u << 1),
+
+	/** Kernel-side equivalent of BASE_CONTEXT_HINT_ONLY_COMPUTE.
+	 * Non-mutable after creation flags set */
+	KBASE_CTX_FLAG_HINT_ONLY_COMPUTE = (1u << 2)
+
+	/* NOTE: Add flags for other things, such as 'is scheduled', and 'is dying' */
+};
+
+typedef u32 kbase_context_flags;
+
+struct kbasep_atom_req {
+	base_jd_core_req core_req;
+	kbase_context_flags ctx_req;
+	u32 device_nr;
+};
+
+#include "mali_kbase_js_policy_cfs.h"
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy {
+	struct kbasep_js_policy_cfs cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_ctx_info {
+	struct kbasep_js_policy_cfs_ctx cfs;
+};
+
+/* Wrapper Interface - doxygen is elsewhere */
+union kbasep_js_policy_job_info {
+	struct kbasep_js_policy_cfs_job cfs;
+};
+
+
+/** Callback function run on all of a context's jobs registered with the Job
+ * Scheduler */
+typedef void (*kbasep_js_policy_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Maximum number of jobs that can be submitted to a job slot whilst
+ * inside the IRQ handler.
+ *
+ * This is important because GPU NULL jobs can complete whilst the IRQ handler
+ * is running. Without a limit, this potentially allows an unlimited number of
+ * GPU NULL jobs to be submitted inside the IRQ handler, which increases IRQ
+ * latency.
+ */
+#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
+
+/**
+ * @brief The IRQ_THROTTLE time in microseconds
+ *
+ * This will be converted via the GPU's clock frequency into a cycle-count.
+ *
+ * @note We can make an estimate of the GPU's frequency by periodically
+ * sampling its CYCLE_COUNT register
+ */
+#define KBASE_JS_IRQ_THROTTLE_TIME_US 20
+
+/**
+ * @brief Context attributes
+ *
+ * Each context attribute can be thought of as a boolean value that caches some
+ * state information about either the runpool, or the context:
+ * - In the case of the runpool, it is a cache of "Do any contexts owned by
+ * the runpool have attribute X?"
+ * - In the case of a context, it is a cache of "Do any atoms owned by the
+ * context have attribute X?"
+ *
+ * The boolean value of the context attributes often affects scheduling
+ * decisions, such as affinities to use and job slots to use.
+ *
+ * To accommodate changes of state in the context, each attribute is refcounted
+ * in the context, and in the runpool for all running contexts. Specifically:
+ * - The runpool holds a refcount of how many contexts in the runpool have this
+ * attribute.
+ * - The context holds a refcount of how many atoms have this attribute.
+ *
+ * Examples of use:
+ * - Finding out when there are a mix of @ref BASE_CONTEXT_HINT_ONLY_COMPUTE
+ * and ! @ref BASE_CONTEXT_HINT_ONLY_COMPUTE contexts in the runpool
+ */
+enum kbasep_js_ctx_attr {
+	/** Attribute indicating a context that contains Compute jobs. That is,
+	 * @ref BASE_CONTEXT_HINT_ONLY_COMPUTE is \b set and/or the context has jobs of type
+	 * @ref BASE_JD_REQ_ONLY_COMPUTE
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE,
+
+	/** Attribute indicating a context that contains Non-Compute jobs. That is,
+	 * the context has some jobs that are \b not of type @ref
+	 * BASE_JD_REQ_ONLY_COMPUTE. The context usually has
+	 * BASE_CONTEXT_HINT_COMPUTE \b clear, but this depends on the HW
+	 * workarounds in use in the Job Scheduling Policy.
+	 *
+	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
+	 * both types of jobs.
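	 *
	 * For illustration (an assumed, but typical, workload): a context that
	 * submits both fragment-shader atoms and BASE_JD_REQ_ONLY_COMPUTE atoms
	 * would hold both attributes simultaneously.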
+	 */
+	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
+
+	/** Attribute indicating that a context contains compute-job atoms that
+	 * aren't restricted to a coherent group, and can run on all cores.
+	 *
+	 * Specifically, this is when the atom's \a core_req satisfies:
+	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) // uses slot 1 or slot 2
+	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
+	 *
+	 * Such atoms could be blocked from running if one of the coherent groups
+	 * is being used by another job slot, so tracking this context attribute
+	 * allows us to prevent such situations.
+	 *
+	 * @note This doesn't take into account the 1-coregroup case, where all
+	 * compute atoms would effectively be able to run on 'all cores', but
+	 * contexts will still not always get marked with this attribute. Instead,
+	 * it is the caller's responsibility to take into account the number of
+	 * coregroups when interpreting this attribute.
+	 *
+	 * @note Whilst Tiler atoms are normally combined with
+	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
+	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
+	 * enough to handle anyway.
+	 */
+	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
+
+	/** Must be the last in the enum */
+	KBASEP_JS_CTX_ATTR_COUNT
+};
+
+enum {
+	/** Bit indicating that new atom should be started because this atom completed */
+	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
+	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
+	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
+};
+
+/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
+typedef u32 kbasep_js_atom_done_code;
+
+/**
+ * Data used by the scheduler that is unique for each Address Space.
+ *
+ * This is used in IRQ context and kbasep_js_device_data::runpool_irq::lock
+ * must be held whilst accessing this data (including reads and atomic
+ * decisions based on the read).
+ */
+struct kbasep_js_per_as_data {
+	/**
+	 * Ref count of whether this AS is busy, and must not be scheduled out
+	 *
+	 * When jobs are running this is always positive. However, it can still be
+	 * positive when no jobs are running. If all you need is a heuristic to
+	 * tell you whether jobs might be running, this should be sufficient.
+	 */
+	int as_busy_refcount;
+
+	/** Pointer to the current context on this address space, or NULL for no context */
+	struct kbase_context *kctx;
+};
+
+/**
+ * @brief KBase Device Data Job Scheduler sub-structure
+ *
+ * This encapsulates the current context of the Job Scheduler on a particular
+ * device. This context is global to the device, and is not tied to any
+ * particular struct kbase_context running on the device.
+ *
+ * nr_contexts_running and as_free are optimized for packing together (by making
+ * them smaller types than u32). The operations on them should rarely involve
+ * masking. The use of signed types for arithmetic indicates to the compiler that
+ * the value will not rollover (which would be undefined behavior), and so under
+ * the Total License model, it is free to make optimizations based on that (i.e.
+ * to remove masking).
+ */
+struct kbasep_js_device_data {
+	/** Sub-structure to collect together Job Scheduling data used in IRQ context */
+	struct runpool_irq {
+		/**
+		 * Lock for accessing Job Scheduling data used in IRQ context
+		 *
+		 * This lock must be held whenever this data is accessed (read, or
+		 * write). Even for read-only access, memory barriers would be needed.
+ * In any case, it is likely that decisions based on only reading must + * also be atomic with respect to data held here and elsewhere in the + * Job Scheduler. + * + * This lock must also be held for accessing: + * - kbase_context::as_nr + * - kbase_device::jm_slots + * - Parts of the kbasep_js_policy, dependent on the policy (refer to + * the policy in question for more information) + * - Parts of kbasep_js_policy_ctx_info, dependent on the policy (refer to + * the policy in question for more information) + */ + spinlock_t lock; + + /** Bitvector indicating whether a currently scheduled context is allowed to submit jobs. + * When bit 'N' is set in this, it indicates whether the context bound to address space + * 'N' (per_as_data[N].kctx) is allowed to submit jobs. + * + * It is placed here because it's much more memory efficient than having a u8 in + * struct kbasep_js_per_as_data to store this flag */ + u16 submit_allowed; + + /** Context Attributes: + * Each is large enough to hold a refcount of the number of contexts + * that can fit into the runpool. This is currently BASE_MAX_NR_AS + * + * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store + * the refcount. Hence, it's not worthwhile reducing this to + * bit-manipulation on u32s to save space (where in contrast, 4 bit + * sub-fields would be easy to do and would save space). + * + * Whilst this must not become negative, the sign bit is used for: + * - error detection in debug builds + * - Optimization: it is undefined for a signed int to overflow, and so + * the compiler can optimize for that never happening (thus, no masking + * is required on updating the variable) */ + s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT]; + + /** Data that is unique for each AS */ + struct kbasep_js_per_as_data per_as_data[BASE_MAX_NR_AS]; + + /* + * Affinity management and tracking + */ + /** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates + * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */ + u64 slot_affinities[BASE_JM_MAX_NR_SLOTS]; + /** Refcount for each core owned by each slot. Used to generate the + * slot_affinities array of bitvectors + * + * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS, + * because it is refcounted only when a job is definitely about to be + * submitted to a slot, and is de-refcounted immediately after a job + * finishes */ + s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64]; + + /* + * true when GPU is put into secure mode + */ + bool secure_mode; + } runpool_irq; + + /** + * Run Pool mutex, for managing contexts within the runpool. + * Unless otherwise specified, you must hold this lock whilst accessing any + * members that follow + * + * In addition, this is used to access: + * - the kbasep_js_kctx_info::runpool substructure + */ + struct mutex runpool_mutex; + + /** + * Queue Lock, used to access the Policy's queue of contexts independently + * of the Run Pool. + * + * Of course, you don't need the Run Pool lock to access this. + */ + struct mutex queue_mutex; + + /** + * Scheduling semaphore. This must be held when calling + * kbase_jm_kick() + */ + struct semaphore schedule_sem; + + /** + * List of contexts that can currently be pulled from + */ + struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS]; + /** + * List of contexts that can not currently be pulled from, but have + * jobs currently running. 
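	 * (As with ctx_list_pullable above, there is one list per job slot.)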
+	 */
+	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];
+
+	u16 as_free;				/**< Bitpattern of free Address Spaces */
+
+	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
+	s8 nr_user_contexts_running;
+	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
+	s8 nr_all_contexts_running;
+
+	/**
+	 * Policy-specific information.
+	 *
+	 * Refer to the structure defined by the current policy to determine which
+	 * locks must be held when accessing this.
+	 */
+	union kbasep_js_policy policy;
+
+	/** Core Requirements to match up with base_js_atom's core_req member
+	 * @note This is a write-once member, and so no locking is required to read */
+	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];
+
+	u32 scheduling_period_ns;	/**< Value for JS_SCHEDULING_PERIOD_NS */
+	u32 soft_stop_ticks;		/**< Value for JS_SOFT_STOP_TICKS */
+	u32 soft_stop_ticks_cl;		/**< Value for JS_SOFT_STOP_TICKS_CL */
+	u32 hard_stop_ticks_ss;		/**< Value for JS_HARD_STOP_TICKS_SS */
+	u32 hard_stop_ticks_cl;		/**< Value for JS_HARD_STOP_TICKS_CL */
+	u32 hard_stop_ticks_dumping;	/**< Value for JS_HARD_STOP_TICKS_DUMPING */
+	u32 gpu_reset_ticks_ss;		/**< Value for JS_RESET_TICKS_SS */
+	u32 gpu_reset_ticks_cl;		/**< Value for JS_RESET_TICKS_CL */
+	u32 gpu_reset_ticks_dumping;	/**< Value for JS_RESET_TICKS_DUMPING */
+	u32 ctx_timeslice_ns;		/**< Value for JS_CTX_TIMESLICE_NS */
+	u32 cfs_ctx_runtime_init_slices;	/**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_INIT_SLICES */
+	u32 cfs_ctx_runtime_min_slices;	/**< Value for DEFAULT_JS_CFS_CTX_RUNTIME_MIN_SLICES */
+
+	/** List of suspended soft jobs */
+	struct list_head suspended_soft_jobs_list;
+
+#ifdef CONFIG_MALI_DEBUG
+	/* Support soft-stop on a single context */
+	bool softstop_always;
+#endif				/* CONFIG_MALI_DEBUG */
+	/** The initialized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths).
+	 * @note This is a write-once member, and so no locking is required to read */
+	int init_status;
+
+	/* Number of contexts that can currently be pulled from */
+	u32 nr_contexts_pullable;
+
+	/* Number of contexts that can either be pulled from or are currently
+	 * running */
+	atomic_t nr_contexts_runnable;
+};
+
+/**
+ * @brief KBase Context Job Scheduling information structure
+ *
+ * This is a substructure in the struct kbase_context that encapsulates all the
+ * scheduling information.
+ */
+struct kbasep_js_kctx_info {
+	/**
+	 * Runpool substructure. This must only be accessed whilst the Run Pool
+	 * mutex ( kbasep_js_device_data::runpool_mutex ) is held.
+	 *
+	 * In addition, the kbasep_js_device_data::runpool_irq::lock may need to be
+	 * held for certain sub-members.
+	 *
+	 * @note Some of the members could be moved into struct kbasep_js_device_data for
+	 * improved d-cache/tlb efficiency.
+	 */
+	struct {
+		union kbasep_js_policy_ctx_info policy_ctx;	/**< Policy-specific context */
+	} runpool;
+
+	/**
+	 * Job Scheduler Context information sub-structure. These members are
+	 * accessed regardless of whether the context is:
+	 * - In the Policy's Run Pool
+	 * - In the Policy's Queue
+	 * - Not queued nor in the Run Pool.
+	 *
+	 * You must obtain the jsctx_mutex before accessing any other members of
+	 * this substructure.
+	 *
+	 * You may not access any of these members from IRQ context.
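	 *
	 * Typical access pattern (an illustrative sketch, not a quote from the
	 * driver):
	 * @code
	 * mutex_lock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
	 * nr = kctx->jctx.sched_info.ctx.nr_jobs; // e.g. read a member
	 * mutex_unlock(&kctx->jctx.sched_info.ctx.jsctx_mutex);
	 * @endcode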
+	 */
+	struct {
+		struct mutex jsctx_mutex;		/**< Job Scheduler Context lock */
+
+		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
+		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
+		 * for such jobs */
+		u32 nr_jobs;
+
+		/** Context Attributes:
+		 * Each is large enough to hold a refcount of the number of atoms on
+		 * the context. */
+		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
+
+		kbase_context_flags flags;
+		/* NOTE: Unify the following flags into kbase_context_flags */
+		/**
+		 * Is the context scheduled on the Run Pool?
+		 *
+		 * This is only ever updated whilst the jsctx_mutex is held.
+		 */
+		bool is_scheduled;
+		/**
+		 * Wait queue to wait for is_scheduled state changes.
+		 */
+		wait_queue_head_t is_scheduled_wait;
+
+		bool is_dying;			/**< Is the context in the process of being evicted? */
+
+		/** Link implementing JS queues. Context can be present on one
+		 * list per job slot
+		 */
+		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
+
+		atomic_t fault_count;	/**< The number of times the context has been retained due to a faulting job. */
+	} ctx;
+
+	/* The initialized-flag is placed at the end, to avoid cache-pollution (we should
+	 * only be using this during init/term paths) */
+	int init_status;
+};
+
+/** Subset of atom state that can be available after jd_done_nolock() is called
+ * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
+ * because the original atom could disappear. */
+struct kbasep_js_atom_retained_state {
+	/** Event code - to determine whether the atom has finished */
+	enum base_jd_event_code event_code;
+	/** core requirements */
+	base_jd_core_req core_req;
+	/* priority */
+	int sched_priority;
+	/** Job Slot to retry submitting to if submission from IRQ handler failed */
+	int retry_submit_on_slot;
+	/* Core group atom was executed on */
+	u32 device_nr;
+
+};
+
+/**
+ * Value signifying 'no retry on a slot required' for:
+ * - kbase_js_atom_retained_state::retry_submit_on_slot
+ * - kbase_jd_atom::retry_submit_on_slot
+ */
+#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)
+
+/**
+ * base_jd_core_req value signifying 'invalid' for a kbase_jd_atom_retained_state.
+ *
+ * @see kbase_atom_retained_state_is_valid()
+ */
+#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP
+
+/**
+ * @brief The JS timer resolution, in microseconds
+ *
+ * Any non-zero difference in time will be at least this size.
+ */
+#define KBASEP_JS_TICK_RESOLUTION_US 1
+
+/*
+ * Internal atom priority defines for kbase_jd_atom::sched_prio
+ */
+enum {
+	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
+	KBASE_JS_ATOM_SCHED_PRIO_MED,
+	KBASE_JS_ATOM_SCHED_PRIO_LOW,
+	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
+};
+
+/* Invalid priority for kbase_jd_atom::sched_prio */
+#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1
+
+/* Default priority in the case of contexts with no atoms, or being lenient
+ * about invalid priorities from userspace */
+#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
+
+	/** @} *//* end group kbase_js */
+	/** @} *//* end group base_kbase_api */
+	/** @} *//* end group base_api */
+
+#endif				/* _KBASE_JS_DEFS_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
new file mode 100644
index 00000000000..2094586ff2d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy.h
@@ -0,0 +1,763 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_js_policy.h + * Job Scheduler Policy APIs. + */ + +#ifndef _KBASE_JS_POLICY_H_ +#define _KBASE_JS_POLICY_H_ + +/** + * @page page_kbase_js_policy Job Scheduling Policies + * The Job Scheduling system is described in the following: + * - @subpage page_kbase_js_policy_overview + * - @subpage page_kbase_js_policy_operation + * + * The API details are as follows: + * - @ref kbase_jm + * - @ref kbase_js + * - @ref kbase_js_policy + */ + +/** + * @page page_kbase_js_policy_overview Overview of the Policy System + * + * The Job Scheduler Policy manages: + * - The assigning of KBase Contexts to GPU Address Spaces (\em ASs) + * - The choosing of Job Chains (\em Jobs) from a KBase context, to run on the + * GPU's Job Slots (\em JSs). + * - The amount of \em time a context is assigned to (<em>scheduled on</em>) an + * Address Space + * - The amount of \em time a Job spends running on the GPU + * + * The Policy implements this management via 2 components: + * - A Policy Queue, which manages a set of contexts that are ready to run, + * but not currently running. + * - A Policy Run Pool, which manages the currently running contexts (one per Address + * Space) and the jobs to run on the Job Slots. + * + * Each Graphics Process in the system has at least one KBase Context. Therefore, + * the Policy Queue can be seen as a queue of Processes waiting to run Jobs on + * the GPU. + * + * <!-- The following needs to be all on one line, due to doxygen's parser --> + * @dotfile policy_overview.dot "Diagram showing a very simplified overview of the Policy System. IRQ handling, soft/hard-stopping, contexts re-entering the system and Policy details are omitted" + * + * The main operations on the queue are: + * - Enqueuing a Context to it + * - Dequeuing a Context from it, to run it. + * - Note: requeuing a context is much the same as enqueuing a context, but + * occurs when a context is scheduled out of the system to allow other contexts + * to run. + * + * These operations have much the same meaning for the Run Pool - Jobs are + * dequeued to run on a Jobslot, and requeued when they are scheduled out of + * the GPU. + * + * @note This is an over-simplification of the Policy APIs - there are more + * operations than 'Enqueue'/'Dequeue', and a Dequeue from the Policy Queue + * takes at least two function calls: one to Dequeue from the Queue, one to add + * to the Run Pool. + * + * As indicated on the diagram, Jobs permanently leave the scheduling system + * when they are completed, otherwise they get dequeued/requeued until this + * happens. Similarly, Contexts leave the scheduling system when their jobs + * have all completed. However, Contexts may later return to the scheduling + * system (not shown on the diagram) if more Bags of Jobs are submitted to + * them. 
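+ *
+ * As a sketch, the lifecycle above maps onto the Policy APIs roughly as
+ * follows (illustrative pseudo-flow; argument lists elided, and the real
+ * sequence involves more steps):
+ * @code
+ * kbasep_js_policy_enqueue_ctx(...);      // ctx has jobs ready to run
+ * kbasep_js_policy_dequeue_head_ctx(...); // pick the next ctx to run
+ * kbasep_js_policy_runpool_add_ctx(...);  // bind it to an Address Space
+ * kbasep_js_policy_dequeue_job(...);      // pick jobs for the Job Slots
+ * @endcode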
+ */
+
+/**
+ * @page page_kbase_js_policy_operation Policy Operation
+ *
+ * We describe the actions that the Job Scheduler Core takes on the Policy in
+ * the following cases:
+ * - The IRQ Path
+ * - The Job Submission Path
+ * - The High Priority Job Submission Path
+ *
+ * This shows how the Policy APIs will be used by the Job Scheduler core.
+ *
+ * The following diagram shows an example Policy that contains a Low Priority
+ * queue, and a Real-time (High Priority) Queue. The RT queue is examined
+ * before the LowP one on dequeuing from the head. The Low Priority Queue is
+ * ordered by time, and the RT queue is ordered by time weighted by
+ * RT-priority. In addition, it shows that the Job Scheduler Core will start a
+ * Soft-Stop Timer (SS-Timer) when it dequeues and submits a job. The
+ * Soft-Stop time is set by a global configuration value, and must be a value
+ * appropriate for the policy. For example, this could include "don't run a
+ * soft-stop timer" for a First-Come-First-Served (FCFS) policy.
+ *
+ * <!-- The following needs to be all on one line, due to doxygen's parser -->
+ * @dotfile policy_operation_diagram.dot "Diagram showing the objects managed by an Example Policy, and the operations made upon these objects by the Job Scheduler Core."
+ *
+ * @section sec_kbase_js_policy_operation_prio Dealing with Priority
+ *
+ * Priority applies separately to a context as a whole, and to the jobs within
+ * a context. The jobs specify a priority in the base_jd_atom::prio member, but
+ * it is independent of the context priority. That is, it only affects
+ * scheduling of atoms within a context. Refer to @ref base_jd_prio for more
+ * details. The meaning of the context's priority value is up to the policy
+ * itself, and could be a logarithmic scale instead of a linear scale (e.g. the
+ * policy could specify that an increase/decrease in priority by 1 results in an
+ * increase/decrease in \em proportion of time spent scheduled in by 25%, an
+ * effective change in timeslice by 11%).
+ *
+ * It is up to the policy whether a boost in priority boosts the priority of
+ * the entire context (e.g. to such an extent that it may pre-empt other
+ * running contexts). If it chooses to do this, the Policy must make sure that
+ * only jobs from high-priority contexts are run, and that the context is
+ * scheduled out once only jobs from low priority contexts remain. This ensures
+ * that the low priority contexts do not gain from the priority boost, yet they
+ * still get scheduled correctly with respect to other low priority contexts.
+ *
+ *
+ * @section sec_kbase_js_policy_operation_irq IRQ Path
+ *
+ * The following happens on the IRQ path from the Job Scheduler Core:
+ * - Note the slot that completed (for later)
+ * - Log the time spent by the job (and implicitly, the time spent by the
+ * context)
+ *    - call kbasep_js_policy_log_job_result() <em>in the context of the irq
+ * handler.</em>
+ *    - This must happen regardless of whether the job completed successfully or
+ * not (otherwise the context gets away with DoS'ing the system with faulty jobs)
+ * - What was the result of the job?
+ *  - If Completed: job is just removed from the system
+ *  - If Hard-stop or failure: job is removed from the system
+ *  - If Soft-stop: queue the book-keeping work onto a work-queue: have a
+ * work-queue call kbasep_js_policy_enqueue_job()
+ * - Check the timeslice used by the owning context
+ *    - call kbasep_js_policy_should_remove_ctx() <em>in the context of the irq
+ * handler.</em>
+ *    - If this returns true, clear the "allowed" flag.
+ * - Check the ctx's flags for "allowed", "has jobs to run" and "is running
+ * jobs"
+ * - And so, should the context stay scheduled in?
+ *    - If No, push onto a work-queue the work of scheduling out the old context,
+ * and getting a new one. That is:
+ *       - kbasep_js_policy_runpool_remove_ctx() on old_ctx
+ *       - kbasep_js_policy_enqueue_ctx() on old_ctx
+ *       - kbasep_js_policy_dequeue_head_ctx() to get new_ctx
+ *       - kbasep_js_policy_runpool_add_ctx() on new_ctx
+ *       - (all of this work is deferred on a work-queue to keep the IRQ handler quick)
+ * - If there is space in the completed job slots' HEAD/NEXT registers, run the next job:
+ *    - kbasep_js_policy_dequeue_job() <em>in the context of the irq
+ * handler</em> with core_req set to that of the completing slot
+ *    - if this returned true, submit the job to the completed slot.
+ *    - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *    - If kbasep_js_policy_dequeue_job() returned false, submit some work to
+ * the work-queue to retry from outside of IRQ context (calling
+ * kbasep_js_policy_dequeue_job() from a work-queue).
+ *
+ * Since the IRQ handler submits new jobs \em and re-checks the IRQ_RAWSTAT,
+ * this sequence could loop a large number of times: this could happen if
+ * the jobs submitted completed on the GPU very quickly (in a few cycles), such
+ * as GPU NULL jobs. Then, the HEAD/NEXT registers will always be free to take
+ * more jobs, causing us to loop until we run out of jobs.
+ *
+ * To mitigate this, we must limit the number of jobs submitted per slot during
+ * the IRQ handler - for example, no more than 2 jobs per slot per IRQ should
+ * be sufficient (to fill up the HEAD + NEXT registers in normal cases). For
+ * Mali-T600 with 3 job slots, this means that up to 6 jobs could be submitted per
+ * IRQ. Note that IRQ Throttling can make this situation commonplace: 6 jobs
+ * could complete but the IRQ for each of them is delayed by the throttling. By
+ * the time you get the IRQ, all 6 jobs could've completed, meaning you can
+ * submit jobs to fill all 6 HEAD+NEXT registers again.
+ *
+ * @note As much work is deferred as possible, which includes the scheduling
+ * out of a context and scheduling in a new context. However, we can still make
+ * starting a single high-priority context quick despite this:
+ * - On Mali-T600 family, there is one more AS than JSs.
+ * - This means we can very quickly schedule out one AS, no matter what the
+ * situation (because there will always be one AS that's not currently running
+ * on the job slot - it can only have a job in the NEXT register).
+ *    - Even with this scheduling out, fair-share can still be guaranteed e.g. by
+ * a timeline-based Completely Fair Scheduler.
+ * - When our high-priority context comes in, we can do this quick-scheduling
+ * out immediately, and then schedule in the high-priority context without having to block.
+ * - This all assumes that the context to schedule out is of lower
Otherwise, we will have to block waiting for some other low + * priority context to finish its jobs. Note that it's likely (but not + * impossible) that the high-priority context \b is running jobs, by virtue of + * it being high priority. + * - Therefore, we can give a high liklihood that on Mali-T600 at least one + * high-priority context can be started very quickly. For the general case, we + * can guarantee starting (no. ASs) - (no. JSs) high priority contexts + * quickly. In any case, there is a high likelihood that we're able to start + * more than one high priority context quickly. + * + * In terms of the functions used in the IRQ handler directly, these are the + * perfomance considerations: + * - kbase_js_policy_log_job_result(): + * - This is just adding to a 64-bit value (possibly even a 32-bit value if we + * only store the time the job's recently spent - see below on 'priority weighting') + * - For priority weighting, a divide operation ('div') could happen, but + * this can happen in a deferred context (outside of IRQ) when scheduling out + * the ctx; as per our Engineering Specification, the contexts of different + * priority still stay scheduled in for the same timeslice, but higher priority + * ones scheduled back in more often. + * - That is, the weighted and unweighted times must be stored separately, and + * the weighted time is only updated \em outside of IRQ context. + * - Of course, this divide is more likely to be a 'multiply by inverse of the + * weight', assuming that the weight (priority) doesn't change. + * - kbasep_js_policy_should_remove_ctx(): + * - This is usually just a comparison of the stored time value against some + * maximum value. + * + * @note all deferred work can be wrapped up into one call - we usually need to + * indicate that a job/bag is done outside of IRQ context anyway. + * + * + * + * @section sec_kbase_js_policy_operation_submit Submission path + * + * Start with a Context with no jobs present, and assume equal priority of all + * contexts in the system. The following work all happens outside of IRQ + * Context : + * - As soon as job is made 'ready to 'run', then is must be registerd with the Job + * Scheduler Policy: + * - 'Ready to run' means they've satisified their dependencies in the + * Kernel-side Job Dispatch system. + * - Call kbasep_js_policy_enqueue_job() + * - This indicates that the job should be scheduled (it is ready to run). + * - As soon as a ctx changes from having 0 jobs 'ready to run' to >0 jobs + * 'ready to run', we enqueue the context on the policy queue: + * - Call kbasep_js_policy_enqueue_ctx() + * - This indicates that the \em ctx should be scheduled (it is ready to run) + * + * Next, we need to handle adding a context to the Run Pool - if it's sensible + * to do so. This can happen due to two reasons: + * -# A context is enqueued as above, and there are ASs free for it to run on + * (e.g. it is the first context to be run, in which case it can be added to + * the Run Pool immediately after enqueuing on the Policy Queue) + * -# A previous IRQ caused another ctx to be scheduled out, requiring that the + * context at the head of the queue be scheduled in. Such steps would happen in + * a work queue (work deferred from the IRQ context). 
+ *
+ * In both cases, we'd handle it as follows:
+ * - Get the context at the Head of the Policy Queue:
+ * - Call kbasep_js_policy_dequeue_head_ctx()
+ * - Assign the Context an Address Space (Assert that there will be one free,
+ * given the above two reasons)
+ * - Add this context to the Run Pool:
+ * - Call kbasep_js_policy_runpool_add_ctx()
+ * - Now see if a job should be run:
+ * - Mostly, this will be done in the IRQ handler at the completion of a
+ * previous job.
+ * - However, there are two cases where this cannot be done: a) The first job
+ * enqueued to the system (there is no previous IRQ to act upon) b) When jobs
+ * are submitted at a low enough rate to not fill up all Job Slots (or, not to
+ * fill both the 'HEAD' and 'NEXT' registers in the job-slots)
+ * - Hence, on each ctx <b>and job</b> submission we should try to see if we
+ * can run a job:
+ * - For each job slot that has free space (in NEXT or HEAD+NEXT registers):
+ * - Call kbasep_js_policy_dequeue_job() with core_req set to that of the
+ * slot
+ * - if we got one, submit it to the job slot.
+ * - This is repeated until kbasep_js_policy_dequeue_job() returns
+ * false, or the job slot has a job queued on both the HEAD and NEXT registers.
+ *
+ * The above case shows that we should attempt to run jobs in cases where a) a ctx
+ * has been added to the Run Pool, and b) new jobs have been added to a context
+ * in the Run Pool:
+ * - In the latter case, the context is in the runpool because it's got a job
+ * ready to run, or is already running a job
+ * - We could just wait until the IRQ handler fires, but for certain types of
+ * jobs this can take a comparatively long time to complete, e.g. GLES FS jobs
+ * generally take much longer to run than GLES CS jobs, which are vertex shader
+ * jobs.
+ * - Therefore, when a new job appears in the ctx, we must check the job-slots
+ * to see if they're free, and run the jobs as before.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_submit_hipri Submission path for High Priority Contexts
+ *
+ * For High Priority Contexts on Mali-T600, we can make sure that at least 1 of
+ * them can be scheduled in immediately to start high priority jobs. In general,
+ * (no. ASs) - (no. JSs) high priority contexts may be started immediately. The
+ * following describes how this happens:
+ *
+ * Similar to the previous section, consider what happens with a high-priority
+ * context (a context with a priority higher than that of any in the Run Pool)
+ * that starts out with no jobs:
+ * - A job becomes ready to run on the context, and so we enqueue the context
+ * on the Policy's Queue.
+ * - However, we'd like to schedule in this context immediately, instead of
+ * waiting for one of the Run Pool contexts' timeslice to expire
+ * - The policy's Enqueue function must detect this (because it is the policy
+ * that embodies the concept of priority), and take appropriate action
+ * - That is, kbasep_js_policy_enqueue_ctx() should check the Policy's Run
+ * Pool to see if a lower priority context should be scheduled out, and then
+ * schedule in the High Priority context.
+ * - For Mali-T600, we can always pick a context to schedule out immediately
+ * (because there are more ASs than JSs), and so scheduling out a victim context
+ * and scheduling in the high priority context can happen immediately.
+ * - If a policy implements fair-sharing, then this can still ensure the
+ * victim later on gets a fair share of the GPU.
+ * - As a note, consider whether the victim can be of equal/higher priority
+ * than the incoming context:
+ * - Usually, higher priority contexts will be the ones currently running
+ * jobs, and so the context with the lowest priority is usually not running
+ * jobs.
+ * - This makes it likely that the victim context is low priority, but
+ * it's not impossible for it to be a high priority one:
+ * - Suppose 3 high priority contexts are submitting only FS jobs, and one low
+ * priority context is submitting CS jobs. Then, the context not running jobs will
+ * be one of the high priority contexts (because only 2 FS jobs can be
+ * queued/running on the GPU HW for Mali-T600).
+ * - The problem can be mitigated by extra action, but it's questionable
+ * whether we need to: we already have a high likelihood that there's at least
+ * one high priority context - that should be good enough.
+ * - And so, this method makes sure that at least one high priority context
+ * can be started very quickly, but more than one high priority context could be
+ * delayed (up to one timeslice).
+ * - To improve this, use a GPU with a higher number of Address Spaces vs Job
+ * Slots.
+ * - At this point, let's assume this high priority context has been scheduled
+ * in immediately. The next step is to ensure it can start some jobs quickly.
+ * - It must do this by Soft-Stopping jobs on any of the Job Slots that it can
+ * submit to.
+ * - The rest of the logic for starting the jobs is taken care of by the IRQ
+ * handler. All the policy needs to do is ensure that
+ * kbasep_js_policy_dequeue_job() will return the jobs from the high priority
+ * context.
+ *
+ * @note In SS state, we currently only use 2 job-slots (even for T608, but
+ * this might change in future). In this case, it's always possible to schedule
+ * out 2 ASs quickly (their jobs won't be in the HEAD registers). At the same
+ * time, this maximizes usage of the job-slots (only 2 are in use), because you
+ * can still guarantee that jobs from the High Priority contexts start immediately too.
+ *
+ *
+ *
+ * @section sec_kbase_js_policy_operation_notes Notes
+ *
+ * - In this design, a separate 'init' is needed from dequeue/requeue, so that
+ * information can be retained between the dequeue/requeue calls. For example,
+ * the total time spent for a context/job could be logged between
+ * dequeue/requeuing, to implement Fair Sharing. In this case, 'init' just
+ * initializes that information to some known state.
+ *
+ *
+ *
+ */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @addtogroup base_kbase_api
+ * @{
+ */
+
+/**
+ * @addtogroup kbase_js_policy Job Scheduler Policy APIs
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy structure
+ */
+union kbasep_js_policy;
+
+/**
+ * @brief Initialize the Job Scheduler Policy
+ */
+int kbasep_js_policy_init(struct kbase_device *kbdev);
+
+/**
+ * @brief Terminate the Job Scheduler Policy
+ */
+void kbasep_js_policy_term(union kbasep_js_policy *js_policy);
+
+/**
+ * @addtogroup kbase_js_policy_ctx Job Scheduler Policy, Context Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
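+ *
+ * The sketch below is illustrative only (locking and error handling are
+ * elided; see each function's documentation for the locks the caller must
+ * hold). It shows roughly how the Job Scheduler Core drives a context
+ * through this API:
+ * @code
+ * // Context becomes ready to run: place it on the Policy Queue
+ * kbasep_js_policy_enqueue_ctx(js_policy, kctx);
+ *
+ * // An Address Space is free: schedule in the head context
+ * struct kbase_context *head_kctx;
+ * if (kbasep_js_policy_dequeue_head_ctx(js_policy, &head_kctx))
+ *         kbasep_js_policy_runpool_add_ctx(js_policy, head_kctx);
+ *
+ * // On a job-completion IRQ: has the context used up its timeslice?
+ * if (kbasep_js_policy_should_remove_ctx(js_policy, head_kctx)) {
+ *         // Deferred to a work-queue in practice, to keep the IRQ
+ *         // handler quick (see the overview)
+ *         kbasep_js_policy_runpool_remove_ctx(js_policy, head_kctx);
+ *         kbasep_js_policy_enqueue_ctx(js_policy, head_kctx);
+ * }
+ * @endcode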
+ */
+
+/**
+ * @brief Job Scheduler Policy Ctx Info structure
+ *
+ * This structure is embedded in the struct kbase_context structure. It is used to:
+ * - track information needed for the policy to schedule the context (e.g. time
+ * used, OS priority etc.)
+ * - link together kbase_contexts into a queue, so that a struct kbase_context can be
+ * obtained as the container of the policy ctx info. This allows the API to
+ * return what "the next context" should be.
+ * - obtain other information already stored in the struct kbase_context for
+ * scheduling purposes (e.g. process ID to get the priority of the originating
+ * process)
+ */
+union kbasep_js_policy_ctx_info;
+
+/**
+ * @brief Initialize a ctx for use with the Job Scheduler Policy
+ *
+ * This effectively initializes the union kbasep_js_policy_ctx_info structure within
+ * the struct kbase_context (itself located within the kctx->jctx.sched_info structure).
+ */
+int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
+
+/**
+ * @brief Terminate resources associated with using a ctx in the Job Scheduler
+ * Policy.
+ */
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Enqueue a context onto the Job Scheduler Policy Queue
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out a low
+ * priority context from the Run Pool to allow the high priority context to be
+ * scheduled in.
+ *
+ * If the context has the privileged flag set, it will always be kept at the
+ * head of the queue.
+ *
+ * The caller will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ */
+void kbasep_js_policy_enqueue_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Dequeue a context from the Head of the Job Scheduler Policy Queue
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if a context was available, and *kctx_ptr points to
+ * the kctx dequeued.
+ * @return false if no contexts were available.
+ */
+bool kbasep_js_policy_dequeue_head_ctx(union kbasep_js_policy *js_policy, struct kbase_context ** const kctx_ptr);
+
+/**
+ * @brief Evict a context from the Job Scheduler Policy Queue
+ *
+ * This is only called as part of destroying a kbase_context.
+ *
+ * There are many reasons why this might fail during the lifetime of a
+ * context. For example, the context is in the process of being scheduled. In
+ * that case a thread doing the scheduling might have a pointer to it, but the
+ * context is neither in the Policy Queue, nor is it in the Run
+ * Pool. Crucially, neither the Policy Queue, the Run Pool, nor the Context
+ * itself is locked.
+ *
+ * Hence to find out where in the system the context is, it is important to do
+ * more than just check the kbasep_js_kctx_info::ctx::is_scheduled member.
+ *
+ * The caller will be holding kbasep_js_device_data::queue_mutex.
+ *
+ * @return true if the context was evicted from the Policy Queue
+ * @return false if the context was not found in the Policy Queue
+ */
+bool kbasep_js_policy_try_evict_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Call a function on all jobs belonging to a non-queued, non-running
+ * context, optionally detaching the jobs from the context as it goes.
+ *
+ * At the time of the call, the context is guaranteed to be not-currently
+ * scheduled on the Run Pool (is_scheduled == false), and not present in
+ * the Policy Queue. This is because one of the following functions was used
+ * recently on the context:
+ * - kbasep_js_policy_try_evict_ctx()
+ * - kbasep_js_policy_runpool_remove_ctx()
+ *
+ * In both cases, no subsequent call was made on the context to any of:
+ * - kbasep_js_policy_runpool_add_ctx()
+ * - kbasep_js_policy_enqueue_ctx()
+ *
+ * Due to the locks that might be held at the time of the call, the callback
+ * may need to defer work on a workqueue to complete its actions (e.g. when
+ * cancelling jobs).
+ *
+ * \a detach_jobs must only be set when cancelling jobs (which occurs as part
+ * of context destruction).
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ */
+void kbasep_js_policy_foreach_ctx_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx,
+	kbasep_js_policy_ctx_job_cb callback, bool detach_jobs);
+
+/**
+ * @brief Add a context to the Job Scheduler Policy's Run Pool
+ *
+ * If the context enqueued has a priority higher than any in the Run Pool, then
+ * it is the Policy's responsibility to decide whether to schedule out low
+ * priority jobs that are currently running on the GPU.
+ *
+ * The number of contexts present in the Run Pool will never be more than the
+ * number of Address Spaces.
+ *
+ * The following guarantees are made about the state of the system when this
+ * is called:
+ * - kctx->as_nr member is valid
+ * - the context has its submit_allowed flag set
+ * - kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is valid
+ * - The refcount of the context is guaranteed to be zero.
+ * - kbasep_js_kctx_info::ctx::is_scheduled will be true.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_add_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Remove a context from the Job Scheduler Policy's Run Pool
+ *
+ * The kctx->as_nr member is valid and the context has its submit_allowed flag
+ * set when this is called. The state of
+ * kbasep_js_device_data::runpool_irq::per_as_data[kctx->as_nr] is also
+ * valid. The refcount of the context is guaranteed to be zero.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_irq::lock (a spinlock)
+ *
+ * Due to a spinlock being held, this function must not call any APIs that sleep.
+ */
+void kbasep_js_policy_runpool_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Indicate whether a context should be removed from the Run Pool
+ * (should be scheduled out).
+ *
+ * The kbasep_js_device_data::runpool_irq::lock will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
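+ *
+ * As an illustration only (not a requirement on implementations): in the
+ * CFS policy this reduces to the comparison described in the overview,
+ * i.e. checking the context's stored (weighted) runtime against a maximum
+ * value derived from the scheduling timeslice.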
+ */
+bool kbasep_js_policy_should_remove_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx);
+
+/**
+ * @brief Synchronize with any timers acting upon the runpool
+ *
+ * The policy should check whether any timers it owns should be running. If
+ * they should not, the policy must cancel such timers and ensure they are not
+ * re-run by the time this function finishes.
+ *
+ * In particular, the timers must not be running when there are no more contexts
+ * on the runpool, because the GPU could be powered off soon after this call.
+ *
+ * The locking conditions on the caller are as follows:
+ * - it will be holding kbasep_js_kctx_info::ctx::jsctx_mutex.
+ * - it will be holding kbasep_js_device_data::runpool_mutex.
+ */
+void kbasep_js_policy_runpool_timers_sync(union kbasep_js_policy *js_policy);
+
+
+/**
+ * @brief Indicate whether a new context has a higher priority than the current context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held for \a new_ctx
+ *
+ * This function must not sleep, because an IRQ spinlock might be held whilst
+ * this is called.
+ *
+ * @note There is nothing to stop the priority of \a current_ctx changing
+ * during or immediately after this function is called (because its jsctx_mutex
+ * cannot be held). Therefore, this function should only be seen as a heuristic
+ * guide as to whether \a new_ctx is higher priority than \a current_ctx.
+ */
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx);
+
+	/** @} *//* end group kbase_js_policy_ctx */
+
+/**
+ * @addtogroup kbase_js_policy_job Job Scheduler Policy, Job Chain Management API
+ * @{
+ *
+ * <b>Refer to @ref page_kbase_js_policy for an overview and detailed operation of
+ * the Job Scheduler Policy and its use from the Job Scheduler Core</b>.
+ */
+
+/**
+ * @brief Job Scheduler Policy Job Info structure
+ *
+ * This structure is embedded in the struct kbase_jd_atom structure. It is used to:
+ * - track information needed for the policy to schedule the job (e.g. time
+ * used, etc.)
+ * - link together jobs into a queue/buffer, so that a struct kbase_jd_atom can be
+ * obtained as the container of the policy job info. This allows the API to
+ * return what "the next job" should be.
+ */
+union kbasep_js_policy_job_info;
+
+/**
+ * @brief Initialize a job for use with the Job Scheduler Policy
+ *
+ * This function initializes the union kbasep_js_policy_job_info structure within the
+ * kbase_jd_atom. It will only initialize/allocate resources that are specific
+ * to the job.
+ *
+ * That is, this function makes \b no attempt to:
+ * - initialize any context/policy-wide information
+ * - enqueue the job on the policy.
+ *
+ * At some later point, the following functions must be called on the job, in this order:
+ * - kbasep_js_policy_register_job() to register the job and initialize policy/context wide data.
+ * - kbasep_js_policy_enqueue_job() to enqueue the job
+ *
+ * A job must only ever be initialized on the Policy once, and must be
+ * terminated on the Policy before the job is freed.
+ *
+ * The caller will not be holding any locks, and so this function will not
+ * modify any information in \a kctx or \a js_policy.
+ *
+ * @return 0 if initialization was correct.
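+ *
+ * A minimal sketch of the required ordering (locking and error handling
+ * elided; illustrative only):
+ * @code
+ * if (kbasep_js_policy_init_job(js_policy, kctx, katom) != 0)
+ *         return -EINVAL; // job-local init failed; nothing to deregister
+ *
+ * // Later, under the locks documented for each function:
+ * kbasep_js_policy_register_job(js_policy, kctx, katom);
+ * kbasep_js_policy_enqueue_job(js_policy, katom);
+ * @endcode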
+ */
+int kbasep_js_policy_init_job(const union kbasep_js_policy *js_policy, const struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * Registers the job with the policy. This is used to track the job before it
+ * has been enqueued/requeued by kbasep_js_policy_enqueue_job(). Specifically,
+ * it is used to update information under a lock that could not be updated at
+ * kbasep_js_policy_init_job() time (such as context/policy-wide data).
+ *
+ * @note This function will not fail, and hence does not allocate any
+ * resources. Any failures that could occur on registration will be caught
+ * during kbasep_js_policy_init_job() instead.
+ *
+ * A job must only ever be registered on the Policy once, and must be
+ * deregistered on the Policy on completion (whether that completion was a
+ * success or a failure).
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_register_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief De-register context/policy-wide information for a job on the Job Scheduler Policy.
+ *
+ * This must be used before terminating the resources associated with using a
+ * job in the Job Scheduler Policy. This function does not itself terminate any
+ * resources; at most it just updates information in the policy and context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_deregister_job(union kbasep_js_policy *js_policy, struct kbase_context *kctx, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Dequeue a Job for a job slot from the Job Scheduler Policy Run Pool
+ *
+ * The job returned by the policy will match at least one of the bits in the
+ * job slot's core requirements (but it may match more than one, or all @ref
+ * base_jd_core_req bits supported by the job slot).
+ *
+ * In addition, the requirements of the job returned will be a subset of those
+ * requested - the job returned will not have requirements that \a job_slot_idx
+ * cannot satisfy.
+ *
+ * The caller will submit the job to the GPU as soon as the GPU's NEXT register
+ * for the corresponding slot is empty. Of course, the GPU will then only run
+ * this new job when the currently executing job (in the jobslot's HEAD
+ * register) has completed.
+ *
+ * @return true if a job was available, and *katom_ptr points to
+ * the atom dequeued.
+ * @return false if no jobs were available among all ctxs in the Run Pool.
+ *
+ * @note base_jd_core_req is currently a u8 - beware of type conversion.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+bool kbasep_js_policy_dequeue_job(struct kbase_device *kbdev, int job_slot_idx, struct kbase_jd_atom ** const katom_ptr);
+
+/**
+ * @brief Requeue a Job back into the Job Scheduler Policy Run Pool
+ *
+ * This will be used to enqueue a job after its creation and also to requeue
+ * a job into the Run Pool that was previously dequeued (running). It notifies
+ * the policy that the job should be run again at some point later.
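+ *
+ * For example, as described in the overview, when a job is Soft-stopped the
+ * IRQ handler defers the book-keeping to a work-queue, and it is that
+ * work-queue which calls this function to re-enqueue the job.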
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock (a spinlock) will be held.
+ * - kbasep_js_device_data::runpool_mutex will be held.
+ * - kbasep_js_kctx_info::ctx::jsctx_mutex will be held.
+ */
+void kbasep_js_policy_enqueue_job(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom);
+
+/**
+ * @brief Log the result of a job: the time spent on a job/context, and whether
+ * the job failed or not.
+ *
+ * Since a struct kbase_jd_atom contains a pointer to the struct kbase_context owning it,
+ * this can also be used to log time on either/both the job and the
+ * containing context.
+ *
+ * The completion state of the job can be found by examining \a katom->event.event_code.
+ *
+ * If the Job failed and the policy is implementing fair-sharing, then the
+ * policy must penalize the failing job/context:
+ * - At the very least, it should penalize the time taken by the amount of
+ * time spent processing the IRQ in SW. This is because a job in the NEXT slot
+ * waiting to run will be delayed until the failing job has had the IRQ
+ * cleared.
+ * - \b Optionally, the policy could apply other penalties. For example, a
+ * large penalty could be applied once the number of failing jobs exceeds some
+ * threshold.
+ *
+ * The kbasep_js_device_data::runpool_mutex will be held by the caller.
+ *
+ * @note This API is called from IRQ context.
+ *
+ * The caller has the following conditions on locking:
+ * - kbasep_js_device_data::runpool_irq::lock will be held.
+ *
+ * @param js_policy job scheduler policy
+ * @param katom job dispatch atom
+ * @param time_spent_us the time spent by the job, in microseconds (10^-6 seconds).
+ */
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us);
+
+	/** @} *//* end group kbase_js_policy_job */
+
+	/** @} *//* end group kbase_js_policy */
+	/** @} *//* end group base_kbase_api */
+	/** @} *//* end group base_api */
+
+#endif /* _KBASE_JS_POLICY_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
new file mode 100644
index 00000000000..692460710ce
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.c
@@ -0,0 +1,297 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Job Scheduler: Completely Fair Policy Implementation
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_js.h>
+#include <mali_kbase_js_policy_cfs.h>
+#include <linux/version.h>
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)
+#include <linux/sched/rt.h>
+#endif
+
+/**
+ * Define for when dumping is enabled.
+ * This should not be based on the instrumentation level, as whether dumping is enabled for a particular level is down to the integrator.
+ * However this is being used for now, as otherwise the cinstr headers would be needed.
+ */
+#define CINSTR_DUMPING_ENABLED (2 == MALI_INSTRUMENTATION_LEVEL)
+
+/* Fixed point constants used for runtime weight calculations */
+#define WEIGHT_FIXEDPOINT_SHIFT 10
+#define WEIGHT_TABLE_SIZE 40
+#define WEIGHT_0_NICE (WEIGHT_TABLE_SIZE/2)
+#define WEIGHT_0_VAL (1 << WEIGHT_FIXEDPOINT_SHIFT)
+
+#define PROCESS_PRIORITY_MIN (-20)
+#define PROCESS_PRIORITY_MAX (19)
+
+/* Defines for easy asserts 'is scheduled'/'is queued'/'is neither queued nor scheduled' */
+#define KBASEP_JS_CHECKFLAG_QUEUED (1u << 0) /**< Check the queued state */
+#define KBASEP_JS_CHECKFLAG_SCHEDULED (1u << 1) /**< Check the scheduled state */
+#define KBASEP_JS_CHECKFLAG_IS_QUEUED (1u << 2) /**< Expect queued state to be set */
+#define KBASEP_JS_CHECKFLAG_IS_SCHEDULED (1u << 3) /**< Expect scheduled state to be set */
+
+enum {
+	KBASEP_JS_CHECK_NOTQUEUED = KBASEP_JS_CHECKFLAG_QUEUED,
+	KBASEP_JS_CHECK_NOTSCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED,
+	KBASEP_JS_CHECK_QUEUED = KBASEP_JS_CHECKFLAG_QUEUED | KBASEP_JS_CHECKFLAG_IS_QUEUED,
+	KBASEP_JS_CHECK_SCHEDULED = KBASEP_JS_CHECKFLAG_SCHEDULED | KBASEP_JS_CHECKFLAG_IS_SCHEDULED
+};
+
+typedef u32 kbasep_js_check;
+
+/*
+ * Private Functions
+ */
+
+/* Table autogenerated using util built from: base/tools/gen_cfs_weight_of_prio/ */
+
+/* weight = 1.25 */
+static const int weight_of_priority[] = {
+	/* -20 */ 11, 14, 18, 23,
+	/* -16 */ 29, 36, 45, 56,
+	/* -12 */ 70, 88, 110, 137,
+	/* -8 */ 171, 214, 268, 335,
+	/* -4 */ 419, 524, 655, 819,
+	/* 0 */ 1024, 1280, 1600, 2000,
+	/* 4 */ 2500, 3125, 3906, 4883,
+	/* 8 */ 6104, 7630, 9538, 11923,
+	/* 12 */ 14904, 18630, 23288, 29110,
+	/* 16 */ 36388, 45485, 56856, 71070
+};
+
+/*
+ * Note: There is nothing to stop the priority of the ctx containing
+ * ctx_info changing during or immediately after this function is called
+ * (because its jsctx_mutex cannot be held during IRQ). Therefore, this
+ * function should only be seen as a heuristic guide as to the priority weight
+ * of the context.
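+ *
+ * Worked example of the weighting this function applies: with
+ * WEIGHT_FIXEDPOINT_SHIFT == 10, the table value at nice 0 is 1024 (i.e. a
+ * weight of 1.0), and the table steps by a factor of 1.25 per nice level.
+ * So at nice +1 (weight 1280), 1000us of GPU time is logged as
+ * (1000 * 1280) >> 10 = 1250us; at nice -1 (weight 819) it is logged as
+ * (1000 * 819) >> 10 = 799us.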
+ */ +static u64 priority_weight(struct kbasep_js_policy_cfs_ctx *ctx_info, u64 time_us) +{ + u64 time_delta_us; + int priority; + + priority = ctx_info->process_priority; + + /* Adjust runtime_us using priority weight if required */ + if (priority != 0 && time_us != 0) { + int clamped_priority; + + /* Clamp values to min..max weights */ + if (priority > PROCESS_PRIORITY_MAX) + clamped_priority = PROCESS_PRIORITY_MAX; + else if (priority < PROCESS_PRIORITY_MIN) + clamped_priority = PROCESS_PRIORITY_MIN; + else + clamped_priority = priority; + + /* Fixed point multiplication */ + time_delta_us = (time_us * weight_of_priority[WEIGHT_0_NICE + clamped_priority]); + /* Remove fraction */ + time_delta_us = time_delta_us >> WEIGHT_FIXEDPOINT_SHIFT; + /* Make sure the time always increases */ + if (0 == time_delta_us) + time_delta_us++; + } else { + time_delta_us = time_us; + } + + return time_delta_us; +} + +#if KBASE_TRACE_ENABLE +static int kbasep_js_policy_trace_get_refcnt_nolock(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + int as_nr; + int refcnt = 0; + + js_devdata = &kbdev->js_data; + + as_nr = kctx->as_nr; + if (as_nr != KBASEP_AS_NR_INVALID) { + struct kbasep_js_per_as_data *js_per_as_data; + + js_per_as_data = &js_devdata->runpool_irq.per_as_data[as_nr]; + + refcnt = js_per_as_data->as_busy_refcount; + } + + return refcnt; +} + +static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + unsigned long flags; + struct kbasep_js_device_data *js_devdata; + int refcnt = 0; + + js_devdata = &kbdev->js_data; + + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + refcnt = kbasep_js_policy_trace_get_refcnt_nolock(kbdev, kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + return refcnt; +} +#else /* KBASE_TRACE_ENABLE */ +static inline int kbasep_js_policy_trace_get_refcnt(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + CSTD_UNUSED(kbdev); + CSTD_UNUSED(kctx); + return 0; +} +#endif /* KBASE_TRACE_ENABLE */ + + +/* + * Non-private functions + */ + +int kbasep_js_policy_init(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_policy_cfs *policy_info; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + js_devdata = &kbdev->js_data; + policy_info = &js_devdata->policy.cfs; + + atomic64_set(&policy_info->least_runtime_us, KBASEP_JS_RUNTIME_EMPTY); + atomic64_set(&policy_info->rt_least_runtime_us, KBASEP_JS_RUNTIME_EMPTY); + + policy_info->head_runtime_us = 0; + + return 0; +} + +void kbasep_js_policy_term(union kbasep_js_policy *js_policy) +{ + CSTD_UNUSED(js_policy); +} + +int kbasep_js_policy_init_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) +{ + struct kbasep_js_device_data *js_devdata; + struct kbasep_js_policy_cfs_ctx *ctx_info; + struct kbasep_js_policy_cfs *policy_info; + int policy; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + KBASE_DEBUG_ASSERT(kctx != NULL); + + js_devdata = &kbdev->js_data; + policy_info = &kbdev->js_data.policy.cfs; + ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs; + + KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_INIT_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx)); + + policy = current->policy; + if (policy == SCHED_FIFO || policy == SCHED_RR) { + ctx_info->process_rt_policy = true; + ctx_info->process_priority = (((MAX_RT_PRIO - 1) - current->rt_priority) / 5) - 20; + } else { + ctx_info->process_rt_policy = false; + ctx_info->process_priority = 
(current->static_prio - MAX_RT_PRIO) - 20;
+	}
+
+	/* Initial runtime (relative to least-run context runtime)
+	 *
+	 * This uses the Policy Queue's most up-to-date head_runtime_us by using the
+	 * queue mutex to issue memory barriers - it also ensures future updates to
+	 * head_runtime_us occur strictly after this context is initialized */
+	mutex_lock(&js_devdata->queue_mutex);
+
+	/* No need to hold the runpool_irq.lock here, because we're initializing
+	 * the value, and the context is definitely not being updated in the
+	 * runpool at this point. The queue_mutex ensures the memory barrier. */
+	ctx_info->runtime_us = policy_info->head_runtime_us + priority_weight(ctx_info, (u64) js_devdata->cfs_ctx_runtime_init_slices * (u64) (js_devdata->ctx_timeslice_ns / 1000u));
+
+	mutex_unlock(&js_devdata->queue_mutex);
+
+	return 0;
+}
+
+void kbasep_js_policy_term_ctx(union kbasep_js_policy *js_policy, struct kbase_context *kctx)
+{
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbasep_js_policy_cfs *policy_info;
+	struct kbase_device *kbdev;
+
+	KBASE_DEBUG_ASSERT(js_policy != NULL);
+	KBASE_DEBUG_ASSERT(kctx != NULL);
+
+	policy_info = &js_policy->cfs;
+	ctx_info = &kctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	kbdev = container_of(js_policy, struct kbase_device, js_data.policy);
+	KBASE_TRACE_ADD_REFCOUNT(kbdev, JS_POLICY_TERM_CTX, kctx, NULL, 0u, kbasep_js_policy_trace_get_refcnt(kbdev, kctx));
+
+	/* No work to do */
+}
+
+/*
+ * Job Chain Management
+ */
+
+void kbasep_js_policy_log_job_result(union kbasep_js_policy *js_policy, struct kbase_jd_atom *katom, u64 time_spent_us)
+{
+	struct kbasep_js_policy_cfs_ctx *ctx_info;
+	struct kbase_context *parent_ctx;
+
+	KBASE_DEBUG_ASSERT(js_policy != NULL);
+	KBASE_DEBUG_ASSERT(katom != NULL);
+	CSTD_UNUSED(js_policy);
+
+	parent_ctx = katom->kctx;
+	KBASE_DEBUG_ASSERT(parent_ctx != NULL);
+
+	ctx_info = &parent_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	ctx_info->runtime_us += priority_weight(ctx_info, time_spent_us);
+
+	katom->time_spent_us += time_spent_us;
+}
+
+bool kbasep_js_policy_ctx_has_priority(union kbasep_js_policy *js_policy, struct kbase_context *current_ctx, struct kbase_context *new_ctx)
+{
+	struct kbasep_js_policy_cfs_ctx *current_ctx_info;
+	struct kbasep_js_policy_cfs_ctx *new_ctx_info;
+
+	KBASE_DEBUG_ASSERT(current_ctx != NULL);
+	KBASE_DEBUG_ASSERT(new_ctx != NULL);
+	CSTD_UNUSED(js_policy);
+
+	current_ctx_info = &current_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+	new_ctx_info = &new_ctx->jctx.sched_info.runpool.policy_ctx.cfs;
+
+	if (!current_ctx_info->process_rt_policy && new_ctx_info->process_rt_policy)
+		return true;
+
+	if (current_ctx_info->process_rt_policy ==
+			new_ctx_info->process_rt_policy)
+		return true;
+
+	return false;
+}
diff --git a/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
new file mode 100644
index 00000000000..b457d8215ab
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_js_policy_cfs.h
@@ -0,0 +1,81 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/*
+ * Completely Fair Job Scheduler Policy structure definitions
+ */
+
+#ifndef _KBASE_JS_POLICY_CFS_H_
+#define _KBASE_JS_POLICY_CFS_H_
+
+#define KBASEP_JS_RUNTIME_EMPTY ((u64)-1)
+
+/**
+ * struct kbasep_js_policy_cfs - Data for the CFS policy
+ * @head_runtime_us: Number of microseconds the least-run context has been
+ * running for. The kbasep_js_device_data.queue_mutex must
+ * be held whilst updating this.
+ * Reads are possible without this mutex, but an older value
+ * might be read if no memory barriers are issued beforehand.
+ * @least_runtime_us: Number of microseconds the least-run context in the
+ * context queue has been running for.
+ * -1 if context queue is empty.
+ * @rt_least_runtime_us: Number of microseconds the least-run context in the
+ * realtime (priority) context queue has been running for.
+ * -1 if realtime context queue is empty.
+ */
+struct kbasep_js_policy_cfs {
+	u64 head_runtime_us;
+	atomic64_t least_runtime_us;
+	atomic64_t rt_least_runtime_us;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_ctx - per-context data for the CFS policy
+ * @runtime_us: microseconds this context has been running for
+ * @process_rt_policy: set if the calling process' scheduling policy is a
+ * realtime scheduler; the context will then use the
+ * priority queue. Non-mutable after ctx init
+ * @process_priority: calling process NICE priority, in the range -20..19
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when updating
+ * @runtime_us. Initializing will occur on context init and context enqueue
+ * (which can only occur in one thread at a time), but multi-thread access only
+ * occurs while the context is in the runpool.
+ *
+ * Reads are possible without the spinlock, but an older value might be read if
+ * no memory barriers are issued beforehand.
+ */
+struct kbasep_js_policy_cfs_ctx {
+	u64 runtime_us;
+	bool process_rt_policy;
+	int process_priority;
+};
+
+/**
+ * struct kbasep_js_policy_cfs_job - per job information for CFS
+ * @ticks: number of ticks that this job has been executing for
+ *
+ * &kbasep_js_device_data.runpool_irq.lock must be held when accessing @ticks.
+ */
+struct kbasep_js_policy_cfs_job {
+	u32 ticks;
+};
+
+#endif
diff --git a/drivers/gpu/arm/midgard/mali_kbase_linux.h b/drivers/gpu/arm/midgard/mali_kbase_linux.h
new file mode 100644
index 00000000000..6d1e61fd41e
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_linux.h
@@ -0,0 +1,43 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_linux.h
+ * Base kernel APIs, Linux implementation.
+ */
+
+#ifndef _KBASE_LINUX_H_
+#define _KBASE_LINUX_H_
+
+/* All things that are needed for the Linux port.
*/ +#include <linux/platform_device.h> +#include <linux/miscdevice.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/atomic.h> + +#if (defined(MALI_KERNEL_TEST_API) && (1 == MALI_KERNEL_TEST_API)) + #define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func) +#else + #define KBASE_EXPORT_TEST_API(func) +#endif + +#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func) + +#endif /* _KBASE_LINUX_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.c b/drivers/gpu/arm/midgard/mali_kbase_mem.c new file mode 100644 index 00000000000..2c7fb5a4032 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -0,0 +1,1363 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_mem.c + * Base kernel memory APIs + */ +#ifdef CONFIG_DMA_SHARED_BUFFER +#include <linux/dma-buf.h> +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +#include <linux/kernel.h> +#include <linux/bug.h> +#include <linux/compat.h> + +#include <mali_kbase_config.h> +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#include <mali_kbase_cache_policy.h> +#include <mali_kbase_hw.h> +#include <mali_kbase_gator.h> +#include <mali_kbase_hwaccess_time.h> + +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include <mali_kbase_tlstream.h> +#endif + +/** + * @brief Check the zone compatibility of two regions. + */ +static int kbase_region_tracker_match_zone(struct kbase_va_region *reg1, + struct kbase_va_region *reg2) +{ + return ((reg1->flags & KBASE_REG_ZONE_MASK) == + (reg2->flags & KBASE_REG_ZONE_MASK)); +} + +KBASE_EXPORT_TEST_API(kbase_region_tracker_match_zone); + +/* This function inserts a region into the tree. */ +static void kbase_region_tracker_insert(struct kbase_context *kctx, struct kbase_va_region *new_reg) +{ + u64 start_pfn = new_reg->start_pfn; + struct rb_node **link = &(kctx->reg_rbtree.rb_node); + struct rb_node *parent = NULL; + + /* Find the right place in the tree using tree search */ + while (*link) { + struct kbase_va_region *old_reg; + + parent = *link; + old_reg = rb_entry(parent, struct kbase_va_region, rblink); + + /* RBTree requires no duplicate entries. */ + KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn); + + if (old_reg->start_pfn > start_pfn) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Put the new node there, and rebalance tree */ + rb_link_node(&(new_reg->rblink), parent, link); + rb_insert_color(&(new_reg->rblink), &(kctx->reg_rbtree)); +} + +/* Find allocated region enclosing free range. */ +static struct kbase_va_region *kbase_region_tracker_find_region_enclosing_range_free( + struct kbase_context *kctx, u64 start_pfn, size_t nr_pages) +{ + struct rb_node *rbnode; + struct kbase_va_region *reg; + + u64 end_pfn = start_pfn + nr_pages; + + rbnode = kctx->reg_rbtree.rb_node; + while (rbnode) { + u64 tmp_start_pfn, tmp_end_pfn; + + reg = rb_entry(rbnode, struct kbase_va_region, rblink); + tmp_start_pfn = reg->start_pfn; + tmp_end_pfn = reg->start_pfn + reg->nr_pages; + + /* If start is lower than this, go left. 
*/
+		if (start_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (end_pfn > tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+/* Find region enclosing given address. */
+struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		u64 tmp_start_pfn, tmp_end_pfn;
+
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		tmp_start_pfn = reg->start_pfn;
+		tmp_end_pfn = reg->start_pfn + reg->nr_pages;
+
+		/* If start is lower than this, go left. */
+		if (gpu_pfn < tmp_start_pfn)
+			rbnode = rbnode->rb_left;
+		/* If end is higher than this, then go right. */
+		else if (gpu_pfn >= tmp_end_pfn)
+			rbnode = rbnode->rb_right;
+		else	/* Enclosing */
+			return reg;
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);
+
+/* Find region with given base address */
+struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr)
+{
+	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	KBASE_DEBUG_ASSERT(NULL != kctx);
+
+	lockdep_assert_held(&kctx->reg_lock);
+
+	rbnode = kctx->reg_rbtree.rb_node;
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if (reg->start_pfn > gpu_pfn)
+			rbnode = rbnode->rb_left;
+		else if (reg->start_pfn < gpu_pfn)
+			rbnode = rbnode->rb_right;
+		else
+			return reg;
+
+	}
+
+	return NULL;
+}
+
+KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);
+
+/* Find region meeting given requirements */
+static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(struct kbase_context *kctx, struct kbase_va_region *reg_reqs, size_t nr_pages, size_t align)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	/* Note that this search is a linear search, as we do not have a target
+	   address in mind, so it does not benefit from the rbtree search */
+	rbnode = rb_first(&(kctx->reg_rbtree));
+	while (rbnode) {
+		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+		if ((reg->nr_pages >= nr_pages) &&
+				(reg->flags & KBASE_REG_FREE) &&
+				kbase_region_tracker_match_zone(reg, reg_reqs)) {
+			/* Check alignment */
+			u64 start_pfn = (reg->start_pfn + align - 1) & ~(align - 1);
+
+			if ((start_pfn >= reg->start_pfn) &&
+					(start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
+					((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1)))
+				return reg;
+		}
+		rbnode = rb_next(rbnode);
+	}
+
+	return NULL;
+}
+
+/**
+ * @brief Remove a region object from the global list.
+ *
+ * The region reg is removed, possibly by merging with other free and
+ * compatible adjacent regions. It must be called with the context
+ * region lock held. The associated memory is not released (see
+ * kbase_free_alloced_region). Internal use only.
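+ *
+ * The region is merged with any free, zone-compatible neighbours; if
+ * neither neighbour can absorb it, a new free placeholder region replaces
+ * it. Illustration (A = allocated, F = free, r = the region removed):
+ *
+ *   [F][r][F]  ->  [F........]   merged with both neighbours
+ *   [A][r][F]  ->  [A][F.....]   merged with the next region only
+ *   [A][r][A]  ->  [A][F][A]     replaced by a new free placeholder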
+ */
+static int kbase_remove_va_region(struct kbase_context *kctx, struct kbase_va_region *reg)
+{
+	struct rb_node *rbprev;
+	struct kbase_va_region *prev = NULL;
+	struct rb_node *rbnext;
+	struct kbase_va_region *next = NULL;
+
+	int merged_front = 0;
+	int merged_back = 0;
+	int err = 0;
+
+	/* Try to merge with the previous block first */
+	rbprev = rb_prev(&(reg->rblink));
+	if (rbprev) {
+		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
+		if ((prev->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(prev, reg)) {
+			/* We're compatible with the previous VMA, merge with it */
+			prev->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			reg = prev;
+			merged_front = 1;
+		}
+	}
+
+	/* Try to merge with the next block second */
+	/* Note we do the lookup here as the tree may have been rebalanced. */
+	rbnext = rb_next(&(reg->rblink));
+	if (rbnext) {
+		next = rb_entry(rbnext, struct kbase_va_region, rblink);
+		if ((next->flags & KBASE_REG_FREE) && kbase_region_tracker_match_zone(next, reg)) {
+			/* We're compatible with the next VMA, merge with it */
+			next->start_pfn = reg->start_pfn;
+			next->nr_pages += reg->nr_pages;
+			rb_erase(&(reg->rblink), &kctx->reg_rbtree);
+			merged_back = 1;
+			if (merged_front) {
+				/* We already merged with prev, free it */
+				kbase_free_alloced_region(reg);
+			}
+		}
+	}
+
+	/* If we failed to merge then we need to add a new block */
+	if (!(merged_front || merged_back)) {
+		/*
+		 * We didn't merge anything. Add a new free
+		 * placeholder and remove the original one.
+		 */
+		struct kbase_va_region *free_reg;
+
+		free_reg = kbase_alloc_free_region(kctx, reg->start_pfn, reg->nr_pages, reg->flags & KBASE_REG_ZONE_MASK);
+		if (!free_reg) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		rb_replace_node(&(reg->rblink), &(free_reg->rblink), &(kctx->reg_rbtree));
+	}
+
+ out:
+	return err;
+}
+
+KBASE_EXPORT_TEST_API(kbase_remove_va_region);
+
+/**
+ * @brief Insert a VA region to the list, replacing the current at_reg.
+ */
+static int kbase_insert_va_region_nolock(struct kbase_context *kctx, struct kbase_va_region *new_reg, struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+{
+	int err = 0;
+
+	/* Must be a free region */
+	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
+	/* start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
+	/* at least nr_pages from start_pfn should be contained within at_reg */
+	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
+
+	new_reg->start_pfn = start_pfn;
+	new_reg->nr_pages = nr_pages;
+
+	/* New region exactly covers the old one, so swap and delete the old one. */
+	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
+		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink), &(kctx->reg_rbtree));
+		kbase_free_alloced_region(at_reg);
+	}
+	/* New region replaces the start of the old one, so insert before. */
+	else if (at_reg->start_pfn == start_pfn) {
+		at_reg->start_pfn += nr_pages;
+		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
+		at_reg->nr_pages -= nr_pages;
+
+		kbase_region_tracker_insert(kctx, new_reg);
+	}
+	/* New region replaces the end of the old one, so insert after.
*/ + else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) { + at_reg->nr_pages -= nr_pages; + + kbase_region_tracker_insert(kctx, new_reg); + } + /* New region splits the old one, so insert and create new */ + else { + struct kbase_va_region *new_front_reg; + + new_front_reg = kbase_alloc_free_region(kctx, + at_reg->start_pfn, + start_pfn - at_reg->start_pfn, + at_reg->flags & KBASE_REG_ZONE_MASK); + + if (new_front_reg) { + at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; + at_reg->start_pfn = start_pfn + nr_pages; + + kbase_region_tracker_insert(kctx, new_front_reg); + kbase_region_tracker_insert(kctx, new_reg); + } else { + err = -ENOMEM; + } + } + + return err; +} + +/** + * @brief Add a VA region to the list. + */ +int kbase_add_va_region(struct kbase_context *kctx, + struct kbase_va_region *reg, u64 addr, + size_t nr_pages, size_t align) +{ + struct kbase_va_region *tmp; + u64 gpu_pfn = addr >> PAGE_SHIFT; + int err = 0; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + + lockdep_assert_held(&kctx->reg_lock); + + if (!align) + align = 1; + + /* must be a power of 2 */ + KBASE_DEBUG_ASSERT((align & (align - 1)) == 0); + KBASE_DEBUG_ASSERT(nr_pages > 0); + + /* Path 1: Map a specific address. Find the enclosing region, which *must* be free. */ + if (gpu_pfn) { + struct device *dev = kctx->kbdev->dev; + + KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1))); + + tmp = kbase_region_tracker_find_region_enclosing_range_free(kctx, gpu_pfn, nr_pages); + if (!tmp) { + dev_warn(dev, "Enclosing region not found: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages); + err = -ENOMEM; + goto exit; + } + + if ((!kbase_region_tracker_match_zone(tmp, reg)) || + (!(tmp->flags & KBASE_REG_FREE))) { + dev_warn(dev, "Zone mismatch: %lu != %lu", tmp->flags & KBASE_REG_ZONE_MASK, reg->flags & KBASE_REG_ZONE_MASK); + dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n", tmp->start_pfn, tmp->flags, tmp->nr_pages, gpu_pfn, nr_pages); + dev_warn(dev, "in function %s (%p, %p, 0x%llx, 0x%zx, 0x%zx)\n", __func__, kctx, reg, addr, nr_pages, align); + err = -ENOMEM; + goto exit; + } + + err = kbase_insert_va_region_nolock(kctx, reg, tmp, gpu_pfn, nr_pages); + if (err) { + dev_warn(dev, "Failed to insert va region"); + err = -ENOMEM; + goto exit; + } + + goto exit; + } + + /* Path 2: Map any free address which meets the requirements. */ + { + u64 start_pfn; + + tmp = kbase_region_tracker_find_region_meeting_reqs(kctx, reg, nr_pages, align); + if (!tmp) { + err = -ENOMEM; + goto exit; + } + start_pfn = (tmp->start_pfn + align - 1) & ~(align - 1); + err = kbase_insert_va_region_nolock(kctx, reg, tmp, start_pfn, nr_pages); + } + + exit: + return err; +} + +KBASE_EXPORT_TEST_API(kbase_add_va_region); + +/** + * @brief Initialize the internal region tracker data structure. 
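+ *
+ * Resets the rbtree to empty, then inserts @a same_va_reg. @a exec_reg and
+ * @a custom_va_reg are only inserted when both are present: they are
+ * created only for clients that have the EXEC and CUSTOM_VA zones (see
+ * kbase_region_tracker_init()).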
+ */
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx, struct kbase_va_region *same_va_reg, struct kbase_va_region *exec_reg, struct kbase_va_region *custom_va_reg)
+{
+	kctx->reg_rbtree = RB_ROOT;
+	kbase_region_tracker_insert(kctx, same_va_reg);
+
+	/* exec and custom_va_reg don't always exist */
+	if (exec_reg && custom_va_reg) {
+		kbase_region_tracker_insert(kctx, exec_reg);
+		kbase_region_tracker_insert(kctx, custom_va_reg);
+	}
+}
+
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+	struct rb_node *rbnode;
+	struct kbase_va_region *reg;
+
+	do {
+		rbnode = rb_first(&(kctx->reg_rbtree));
+		if (rbnode) {
+			rb_erase(rbnode, &(kctx->reg_rbtree));
+			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
+			kbase_free_alloced_region(reg);
+		}
+	} while (rbnode);
+}
+
+/**
+ * Initialize the region tracker data structure.
+ */
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+	struct kbase_va_region *same_va_reg;
+	struct kbase_va_region *exec_reg = NULL;
+	struct kbase_va_region *custom_va_reg = NULL;
+	size_t same_va_bits = sizeof(void *) * BITS_PER_BYTE;
+	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
+	u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+
+#if defined(CONFIG_ARM64)
+	same_va_bits = VA_BITS;
+#elif defined(CONFIG_X86_64)
+	same_va_bits = 47;
+#elif defined(CONFIG_64BIT)
+#error Unsupported 64-bit architecture
+#endif
+
+#ifdef CONFIG_64BIT
+	if (kctx->is_compat)
+		same_va_bits = 32;
+	else if (kbase_hw_has_feature(kctx->kbdev, BASE_HW_FEATURE_33BIT_VA))
+		same_va_bits = 33;
+#endif
+
+	if (kctx->kbdev->gpu_props.mmu.va_bits < same_va_bits)
+		return -EINVAL;
+
+	/* all have SAME_VA */
+	same_va_reg = kbase_alloc_free_region(kctx, 1,
+			(1ULL << (same_va_bits - PAGE_SHIFT)) - 2,
+			KBASE_REG_ZONE_SAME_VA);
+
+	if (!same_va_reg)
+		return -ENOMEM;
+
+#ifdef CONFIG_64BIT
+	/* only 32-bit clients have the other two zones */
+	if (kctx->is_compat) {
+#endif
+		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
+			kbase_free_alloced_region(same_va_reg);
+			return -EINVAL;
+		}
+		/* If the current size of TMEM is out of range of the
+		 * virtual address space addressable by the MMU then
+		 * we should shrink it to fit
+		 */
+		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+
+		exec_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_EXEC_BASE,
+				KBASE_REG_ZONE_EXEC_SIZE,
+				KBASE_REG_ZONE_EXEC);
+
+		if (!exec_reg) {
+			kbase_free_alloced_region(same_va_reg);
+			return -ENOMEM;
+		}
+
+		custom_va_reg = kbase_alloc_free_region(kctx,
+				KBASE_REG_ZONE_CUSTOM_VA_BASE,
+				custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+
+		if (!custom_va_reg) {
+			kbase_free_alloced_region(same_va_reg);
+			kbase_free_alloced_region(exec_reg);
+			return -ENOMEM;
+		}
+#ifdef CONFIG_64BIT
+	}
+#endif
+
+	kbase_region_tracker_ds_init(kctx, same_va_reg, exec_reg, custom_va_reg);
+
+	return 0;
+}
+
+int kbase_mem_init(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	/* Initialize memory usage */
+	atomic_set(&memdev->used_pages, 0);
+
+	/* nothing to do, zero-inited when struct kbase_device was created */
+	return 0;
+}
+
+void kbase_mem_halt(struct kbase_device *kbdev)
+{
+	CSTD_UNUSED(kbdev);
+}
+
+void kbase_mem_term(struct kbase_device *kbdev)
+{
+	struct kbasep_mem_device *memdev;
+	int pages;
+
+	KBASE_DEBUG_ASSERT(kbdev);
+
+	memdev = &kbdev->memdev;
+
+	
pages = atomic_read(&memdev->used_pages); + if (pages != 0) + dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages); +} + +KBASE_EXPORT_TEST_API(kbase_mem_term); + + + + +/** + * @brief Allocate a free region object. + * + * The allocated object is not part of any list yet, and is flagged as + * KBASE_REG_FREE. No mapping is allocated yet. + * + * zone is KBASE_REG_ZONE_CUSTOM_VA, KBASE_REG_ZONE_SAME_VA, or KBASE_REG_ZONE_EXEC + * + */ +struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone) +{ + struct kbase_va_region *new_reg; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + /* zone argument should only contain zone related region flags */ + KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); + KBASE_DEBUG_ASSERT(nr_pages > 0); + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); + + new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL); + + if (!new_reg) + return NULL; + + new_reg->cpu_alloc = NULL; /* no alloc bound yet */ + new_reg->gpu_alloc = NULL; /* no alloc bound yet */ + new_reg->kctx = kctx; + new_reg->flags = zone | KBASE_REG_FREE; + + new_reg->flags |= KBASE_REG_GROWABLE; + + /* Set up default MEMATTR usage */ + new_reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT); + + new_reg->start_pfn = start_pfn; + new_reg->nr_pages = nr_pages; + + return new_reg; +} + +KBASE_EXPORT_TEST_API(kbase_alloc_free_region); + +/** + * @brief Free a region object. + * + * The described region must be freed of any mapping. + * + * If the region is not flagged as KBASE_REG_FREE, the region's + * alloc object will be released. + * It is a bug if no alloc object exists for non-free regions. + * + */ +void kbase_free_alloced_region(struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(NULL != reg); + if (!(reg->flags & KBASE_REG_FREE)) { + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); + /* To detect use-after-free in debug builds */ + KBASE_DEBUG_CODE(reg->flags |= KBASE_REG_FREE); + } + kfree(reg); +} + +KBASE_EXPORT_TEST_API(kbase_free_alloced_region); + +void kbase_mmu_update(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + lockdep_assert_held(&kctx->kbdev->js_data.runpool_irq.lock); + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. + * + * as_nr won't change because the caller has the runpool_irq lock */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + kctx->kbdev->mmu_mode->update(kctx); +} + +KBASE_EXPORT_TEST_API(kbase_mmu_update); + +void kbase_mmu_disable(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. 
+ * + * as_nr won't change because the caller has the runpool_irq lock */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); +} + +KBASE_EXPORT_TEST_API(kbase_mmu_disable); + +void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + kbdev->mmu_mode->disable_as(kbdev, as_nr); +} + +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align) +{ + int err; + size_t i = 0; + unsigned long mask = ~KBASE_REG_MEMATTR_MASK; +#if defined(CONFIG_MALI_CACHE_COHERENT) + unsigned long attr = + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA); +#else + unsigned long attr = + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); +#endif + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + + err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); + if (err) + return err; + + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { + u64 stride; + struct kbase_mem_phy_alloc *alloc; + + alloc = reg->gpu_alloc; + stride = alloc->imported.alias.stride; + KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); + for (i = 0; i < alloc->imported.alias.nents; i++) { + if (alloc->imported.alias.aliased[i].alloc) { + err = kbase_mmu_insert_pages(kctx, + reg->start_pfn + (i * stride), + alloc->imported.alias.aliased[i].alloc->pages + alloc->imported.alias.aliased[i].offset, + alloc->imported.alias.aliased[i].length, + reg->flags); + if (err) + goto bad_insert; + + kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc); + } else { + err = kbase_mmu_insert_single_page(kctx, + reg->start_pfn + i * stride, + kctx->aliasing_sink_page, + alloc->imported.alias.aliased[i].length, + (reg->flags & mask) | attr); + + if (err) + goto bad_insert; + } + } + } else { + err = kbase_mmu_insert_pages(kctx, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags); + if (err) + goto bad_insert; + kbase_mem_phy_alloc_gpu_mapped(reg->gpu_alloc); + } + + return err; + +bad_insert: + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { + u64 stride; + + stride = reg->gpu_alloc->imported.alias.stride; + KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); + while (i--) + if (reg->gpu_alloc->imported.alias.aliased[i].alloc) { + kbase_mmu_teardown_pages(kctx, reg->start_pfn + (i * stride), reg->gpu_alloc->imported.alias.aliased[i].length); + kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); + } + } + + kbase_remove_va_region(kctx, reg); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_gpu_mmap); + +int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + int err; + + if (reg->start_pfn == 0) + return 0; + + if (reg->gpu_alloc && reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) { + size_t i; + + err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, reg->nr_pages); + KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); + for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) + if (reg->gpu_alloc->imported.alias.aliased[i].alloc) + kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); + } else { + err = kbase_mmu_teardown_pages(kctx, reg->start_pfn, kbase_reg_current_backed_size(reg)); + kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc); + } + + if (err) + return err; + + err = kbase_remove_va_region(kctx, reg); + return err; +} + +static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping_of_region(const struct 
kbase_va_region *reg, unsigned long uaddr, size_t size) +{ + struct kbase_cpu_mapping *map; + struct list_head *pos; + + KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + + if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ + return NULL; + + list_for_each(pos, ®->cpu_alloc->mappings) { + map = list_entry(pos, struct kbase_cpu_mapping, mappings_list); + if (map->vm_start <= uaddr && map->vm_end >= uaddr + size) + return map; + } + + return NULL; +} + +KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_of_region); + +int kbasep_find_enclosing_cpu_mapping_offset( + struct kbase_context *kctx, u64 gpu_addr, + unsigned long uaddr, size_t size, u64 * offset) +{ + struct kbase_cpu_mapping *map = NULL; + const struct kbase_va_region *reg; + int err = -EINVAL; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + kbase_gpu_vm_lock(kctx); + + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); + if (reg && !(reg->flags & KBASE_REG_FREE)) { + map = kbasep_find_enclosing_cpu_mapping_of_region(reg, uaddr, + size); + if (map) { + *offset = (uaddr - PTR_TO_U64(map->vm_start)) + + (map->page_off << PAGE_SHIFT); + err = 0; + } + } + + kbase_gpu_vm_unlock(kctx); + + return err; +} + +KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset); + +void kbase_sync_single(struct kbase_context *kctx, + phys_addr_t cpu_pa, phys_addr_t gpu_pa, + off_t offset, size_t size, enum kbase_sync_type sync_fn) +{ + struct page *cpu_page; + + cpu_page = pfn_to_page(PFN_DOWN(cpu_pa)); + + if (likely(cpu_pa == gpu_pa)) { + dma_addr_t dma_addr; + + BUG_ON(!cpu_page); + BUG_ON(offset + size > PAGE_SIZE); + + dma_addr = kbase_dma_addr(cpu_page) + offset; + if (sync_fn == KBASE_SYNC_TO_CPU) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, + size, DMA_BIDIRECTIONAL); + else if (sync_fn == KBASE_SYNC_TO_DEVICE) + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + size, DMA_BIDIRECTIONAL); + } else { + void *src = NULL; + void *dst = NULL; + struct page *gpu_page; + + if (WARN(!gpu_pa, "No GPU PA found for infinite cache op")) + return; + + gpu_page = pfn_to_page(PFN_DOWN(gpu_pa)); + + if (sync_fn == KBASE_SYNC_TO_DEVICE) { + src = ((unsigned char *)kmap(cpu_page)) + offset; + dst = ((unsigned char *)kmap(gpu_page)) + offset; + } else if (sync_fn == KBASE_SYNC_TO_CPU) { + dma_sync_single_for_cpu(kctx->kbdev->dev, + kbase_dma_addr(gpu_page) + offset, + size, DMA_BIDIRECTIONAL); + src = ((unsigned char *)kmap(gpu_page)) + offset; + dst = ((unsigned char *)kmap(cpu_page)) + offset; + } + memcpy(dst, src, size); + kunmap(gpu_page); + kunmap(cpu_page); + if (sync_fn == KBASE_SYNC_TO_DEVICE) + dma_sync_single_for_device(kctx->kbdev->dev, + kbase_dma_addr(gpu_page) + offset, + size, DMA_BIDIRECTIONAL); + } +} + +static int kbase_do_syncset(struct kbase_context *kctx, + struct base_syncset *set, enum kbase_sync_type sync_fn) +{ + int err = 0; + struct basep_syncset *sset = &set->basep_sset; + struct kbase_va_region *reg; + struct kbase_cpu_mapping *map; + unsigned long start; + size_t size; + phys_addr_t *cpu_pa; + phys_addr_t *gpu_pa; + u64 page_off, page_count; + u64 i; + off_t offset; + + kbase_os_mem_map_lock(kctx); + kbase_gpu_vm_lock(kctx); + + /* find the region where the virtual address is contained */ + reg = kbase_region_tracker_find_region_enclosing_address(kctx, + sset->mem_handle); + if (!reg) { + dev_warn(kctx->kbdev->dev, "Can't find region at VA 0x%016llX", + sset->mem_handle); + err = -EINVAL; + goto out_unlock; + } + + if (!(reg->flags & 
KBASE_REG_CPU_CACHED)) + goto out_unlock; + + start = (uintptr_t)sset->user_addr; + size = (size_t)sset->size; + + map = kbasep_find_enclosing_cpu_mapping_of_region(reg, start, size); + if (!map) { + dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX", + start, sset->mem_handle); + err = -EINVAL; + goto out_unlock; + } + + offset = start & (PAGE_SIZE - 1); + page_off = map->page_off + ((start - map->vm_start) >> PAGE_SHIFT); + page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + cpu_pa = kbase_get_cpu_phy_pages(reg); + gpu_pa = kbase_get_gpu_phy_pages(reg); + + /* Sync first page */ + if (cpu_pa[page_off]) { + size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); + + kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off], + offset, sz, sync_fn); + } + + /* Sync middle pages (if any) */ + for (i = 1; page_count > 2 && i < page_count - 1; i++) { + /* we grow upwards, so bail on first non-present page */ + if (!cpu_pa[page_off + i]) + break; + + kbase_sync_single(kctx, cpu_pa[page_off + i], + gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn); + } + + /* Sync last page (if any) */ + if (page_count > 1 && cpu_pa[page_off + page_count - 1]) { + size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1; + + kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1], + gpu_pa[page_off + page_count - 1], 0, sz, + sync_fn); + } + +out_unlock: + kbase_gpu_vm_unlock(kctx); + kbase_os_mem_map_unlock(kctx); + return err; +} + +int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset) +{ + int err = -EINVAL; + struct basep_syncset *sset; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != syncset); + + sset = &syncset->basep_sset; + + switch (sset->type) { + case BASE_SYNCSET_OP_MSYNC: + err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_DEVICE); + break; + + case BASE_SYNCSET_OP_CSYNC: + err = kbase_do_syncset(kctx, syncset, KBASE_SYNC_TO_CPU); + break; + + default: + dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type); + break; + } + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_sync_now); + +/* vm lock must be held */ +int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg) +{ + int err; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != reg); + BUG_ON(!mutex_is_locked(&kctx->reg_lock)); + err = kbase_gpu_munmap(kctx, reg); + if (err) { + dev_warn(reg->kctx->kbdev->dev, "Could not unmap from the GPU...\n"); + goto out; + } +#ifndef CONFIG_MALI_NO_MALI + if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { + /* Wait for GPU to flush write buffer before freeing physical pages */ + kbase_wait_write_flush(kctx); + } +#endif + /* This will also free the physical pages */ + kbase_free_alloced_region(reg); + + out: + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mem_free_region); + +/** + * @brief Free the region from the GPU and unregister it. + * + * This function implements the free operation on a memory segment. + * It will loudly fail if called with outstanding mappings. 
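+ * + * Typical use is a single call (sketch; gpu_va is assumed to be the + * handle returned by the matching allocation): + * + *   err = kbase_mem_free(kctx, gpu_va); + * + * where a non-zero err means the region was missing, SAME_VA, or still + * mapped.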
+ */ +int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) +{ + int err = 0; + struct kbase_va_region *reg; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + if (0 == gpu_addr) { + dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); + return -EINVAL; + } + kbase_gpu_vm_lock(kctx); + + if (gpu_addr >= BASE_MEM_COOKIE_BASE && + gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) { + int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE); + + reg = kctx->pending_regions[cookie]; + if (!reg) { + err = -EINVAL; + goto out_unlock; + } + + /* ask to unlink the cookie as we'll free it */ + + kctx->pending_regions[cookie] = NULL; + kctx->cookies |= (1UL << cookie); + + kbase_free_alloced_region(reg); + } else { + /* A real GPU va */ + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (!reg || (reg->flags & KBASE_REG_FREE)) { + dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX", + gpu_addr); + err = -EINVAL; + goto out_unlock; + } + + if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { + /* SAME_VA must be freed through munmap */ + dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, + gpu_addr); + err = -EINVAL; + goto out_unlock; + } + + err = kbase_mem_free_region(kctx, reg); + } + + out_unlock: + kbase_gpu_vm_unlock(kctx); + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mem_free); + +void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags) +{ + KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); + + reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); + /* all memory is now growable */ + reg->flags |= KBASE_REG_GROWABLE; + + if (flags & BASE_MEM_GROW_ON_GPF) + reg->flags |= KBASE_REG_PF_GROW; + + if (flags & BASE_MEM_PROT_CPU_WR) + reg->flags |= KBASE_REG_CPU_WR; + + if (flags & BASE_MEM_PROT_CPU_RD) + reg->flags |= KBASE_REG_CPU_RD; + + if (flags & BASE_MEM_PROT_GPU_WR) + reg->flags |= KBASE_REG_GPU_WR; + + if (flags & BASE_MEM_PROT_GPU_RD) + reg->flags |= KBASE_REG_GPU_RD; + + if (0 == (flags & BASE_MEM_PROT_GPU_EX)) + reg->flags |= KBASE_REG_GPU_NX; + + if (flags & BASE_MEM_COHERENT_SYSTEM || + flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) + reg->flags |= KBASE_REG_SHARE_BOTH; + else if (flags & BASE_MEM_COHERENT_LOCAL) + reg->flags |= KBASE_REG_SHARE_IN; +} +KBASE_EXPORT_TEST_API(kbase_update_region_flags); + +int kbase_alloc_phy_pages_helper( + struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_requested) +{ + KBASE_DEBUG_ASSERT(alloc); + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.kctx); + + if (nr_pages_requested == 0) + goto done; /*nothing to do*/ + + kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->used_pages); + kbase_atomic_add_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages); + + /* Increase mm counters before we allocate pages so that this + * allocation is visible to the OOM killer */ + kbase_process_page_usage_inc(alloc->imported.kctx, nr_pages_requested); + + if (kbase_mem_allocator_alloc(&alloc->imported.kctx->osalloc, + nr_pages_requested, alloc->pages + alloc->nents) != 0) + goto no_alloc; + +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pagesalloc((s64)nr_pages_requested); +#endif + + alloc->nents += nr_pages_requested; +done: + return 0; + +no_alloc: + 
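/* Allocation failed: unwind the counter updates made above (they were + * raised before allocating so the attempt is visible to the OOM + * killer), keeping process and device page accounting balanced. */ +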
kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_requested); + kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->used_pages); + kbase_atomic_sub_pages(nr_pages_requested, &alloc->imported.kctx->kbdev->memdev.used_pages); + + return -ENOMEM; +} + +int kbase_free_phy_pages_helper( + struct kbase_mem_phy_alloc *alloc, + size_t nr_pages_to_free) +{ + bool syncback; + phys_addr_t *start_free; + + KBASE_DEBUG_ASSERT(alloc); + KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE); + KBASE_DEBUG_ASSERT(alloc->imported.kctx); + KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free); + + /* early out if nothing to do */ + if (0 == nr_pages_to_free) + return 0; + + start_free = alloc->pages + alloc->nents - nr_pages_to_free; + + syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; + + kbase_mem_allocator_free(&alloc->imported.kctx->osalloc, + nr_pages_to_free, + start_free, + syncback); + + alloc->nents -= nr_pages_to_free; + kbase_process_page_usage_dec(alloc->imported.kctx, nr_pages_to_free); + kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->used_pages); + kbase_atomic_sub_pages(nr_pages_to_free, &alloc->imported.kctx->kbdev->memdev.used_pages); + +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pagesalloc(-(s64)nr_pages_to_free); +#endif + + return 0; +} + +void kbase_mem_kref_free(struct kref *kref) +{ + struct kbase_mem_phy_alloc *alloc; + + alloc = container_of(kref, struct kbase_mem_phy_alloc, kref); + + switch (alloc->type) { + case KBASE_MEM_TYPE_NATIVE: { + KBASE_DEBUG_ASSERT(alloc->imported.kctx); + kbase_free_phy_pages_helper(alloc, alloc->nents); + break; + } + case KBASE_MEM_TYPE_ALIAS: { + /* just call put on the underlying phy allocs */ + size_t i; + struct kbase_aliased *aliased; + + aliased = alloc->imported.alias.aliased; + if (aliased) { + for (i = 0; i < alloc->imported.alias.nents; i++) + if (aliased[i].alloc) + kbase_mem_phy_alloc_put(aliased[i].alloc); + vfree(aliased); + } + break; + } + case KBASE_MEM_TYPE_RAW: + /* raw pages, external cleanup */ + break; +#ifdef CONFIG_UMP + case KBASE_MEM_TYPE_IMPORTED_UMP: + ump_dd_release(alloc->imported.ump_handle); + break; +#endif +#ifdef CONFIG_DMA_SHARED_BUFFER + case KBASE_MEM_TYPE_IMPORTED_UMM: + dma_buf_detach(alloc->imported.umm.dma_buf, + alloc->imported.umm.dma_attachment); + dma_buf_put(alloc->imported.umm.dma_buf); + break; +#endif + case KBASE_MEM_TYPE_TB: { + void *tb; + + tb = alloc->imported.kctx->jctx.tb; + kbase_device_trace_buffer_uninstall(alloc->imported.kctx); + vfree(tb); + break; + } + default: + WARN(1, "Unexpected free of type %d\n", alloc->type); + break; + } + + /* Free based on allocation type */ + if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) + vfree(alloc); + else + kfree(alloc); +} + +KBASE_EXPORT_TEST_API(kbase_mem_kref_free); + +int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) +{ + KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(vsize > 0); + + /* validate user provided arguments */ + if (size > vsize || vsize > reg->nr_pages) + goto out_term; + + /* Prevent vsize*sizeof from wrapping around. + * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail. 
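+ * (A worked example, assuming a 32-bit size_t and an 8-byte + * phys_addr_t: (2**29+1) * 8 = 2**32 + 8, which wraps to 8, so a badly + * undersized allocation would appear to succeed.)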
+ */ + if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) + goto out_term; + + KBASE_DEBUG_ASSERT(0 != vsize); + + if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) + goto out_term; + + if (reg->cpu_alloc != reg->gpu_alloc) { + if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0) + goto out_rollback; + } + + return 0; + +out_rollback: + kbase_free_phy_pages_helper(reg->cpu_alloc, size); +out_term: + return -1; +} + +KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); + +bool kbase_check_alloc_flags(unsigned long flags) +{ + /* Only known input flags should be set. */ + if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) + return false; + + /* At least one flag should be set */ + if (flags == 0) + return false; + + /* Either the GPU or CPU must be reading from the allocated memory */ + if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0) + return false; + + /* Either the GPU or CPU must be writing to the allocated memory */ + if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + + /* GPU cannot be writing to GPU executable memory and cannot grow the memory on page fault. */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) + return false; + + /* GPU should have at least read or write access otherwise there is no + reason for allocating. */ + if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + + return true; +} + +bool kbase_check_import_flags(unsigned long flags) +{ + /* Only known input flags should be set. */ + if (flags & ~BASE_MEM_FLAGS_INPUT_MASK) + return false; + + /* At least one flag should be set */ + if (flags == 0) + return false; + + /* Imported memory cannot be GPU executable */ + if (flags & BASE_MEM_PROT_GPU_EX) + return false; + + /* Imported memory cannot grow on page fault */ + if (flags & BASE_MEM_GROW_ON_GPF) + return false; + + /* GPU should have at least read or write access otherwise there is no + reason for importing. */ + if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) + return false; + + /* Secure memory cannot be read by the CPU */ + if ((flags & BASE_MEM_SECURE) && (flags & BASE_MEM_PROT_CPU_RD)) + return false; + + return true; +} + +/** + * @brief Acquire the per-context region list lock + */ +void kbase_gpu_vm_lock(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_lock(&kctx->reg_lock); +} + +KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); + +/** + * @brief Release the per-context region list lock + */ +void kbase_gpu_vm_unlock(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + mutex_unlock(&kctx->reg_lock); +} + +KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem.h b/drivers/gpu/arm/midgard/mali_kbase_mem.h new file mode 100644 index 00000000000..d8a03526a64 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem.h @@ -0,0 +1,774 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_mem.h + * Base kernel memory APIs + */ + +#ifndef _KBASE_MEM_H_ +#define _KBASE_MEM_H_ + +#ifndef _KBASE_H_ +#error "Don't include this file directly, use mali_kbase.h instead" +#endif + +#include <linux/kref.h> + +#ifdef CONFIG_UMP +#include <linux/ump.h> +#endif /* CONFIG_UMP */ +#include "mali_base_kernel.h" +#include <mali_kbase_hw.h> +#include "mali_kbase_pm.h" +#include "mali_kbase_defs.h" +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#include "mali_kbase_gator.h" +#endif + +/* Part of the workaround for uTLB invalid pages is to ensure we grow/shrink tmem by 4 pages at a time */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316 (2) /* round to 4 pages */ + +/* Part of the workaround for PRLAM-9630 requires us to grow/shrink memory by 8 pages. +The MMU reads in 8 page table entries from memory at a time, if we have more than one page fault within the same 8 pages and +page tables are updated accordingly, the MMU does not re-read the page table entries from memory for the subsequent page table +updates and generates duplicate page faults as the page table information used by the MMU is not valid. */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630 (3) /* round to 8 pages */ + +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2 (0) /* round to 1 page */ + +/* This must always be a power of 2 */ +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2) +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) +#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +/** + * A CPU mapping + */ +struct kbase_cpu_mapping { + struct list_head mappings_list; + struct kbase_mem_phy_alloc *alloc; + struct kbase_context *kctx; + struct kbase_va_region *region; + pgoff_t page_off; + int count; + unsigned long vm_start; + unsigned long vm_end; +}; + +enum kbase_memory_type { + KBASE_MEM_TYPE_NATIVE, + KBASE_MEM_TYPE_IMPORTED_UMP, + KBASE_MEM_TYPE_IMPORTED_UMM, + KBASE_MEM_TYPE_ALIAS, + KBASE_MEM_TYPE_TB, + KBASE_MEM_TYPE_RAW +}; + +/* internal structure, mirroring base_mem_aliasing_info, + * but with alloc instead of a gpu va (handle) */ +struct kbase_aliased { + struct kbase_mem_phy_alloc *alloc; /* NULL for special, non-NULL for native */ + u64 offset; /* in pages */ + u64 length; /* in pages */ +}; + +/** + * @brief Physical pages tracking object properties + */ +#define KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED (1ul << 0) +#define KBASE_MEM_PHY_ALLOC_LARGE (1ul << 1) + +/* physical pages tracking object. + * Set up to track N pages. + * N not stored here, the creator holds that info. + * This object only tracks how many elements are actually valid (present). + * Changing of nents or *pages should only happen if the kbase_mem_phy_alloc is not + * shared with another region or client. CPU mappings are OK to exist when changing, as + * long as the tracked mappings objects are updated as part of the change. 
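+ * + * Reference handling sketch (mirroring kbase_reg_prepare_native() + * later in this file): + * + *   reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + *   ... use the alloc ... + *   kbase_mem_phy_alloc_put(reg->gpu_alloc); + * + * The backing pages are released from kbase_mem_kref_free() once the + * last reference is dropped.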
+ */ +struct kbase_mem_phy_alloc { + struct kref kref; /* number of users of this alloc */ + atomic_t gpu_mappings; + size_t nents; /* 0..N */ + phys_addr_t *pages; /* N elements, only 0..nents are valid */ + + /* kbase_cpu_mappings */ + struct list_head mappings; + + /* type of buffer */ + enum kbase_memory_type type; + + unsigned long properties; + + /* member in union valid based on @a type */ + union { +#ifdef CONFIG_UMP + ump_dd_handle ump_handle; +#endif /* CONFIG_UMP */ +#if defined(CONFIG_DMA_SHARED_BUFFER) + struct { + struct dma_buf *dma_buf; + struct dma_buf_attachment *dma_attachment; + unsigned int current_mapping_usage_count; + struct sg_table *sgt; + } umm; +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ + struct { + u64 stride; + size_t nents; + struct kbase_aliased *aliased; + } alias; + /* Used by type = (KBASE_MEM_TYPE_NATIVE, KBASE_MEM_TYPE_TB) */ + struct kbase_context *kctx; + } imported; +}; + +static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(alloc); + /* we only track mappings of NATIVE buffers */ + if (alloc->type == KBASE_MEM_TYPE_NATIVE) + atomic_inc(&alloc->gpu_mappings); +} + +static inline void kbase_mem_phy_alloc_gpu_unmapped(struct kbase_mem_phy_alloc *alloc) +{ + KBASE_DEBUG_ASSERT(alloc); + /* we only track mappings of NATIVE buffers */ + if (alloc->type == KBASE_MEM_TYPE_NATIVE) + if (0 > atomic_dec_return(&alloc->gpu_mappings)) { + pr_err("Mismatched %s:\n", __func__); + dump_stack(); + } +} + +void kbase_mem_kref_free(struct kref *kref); + +int kbase_mem_init(struct kbase_device *kbdev); +void kbase_mem_halt(struct kbase_device *kbdev); +void kbase_mem_term(struct kbase_device *kbdev); + +static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_get(struct kbase_mem_phy_alloc *alloc) +{ + kref_get(&alloc->kref); + return alloc; +} + +static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_mem_phy_alloc *alloc) +{ + kref_put(&alloc->kref, kbase_mem_kref_free); + return NULL; +} + +/** + * A GPU memory region, and attributes for CPU mappings. + */ +struct kbase_va_region { + struct rb_node rblink; + struct list_head link; + + struct kbase_context *kctx; /* Backlink to base context */ + + u64 start_pfn; /* The PFN in GPU space */ + size_t nr_pages; + +/* Free region */ +#define KBASE_REG_FREE (1ul << 0) +/* CPU write access */ +#define KBASE_REG_CPU_WR (1ul << 1) +/* GPU write access */ +#define KBASE_REG_GPU_WR (1ul << 2) +/* No eXecute flag */ +#define KBASE_REG_GPU_NX (1ul << 3) +/* Is CPU cached? */ +#define KBASE_REG_CPU_CACHED (1ul << 4) +/* Is GPU cached? */ +#define KBASE_REG_GPU_CACHED (1ul << 5) + +#define KBASE_REG_GROWABLE (1ul << 6) +/* Can grow on pf? 
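(i.e. can the region grow on a GPU page fault?)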
*/ +#define KBASE_REG_PF_GROW (1ul << 7) + +/* VA managed by us */ +#define KBASE_REG_CUSTOM_VA (1ul << 8) + +/* inner shareable coherency */ +#define KBASE_REG_SHARE_IN (1ul << 9) +/* inner & outer shareable coherency */ +#define KBASE_REG_SHARE_BOTH (1ul << 10) + +/* Space for 4 different zones */ +#define KBASE_REG_ZONE_MASK (3ul << 11) +#define KBASE_REG_ZONE(x) (((x) & 3) << 11) + +/* GPU read access */ +#define KBASE_REG_GPU_RD (1ul<<13) +/* CPU read access */ +#define KBASE_REG_CPU_RD (1ul<<14) + +/* Aligned for GPU EX in SAME_VA */ +#define KBASE_REG_ALIGNED (1ul<<15) + +/* Index of chosen MEMATTR for this region (0..7) */ +#define KBASE_REG_MEMATTR_MASK (7ul << 16) +#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16) +#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16) + +#define KBASE_REG_SECURE (1ul << 19) + +#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) + +/* only used with 32-bit clients */ +/* + * On a 32bit platform, custom VA should be wired from (4GB + shader region) + * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface + * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference). + * So we put the default limit to the maximum possible on Linux and shrink + * it down, if required by the GPU, during initialization. + */ +#define KBASE_REG_ZONE_EXEC KBASE_REG_ZONE(1) /* Dedicated 16MB region for shader code */ +#define KBASE_REG_ZONE_EXEC_BASE ((1ULL << 32) >> PAGE_SHIFT) +#define KBASE_REG_ZONE_EXEC_SIZE ((16ULL * 1024 * 1024) >> PAGE_SHIFT) + +#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(2) +#define KBASE_REG_ZONE_CUSTOM_VA_BASE (KBASE_REG_ZONE_EXEC_BASE + KBASE_REG_ZONE_EXEC_SIZE) /* Starting after KBASE_REG_ZONE_EXEC */ +#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) +/* end 32-bit clients only */ + + unsigned long flags; + + size_t extent; /* nr of pages alloc'd on PF */ + + struct kbase_mem_phy_alloc *cpu_alloc; /* the one alloc object we mmap to the CPU when mapping this region */ + struct kbase_mem_phy_alloc *gpu_alloc; /* the one alloc object we mmap to the GPU when mapping this region */ + + /* non-NULL if this memory object is a kds_resource */ + struct kds_resource *kds_res; + +}; + +/* Common functions */ +static inline phys_addr_t *kbase_get_cpu_phy_pages(struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + KBASE_DEBUG_ASSERT(reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + + return reg->cpu_alloc->pages; +} + +static inline phys_addr_t *kbase_get_gpu_phy_pages(struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + KBASE_DEBUG_ASSERT(reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + + return reg->gpu_alloc->pages; +} + +static inline size_t kbase_reg_current_backed_size(struct kbase_va_region *reg) +{ + KBASE_DEBUG_ASSERT(reg); + /* if no alloc object the backed size naturally is 0 */ + if (!reg->cpu_alloc) + return 0; + + KBASE_DEBUG_ASSERT(reg->cpu_alloc); + KBASE_DEBUG_ASSERT(reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->cpu_alloc->nents == reg->gpu_alloc->nents); + + return reg->cpu_alloc->nents; +} + +#define KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD ((size_t)(4*1024)) /* size above which vmalloc is used over kmalloc */ + +static inline struct kbase_mem_phy_alloc *kbase_alloc_create(size_t nr_pages, enum kbase_memory_type type) +{ + struct kbase_mem_phy_alloc *alloc; + const size_t 
alloc_size = + sizeof(*alloc) + sizeof(*alloc->pages) * nr_pages; + + /* Prevent nr_pages*sizeof + sizeof(*alloc) from wrapping around. */ + if (nr_pages > ((((size_t) -1) - sizeof(*alloc)) + / sizeof(*alloc->pages))) + return ERR_PTR(-ENOMEM); + + /* Allocate based on the size to reduce internal fragmentation of vmem */ + if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) + alloc = vzalloc(alloc_size); + else + alloc = kzalloc(alloc_size, GFP_KERNEL); + + if (!alloc) + return ERR_PTR(-ENOMEM); + + /* Store allocation method */ + if (alloc_size > KBASE_MEM_PHY_ALLOC_LARGE_THRESHOLD) + alloc->properties |= KBASE_MEM_PHY_ALLOC_LARGE; + + kref_init(&alloc->kref); + atomic_set(&alloc->gpu_mappings, 0); + alloc->nents = 0; + alloc->pages = (void *)(alloc + 1); + INIT_LIST_HEAD(&alloc->mappings); + alloc->type = type; + + return alloc; +} + +static inline int kbase_reg_prepare_native(struct kbase_va_region *reg, + struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(reg); + KBASE_DEBUG_ASSERT(!reg->cpu_alloc); + KBASE_DEBUG_ASSERT(!reg->gpu_alloc); + KBASE_DEBUG_ASSERT(reg->flags & KBASE_REG_FREE); + + reg->cpu_alloc = kbase_alloc_create(reg->nr_pages, + KBASE_MEM_TYPE_NATIVE); + if (IS_ERR(reg->cpu_alloc)) + return PTR_ERR(reg->cpu_alloc); + else if (!reg->cpu_alloc) + return -ENOMEM; + reg->cpu_alloc->imported.kctx = kctx; + if (kctx->infinite_cache_active && (reg->flags & KBASE_REG_CPU_CACHED)) { + reg->gpu_alloc = kbase_alloc_create(reg->nr_pages, + KBASE_MEM_TYPE_NATIVE); + reg->gpu_alloc->imported.kctx = kctx; + } else { + reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + } + + reg->flags &= ~KBASE_REG_FREE; + return 0; +} + +static inline int kbase_atomic_add_pages(int num_pages, atomic_t *used_pages) +{ + int new_val = atomic_add_return(num_pages, used_pages); +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_total_alloc_pages_change((long long int)new_val); +#endif + return new_val; +} + +static inline int kbase_atomic_sub_pages(int num_pages, atomic_t *used_pages) +{ + int new_val = atomic_sub_return(num_pages, used_pages); +#if defined(CONFIG_MALI_GATOR_SUPPORT) + kbase_trace_mali_total_alloc_pages_change((long long int)new_val); +#endif + return new_val; +} + +/** + * @brief Initialize low-level memory access for a kbase device + * + * Performs any low-level setup needed for a kbase device to access memory on + * the device. + * + * @param kbdev kbase device to initialize memory access for + * @return 0 on success, Linux error code on failure + */ +int kbase_mem_lowlevel_init(struct kbase_device *kbdev); + + +/** + * @brief Terminate low-level memory access for a kbase device + * + * Performs any low-level cleanup needed + * after @ref kbase_mem_lowlevel_init + * + * @param kbdev kbase device to clean up for + */ +void kbase_mem_lowlevel_term(struct kbase_device *kbdev); + +/** + * @brief Initialize an OS based memory allocator. + * + * Initializes an allocator. + * Must be called before any allocation is attempted. + * \a kbase_mem_allocator_alloc and \a kbase_mem_allocator_free are used + * to allocate and free memory. + * \a kbase_mem_allocator_term must be called to clean up the allocator. + * All memory obtained via \a kbase_mem_allocator_alloc must have been + * freed with \a kbase_mem_allocator_free before \a kbase_mem_allocator_term is called. + * + * @param allocator Allocator object to initialize + * @param max_size Maximum number of pages to keep on the freelist. 
+ * @param kbdev The kbase device this allocator is used with + * @return 0 on success, or an error code indicating what failed. + */ +int kbase_mem_allocator_init(struct kbase_mem_allocator *allocator, + unsigned int max_size, + struct kbase_device *kbdev); + +/** + * @brief Allocate memory via an OS based memory allocator. + * + * @param[in] allocator Allocator to obtain the memory from + * @param nr_pages Number of pages to allocate + * @param[out] pages Pointer to an array where the physical address of the allocated pages will be stored + * @return 0 if the pages were allocated, or an error code indicating what failed on error + */ +int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages); + +/** + * @brief Free memory obtained from an OS based memory allocator. + * + * @param[in] allocator Allocator to free the memory back to + * @param nr_pages Number of pages to free + * @param[in] pages Pointer to an array holding the physical address of the pages to free. + * @param[in] sync_back true if the memory should be synced back + */ +void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, bool sync_back); + +/** + * @brief Terminate an OS based memory allocator. + * + * Frees all cached allocations and cleans up internal state. + * All allocated pages must have been freed with \a kbase_mem_allocator_free before + * this function is called. + * + * @param[in] allocator Allocator to terminate + */ +void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator); + + + +int kbase_region_tracker_init(struct kbase_context *kctx); +void kbase_region_tracker_term(struct kbase_context *kctx); + +struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(struct kbase_context *kctx, u64 gpu_addr); + +/** + * @brief Check that a pointer is actually a valid region. + * + * Must be called with context lock held. 
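+ * + * Sketch of the expected calling pattern (mirroring kbase_mem_free()): + * + *   kbase_gpu_vm_lock(kctx); + *   reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + *   if (reg && !(reg->flags & KBASE_REG_FREE)) + *           ... operate on the region ... + *   kbase_gpu_vm_unlock(kctx);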
+ */ +struct kbase_va_region *kbase_region_tracker_find_region_base_address(struct kbase_context *kctx, u64 gpu_addr); + +struct kbase_va_region *kbase_alloc_free_region(struct kbase_context *kctx, u64 start_pfn, size_t nr_pages, int zone); +void kbase_free_alloced_region(struct kbase_va_region *reg); +int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); + +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); +bool kbase_check_alloc_flags(unsigned long flags); +bool kbase_check_import_flags(unsigned long flags); +void kbase_update_region_flags(struct kbase_va_region *reg, unsigned long flags); + +void kbase_gpu_vm_lock(struct kbase_context *kctx); +void kbase_gpu_vm_unlock(struct kbase_context *kctx); + +int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size); + +int kbase_mmu_init(struct kbase_context *kctx); +void kbase_mmu_term(struct kbase_context *kctx); + +phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx); +void kbase_mmu_free_pgd(struct kbase_context *kctx); +int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, + phys_addr_t *phys, size_t nr, + unsigned long flags); +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, + phys_addr_t phys, size_t nr, + unsigned long flags); + +int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr); +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags); + +/** + * @brief Register region and map it on the GPU. + * + * Call kbase_add_va_region() and map the region on the GPU. + */ +int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); + +/** + * @brief Remove the region from the GPU and unregister it. + * + * Must be called with context lock held. + */ +int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg); + +/** + * The caller has the following locking conditions: + * - It must hold kbase_as::transaction_mutex on kctx's address space + * - It must hold the kbasep_js_device_data::runpool_irq::lock + */ +void kbase_mmu_update(struct kbase_context *kctx); + +/** + * The caller has the following locking conditions: + * - It must hold kbase_as::transaction_mutex on kctx's address space + * - It must hold the kbasep_js_device_data::runpool_irq::lock + */ +void kbase_mmu_disable(struct kbase_context *kctx); + +/** + * kbase_mmu_disable_as() - set the MMU in unmapped mode for an address space. + * + * @kbdev: Kbase device + * @as_nr: Number of the address space for which the MMU + * should be set in unmapped mode. + * + * The caller must hold kbdev->as[as_nr].transaction_mutex. + */ +void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr); + +void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat); + +/** Dump the MMU tables to a buffer + * + * This function allocates a buffer (of @c nr_pages pages) to hold a dump of the MMU tables and fills it. If the + * buffer is too small then the return value will be NULL. + * + * The GPU vm lock must be held when calling this function. + * + * The buffer returned should be freed with @ref vfree when it is no longer required. + * + * @param[in] kctx The kbase context to dump + * @param[in] nr_pages The number of pages to allocate for the buffer. 
+ * + * @return The address of the buffer containing the MMU dump or NULL on error (including if the @c nr_pages is too + * small) + */ +void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages); + +int kbase_sync_now(struct kbase_context *kctx, struct base_syncset *syncset); +void kbase_sync_single(struct kbase_context *kctx, phys_addr_t cpu_pa, + phys_addr_t gpu_pa, off_t offset, size_t size, + enum kbase_sync_type sync_fn); +void kbase_pre_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); +void kbase_post_job_sync(struct kbase_context *kctx, struct base_syncset *syncsets, size_t nr); + +/** + * Set attributes for imported tmem region + * + * This function sets (extends with) the requested attributes for a given region + * of imported external memory. + * + * @param[in] kctx The kbase context which the tmem belongs to + * @param[in] gpu_addr The base address of the tmem region + * @param[in] attributes The attributes of the tmem region to be set + * + * @return 0 on success. Any other value indicates failure. + */ +int kbase_tmem_set_attributes(struct kbase_context *kctx, u64 gpu_addr, u32 attributes); + +/** + * Get attributes of imported tmem region + * + * This function retrieves the attributes of imported external memory. + * + * @param[in] kctx The kbase context which the tmem belongs to + * @param[in] gpu_addr The base address of the tmem region + * @param[out] attributes The actual attributes of the tmem region + * + * @return 0 on success. Any other value indicates failure. + */ +int kbase_tmem_get_attributes(struct kbase_context *kctx, u64 gpu_addr, u32 *const attributes); + +/* OS specific functions */ +int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr); +int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg); +void kbase_os_mem_map_lock(struct kbase_context *kctx); +void kbase_os_mem_map_unlock(struct kbase_context *kctx); + +/** + * @brief Update the memory allocation counters for the current process + * + * OS specific call to update the current memory allocation counters for the current process with + * the supplied delta. + * + * @param[in] kctx The kbase context + * @param[in] pages The desired delta to apply to the memory usage counters. + */ + +void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages); + +/** + * @brief Add to the memory allocation counters for the current process + * + * OS specific call to add to the current memory allocation counters for the current process by + * the supplied amount. + * + * @param[in] kctx The kernel base context used for the allocation. + * @param[in] pages The desired delta to apply to the memory usage counters. + */ + +static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages) +{ + kbasep_os_process_page_usage_update(kctx, pages); +} + +/** + * @brief Subtract from the memory allocation counters for the current process + * + * OS specific call to subtract from the current memory allocation counters for the current process by + * the supplied amount. + * + * @param[in] kctx The kernel base context used for the allocation. + * @param[in] pages The desired delta to apply to the memory usage counters. 
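+ * + * For example, the physical page helpers pair the two wrappers: + * kbase_process_page_usage_inc(kctx, nr_pages) is called before pages + * are allocated, and kbase_process_page_usage_dec(kctx, nr_pages) when + * the pages are freed or the allocation is rolled back.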
+ */ + +static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages) +{ + kbasep_os_process_page_usage_update(kctx, 0 - pages); +} + +/** + * @brief Find the offset of the CPU mapping of a memory allocation containing + * a given address range + * + * Searches for a CPU mapping of any part of the region starting at @p gpu_addr + * that fully encloses the CPU virtual address range specified by @p uaddr and + * @p size. Returns a failure indication if only part of the address range lies + * within a CPU mapping, or if the address range lies within a CPU mapping of a + * different region. + * + * @param[in,out] kctx The kernel base context used for the allocation. + * @param[in] gpu_addr GPU address of the start of the allocated region + * within which to search. + * @param[in] uaddr Start of the CPU virtual address range. + * @param[in] size Size of the CPU virtual address range (in bytes). + * @param[out] offset The offset from the start of the allocation to the + * specified CPU virtual address. + * + * @return 0 if offset was obtained successfully. Error code + * otherwise. + */ +int kbasep_find_enclosing_cpu_mapping_offset(struct kbase_context *kctx, + u64 gpu_addr, + unsigned long uaddr, + size_t size, + u64 *offset); + +enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer); +void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); +void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom); + +/** +* @brief Allocates physical pages. +* +* Allocates \a nr_pages_requested and updates the alloc object. +* +* @param[in] alloc allocation object to add pages to +* @param[in] nr_pages_requested number of physical pages to allocate +* +* @return 0 if all pages have been successfully allocated. Error code otherwise +*/ +int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_requested); + +/** +* @brief Free physical pages. +* +* Frees \a nr_pages_to_free and updates the alloc object. +* +* @param[in] alloc allocation object to free pages from +* @param[in] nr_pages_to_free number of physical pages to free +* +* @return 0 on success +*/ +int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free); + +static inline void kbase_set_dma_addr(struct page *p, dma_addr_t dma_addr) +{ + SetPagePrivate(p); + if (sizeof(dma_addr_t) > sizeof(p->private)) { + /* on 32-bit ARM with LPAE dma_addr_t becomes larger, but the + * private field stays the same. So we have to be clever and + * use the fact that we only store DMA addresses of whole pages, + * so the low bits should be zero */ + KBASE_DEBUG_ASSERT(!(dma_addr & (PAGE_SIZE - 1))); + set_page_private(p, dma_addr >> PAGE_SHIFT); + } else { + set_page_private(p, dma_addr); + } +} + +static inline dma_addr_t kbase_dma_addr(struct page *p) +{ + if (sizeof(dma_addr_t) > sizeof(p->private)) + return ((dma_addr_t)page_private(p)) << PAGE_SHIFT; + + return (dma_addr_t)page_private(p); +} + +/** +* @brief Process a bus or page fault. +* +* This function will process a fault on a specific address space. +* +* @param[in] kbdev The @ref kbase_device the fault happened on +* @param[in] kctx The @ref kbase_context for the faulting address space if +* one was found. 
+* @param[in] as The address space that has the fault +*/ +void kbase_mmu_interrupt_process(struct kbase_device *kbdev, + struct kbase_context *kctx, struct kbase_as *as); + +/** + * @brief Process a page fault. + * + * @param[in] data work_struct passed by queue_work() + */ +void page_fault_worker(struct work_struct *data); + +/** + * @brief Process a bus fault. + * + * @param[in] data work_struct passed by queue_work() + */ +void bus_fault_worker(struct work_struct *data); + +/** + * @brief Flush MMU workqueues. + * + * This function will cause any outstanding page or bus faults to be processed. + * It should be called prior to powering off the GPU. + * + * @param[in] kbdev Device pointer + */ +void kbase_flush_mmu_wqs(struct kbase_device *kbdev); + +/** + * kbase_sync_single_for_device - update physical memory and give GPU ownership + * @kbdev: Device pointer + * @handle: DMA address of region + * @size: Size of region to sync + * @dir: DMA data direction + */ + +void kbase_sync_single_for_device(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir); + +/** + * kbase_sync_single_for_cpu - update physical memory and give CPU ownership + * @kbdev: Device pointer + * @handle: DMA address of region + * @size: Size of region to sync + * @dir: DMA data direction + */ + +void kbase_sync_single_for_cpu(struct kbase_device *kbdev, dma_addr_t handle, + size_t size, enum dma_data_direction dir); + +#endif /* _KBASE_MEM_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c new file mode 100644 index 00000000000..642fa44bf35 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.c @@ -0,0 +1,292 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_mem.c + * Base kernel memory APIs + */ +#include <mali_kbase.h> +#include <linux/dma-mapping.h> +#include <linux/highmem.h> +#include <linux/mempool.h> +#include <linux/mm.h> +#include <linux/atomic.h> +#include <linux/version.h> + +int kbase_mem_lowlevel_init(struct kbase_device *kbdev) +{ + return 0; +} + +void kbase_mem_lowlevel_term(struct kbase_device *kbdev) +{ +} + +static unsigned long kbase_mem_allocator_count(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_mem_allocator *allocator; + + allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer); + return atomic_read(&allocator->free_list_size); +} + +static unsigned long kbase_mem_allocator_scan(struct shrinker *s, + struct shrink_control *sc) +{ + struct kbase_mem_allocator *allocator; + int i; + int freed; + + allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer); + + might_sleep(); + + mutex_lock(&allocator->free_list_lock); + i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan); + freed = i; + + atomic_sub(i, &allocator->free_list_size); + + while (i--) { + struct page *p; + + BUG_ON(list_empty(&allocator->free_list_head)); + p = list_first_entry(&allocator->free_list_head, + struct page, lru); + list_del(&p->lru); + ClearPagePrivate(p); + __free_page(p); + } + mutex_unlock(&allocator->free_list_lock); + return atomic_read(&allocator->free_list_size); +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) +static int kbase_mem_allocator_shrink(struct shrinker *s, + struct shrink_control *sc) +{ + if (sc->nr_to_scan == 0) + return kbase_mem_allocator_count(s, sc); + + return kbase_mem_allocator_scan(s, sc); +} +#endif + +int kbase_mem_allocator_init(struct kbase_mem_allocator *const allocator, + unsigned int max_size, struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(NULL != allocator); + KBASE_DEBUG_ASSERT(kbdev); + + INIT_LIST_HEAD(&allocator->free_list_head); + + allocator->kbdev = kbdev; + + mutex_init(&allocator->free_list_lock); + + atomic_set(&allocator->free_list_size, 0); + + allocator->free_list_max_size = max_size; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) + allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink; +#else + allocator->free_list_reclaimer.count_objects = + kbase_mem_allocator_count; + allocator->free_list_reclaimer.scan_objects = kbase_mem_allocator_scan; +#endif + allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS; + /* Kernel versions prior to 3.1 : + * struct shrinker does not define batch */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) + allocator->free_list_reclaimer.batch = 0; +#endif + + register_shrinker(&allocator->free_list_reclaimer); + + return 0; +} +KBASE_EXPORT_TEST_API(kbase_mem_allocator_init); + +void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator) +{ + KBASE_DEBUG_ASSERT(NULL != allocator); + + unregister_shrinker(&allocator->free_list_reclaimer); + mutex_lock(&allocator->free_list_lock); + while (!list_empty(&allocator->free_list_head)) { + struct page *p; + + p = list_first_entry(&allocator->free_list_head, struct page, + lru); + list_del(&p->lru); + dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p), + PAGE_SIZE, + DMA_BIDIRECTIONAL); + ClearPagePrivate(p); + __free_page(p); + } + atomic_set(&allocator->free_list_size, 0); + mutex_unlock(&allocator->free_list_lock); + mutex_destroy(&allocator->free_list_lock); +} +KBASE_EXPORT_TEST_API(kbase_mem_allocator_term); + +int kbase_mem_allocator_alloc(struct 
kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages) +{ + struct page *p; + int i; + int num_from_free_list; + struct list_head from_free_list = LIST_HEAD_INIT(from_free_list); + gfp_t gfp; + + might_sleep(); + + KBASE_DEBUG_ASSERT(NULL != allocator); + + /* take from the free list first */ + mutex_lock(&allocator->free_list_lock); + num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size)); + atomic_sub(num_from_free_list, &allocator->free_list_size); + for (i = 0; i < num_from_free_list; i++) { + BUG_ON(list_empty(&allocator->free_list_head)); + p = list_first_entry(&allocator->free_list_head, struct page, lru); + list_move(&p->lru, &from_free_list); + } + mutex_unlock(&allocator->free_list_lock); + i = 0; + + /* Allocate as many pages from the pool of already allocated pages. */ + list_for_each_entry(p, &from_free_list, lru) { + pages[i] = PFN_PHYS(page_to_pfn(p)); + i++; + } + + if (i == nr_pages) + return 0; + +#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) + /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ + gfp = GFP_USER | __GFP_ZERO; +#else + gfp = GFP_HIGHUSER | __GFP_ZERO; +#endif + + if (current->flags & PF_KTHREAD) { + /* Don't trigger OOM killer from kernel threads, e.g. when + * growing memory on GPU page fault */ + gfp |= __GFP_NORETRY; + } + + /* If not all pages were sourced from the pool, request new ones. */ + for (; i < nr_pages; i++) { + dma_addr_t dma_addr; + + p = alloc_page(gfp); + if (NULL == p) + goto err_out_roll_back; + + dma_addr = dma_map_page(allocator->kbdev->dev, p, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(allocator->kbdev->dev, dma_addr)) { + __free_page(p); + goto err_out_roll_back; + } + + kbase_set_dma_addr(p, dma_addr); + pages[i] = PFN_PHYS(page_to_pfn(p)); + BUG_ON(dma_addr != pages[i]); + } + + return 0; + +err_out_roll_back: + while (i--) { + p = pfn_to_page(PFN_DOWN(pages[i])); + pages[i] = (phys_addr_t)0; + dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p), + PAGE_SIZE, + DMA_BIDIRECTIONAL); + ClearPagePrivate(p); + __free_page(p); + } + + return -ENOMEM; +} +KBASE_EXPORT_TEST_API(kbase_mem_allocator_alloc); + +void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages, bool sync_back) +{ + int i = 0; + int page_count = 0; + int tofree; + + LIST_HEAD(new_free_list_items); + + KBASE_DEBUG_ASSERT(NULL != allocator); + + might_sleep(); + + /* Starting by just freeing the overspill. + * As we do this outside of the lock we might spill too many pages + * or get too many on the free list, but the max_size is just a ballpark so it is ok + * providing that tofree doesn't exceed nr_pages + */ + tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size), 0); + tofree = nr_pages - MIN(tofree, nr_pages); + for (; i < tofree; i++) { + if (likely(0 != pages[i])) { + struct page *p; + + p = pfn_to_page(PFN_DOWN(pages[i])); + dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p), + PAGE_SIZE, + DMA_BIDIRECTIONAL); + ClearPagePrivate(p); + pages[i] = (phys_addr_t)0; + __free_page(p); + } + } + + for (; i < nr_pages; i++) { + if (likely(0 != pages[i])) { + struct page *p; + + p = pfn_to_page(PFN_DOWN(pages[i])); + pages[i] = (phys_addr_t)0; + /* Sync back the memory to ensure that future cache + * invalidations don't trample on memory. 
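+ * Ownership is handed back to the CPU with dma_sync_single_for_cpu(); + * DMA_BIDIRECTIONAL matches the direction the page was originally + * mapped with in kbase_mem_allocator_alloc().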
+ */ + if (sync_back) + dma_sync_single_for_cpu(allocator->kbdev->dev, + kbase_dma_addr(p), + PAGE_SIZE, + DMA_BIDIRECTIONAL); + + list_add(&p->lru, &new_free_list_items); + page_count++; + } + } + mutex_lock(&allocator->free_list_lock); + list_splice(&new_free_list_items, &allocator->free_list_head); + atomic_add(page_count, &allocator->free_list_size); + mutex_unlock(&allocator->free_list_lock); +} +KBASE_EXPORT_TEST_API(kbase_mem_allocator_free); + diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h new file mode 100644 index 00000000000..ab7b25953f2 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc.h @@ -0,0 +1,32 @@ +/* + * + * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include <linux/atomic.h> +#include <linux/mempool.h> +#include <linux/slab.h> + +/* raw page handling */ +struct kbase_mem_allocator { + struct kbase_device *kbdev; + atomic_t free_list_size; + unsigned int free_list_max_size; + struct mutex free_list_lock; + struct list_head free_list_head; + struct shrinker free_list_reclaimer; +}; diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c new file mode 100644 index 00000000000..df9761b1e2e --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_alloc_carveout.c @@ -0,0 +1,402 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +/** + * @file mali_kbase_mem.c + * Base kernel memory APIs + */ +#include <mali_kbase.h> +#include <linux/dma-mapping.h> +#include <linux/highmem.h> +#include <linux/mempool.h> +#include <linux/mm.h> +#include <linux/atomic.h> +#include <linux/debugfs.h> +#include <linux/memblock.h> +#include <linux/seq_file.h> +#include <linux/version.h> + + +/* This code does not support having multiple kbase devices, or rmmod/insmod */ + +static unsigned long kbase_carveout_start_pfn = ~0UL; +static unsigned long kbase_carveout_end_pfn; +static LIST_HEAD(kbase_carveout_free_list); +static DEFINE_MUTEX(kbase_carveout_free_list_lock); +static unsigned int kbase_carveout_pages; +static atomic_t kbase_carveout_used_pages; +static atomic_t kbase_carveout_system_pages; + +static struct page *kbase_carveout_get_page(struct kbase_mem_allocator *allocator) +{ + struct page *p = NULL; + gfp_t gfp; + + mutex_lock(&kbase_carveout_free_list_lock); + if (!list_empty(&kbase_carveout_free_list)) { + p = list_first_entry(&kbase_carveout_free_list, struct page, lru); + list_del(&p->lru); + atomic_inc(&kbase_carveout_used_pages); + } + mutex_unlock(&kbase_carveout_free_list_lock); + + if (!p) { + dma_addr_t dma_addr; +#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) + /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ + gfp = GFP_USER | __GFP_ZERO; +#else + gfp = GFP_HIGHUSER | __GFP_ZERO; +#endif + + if (current->flags & PF_KTHREAD) { + /* Don't trigger OOM killer from kernel threads, e.g. + * when growing memory on GPU page fault */ + gfp |= __GFP_NORETRY; + } + + p = alloc_page(gfp); + if (!p) + goto out; + + dma_addr = dma_map_page(allocator->kbdev->dev, p, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(allocator->kbdev->dev, dma_addr)) { + __free_page(p); + p = NULL; + goto out; + } + + kbase_set_dma_addr(p, dma_addr); + BUG_ON(dma_addr != PFN_PHYS(page_to_pfn(p))); + atomic_inc(&kbase_carveout_system_pages); + } +out: + return p; +} + +static void kbase_carveout_put_page(struct page *p, + struct kbase_mem_allocator *allocator) +{ + if (page_to_pfn(p) >= kbase_carveout_start_pfn && + page_to_pfn(p) <= kbase_carveout_end_pfn) { + mutex_lock(&kbase_carveout_free_list_lock); + list_add(&p->lru, &kbase_carveout_free_list); + atomic_dec(&kbase_carveout_used_pages); + mutex_unlock(&kbase_carveout_free_list_lock); + } else { + dma_unmap_page(allocator->kbdev->dev, kbase_dma_addr(p), + PAGE_SIZE, + DMA_BIDIRECTIONAL); + ClearPagePrivate(p); + __free_page(p); + atomic_dec(&kbase_carveout_system_pages); + } +} + +static int kbase_carveout_seq_show(struct seq_file *s, void *data) +{ + seq_printf(s, "carveout pages: %u\n", kbase_carveout_pages); + seq_printf(s, "used carveout pages: %u\n", + atomic_read(&kbase_carveout_used_pages)); + seq_printf(s, "used system pages: %u\n", + atomic_read(&kbase_carveout_system_pages)); + return 0; +} + +static int kbasep_carveout_debugfs_open(struct inode *inode, struct file *file) +{ + return single_open(file, kbase_carveout_seq_show, NULL); +} + +static const struct file_operations kbase_carveout_debugfs_fops = { + .open = kbasep_carveout_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static int kbase_carveout_init(struct device *dev) +{ + unsigned long pfn; + static int once; + + mutex_lock(&kbase_carveout_free_list_lock); + BUG_ON(once); + once = 1; + + for (pfn = kbase_carveout_start_pfn; pfn <= kbase_carveout_end_pfn; pfn++) { + struct page *p = 
pfn_to_page(pfn); + dma_addr_t dma_addr; + + dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, dma_addr)) + goto out_rollback; + + kbase_set_dma_addr(p, dma_addr); + BUG_ON(dma_addr != PFN_PHYS(page_to_pfn(p))); + + list_add_tail(&p->lru, &kbase_carveout_free_list); + } + + mutex_unlock(&kbase_carveout_free_list_lock); + + debugfs_create_file("kbase_carveout", S_IRUGO, NULL, NULL, + &kbase_carveout_debugfs_fops); + + return 0; + +out_rollback: + while (!list_empty(&kbase_carveout_free_list)) { + struct page *p; + + p = list_first_entry(&kbase_carveout_free_list, struct page, lru); + dma_unmap_page(dev, kbase_dma_addr(p), + PAGE_SIZE, + DMA_BIDIRECTIONAL); + ClearPagePrivate(p); + list_del(&p->lru); + } + + mutex_unlock(&kbase_carveout_free_list_lock); + return -ENOMEM; +} + +int __init kbase_carveout_mem_reserve(phys_addr_t size) +{ + phys_addr_t mem; + +#if defined(CONFIG_ARM) && !defined(CONFIG_HAVE_DMA_ATTRS) \ + && LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0) + /* DMA cache sync fails for HIGHMEM before 3.5 on ARM */ + mem = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ACCESSIBLE); +#else + mem = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE); +#endif + if (mem == 0) { + pr_warn("%s: Failed to allocate %d for kbase carveout\n", + __func__, size); + return -ENOMEM; + } + + kbase_carveout_start_pfn = PFN_DOWN(mem); + kbase_carveout_end_pfn = PFN_DOWN(mem + size - 1); + kbase_carveout_pages = kbase_carveout_end_pfn - kbase_carveout_start_pfn + 1; + + return 0; +} + +int kbase_mem_lowlevel_init(struct kbase_device *kbdev) +{ + return kbase_carveout_init(kbdev->dev); +} + +void kbase_mem_lowlevel_term(struct kbase_device *kbdev) +{ +} + +static int kbase_mem_allocator_shrink(struct shrinker *s, struct shrink_control *sc) +{ + struct kbase_mem_allocator *allocator; + int i; + int freed; + + allocator = container_of(s, struct kbase_mem_allocator, free_list_reclaimer); + + if (sc->nr_to_scan == 0) + return atomic_read(&allocator->free_list_size); + + might_sleep(); + + mutex_lock(&allocator->free_list_lock); + i = MIN(atomic_read(&allocator->free_list_size), sc->nr_to_scan); + freed = i; + + atomic_sub(i, &allocator->free_list_size); + + while (i--) { + struct page *p; + + BUG_ON(list_empty(&allocator->free_list_head)); + p = list_first_entry(&allocator->free_list_head, struct page, lru); + list_del(&p->lru); + kbase_carveout_put_page(p, allocator); + } + mutex_unlock(&allocator->free_list_lock); + return atomic_read(&allocator->free_list_size); +} + +int kbase_mem_allocator_init(struct kbase_mem_allocator * const allocator, + unsigned int max_size, struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(NULL != allocator); + KBASE_DEBUG_ASSERT(kbdev); + + INIT_LIST_HEAD(&allocator->free_list_head); + + allocator->kbdev = kbdev; + + mutex_init(&allocator->free_list_lock); + + atomic_set(&allocator->free_list_size, 0); + + allocator->free_list_max_size = max_size; + allocator->free_list_reclaimer.shrink = kbase_mem_allocator_shrink; + allocator->free_list_reclaimer.seeks = DEFAULT_SEEKS; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0) /* Kernel versions prior to 3.1 : struct shrinker does not define batch */ + allocator->free_list_reclaimer.batch = 0; +#endif + + register_shrinker(&allocator->free_list_reclaimer); + + return 0; +} + +void kbase_mem_allocator_term(struct kbase_mem_allocator *allocator) +{ + KBASE_DEBUG_ASSERT(NULL != allocator); + + unregister_shrinker(&allocator->free_list_reclaimer); + + while 
+		struct page *p;
+
+		p = list_first_entry(&allocator->free_list_head, struct page,
+				lru);
+		list_del(&p->lru);
+
+		kbase_carveout_put_page(p, allocator);
+	}
+	mutex_destroy(&allocator->free_list_lock);
+}
+
+
+int kbase_mem_allocator_alloc(struct kbase_mem_allocator *allocator, size_t nr_pages, phys_addr_t *pages)
+{
+	struct page *p;
+	int i;
+	int num_from_free_list;
+	struct list_head from_free_list = LIST_HEAD_INIT(from_free_list);
+
+	might_sleep();
+
+	KBASE_DEBUG_ASSERT(NULL != allocator);
+
+	/* take from the free list first */
+	mutex_lock(&allocator->free_list_lock);
+	num_from_free_list = MIN(nr_pages, atomic_read(&allocator->free_list_size));
+	atomic_sub(num_from_free_list, &allocator->free_list_size);
+	for (i = 0; i < num_from_free_list; i++) {
+		BUG_ON(list_empty(&allocator->free_list_head));
+		p = list_first_entry(&allocator->free_list_head, struct page, lru);
+		list_move(&p->lru, &from_free_list);
+	}
+	mutex_unlock(&allocator->free_list_lock);
+	i = 0;
+
+	/* Record the physical address of every page taken from the free
+	 * list. */
+	list_for_each_entry(p, &from_free_list, lru) {
+		pages[i] = PFN_PHYS(page_to_pfn(p));
+		i++;
+	}
+
+	if (i == nr_pages)
+		return 0;
+
+	/* If not all pages were sourced from the pool, request new ones. */
+	for (; i < nr_pages; i++) {
+		p = kbase_carveout_get_page(allocator);
+		if (NULL == p)
+			goto err_out_roll_back;
+
+		kbase_sync_single_for_device(allocator->kbdev,
+				kbase_dma_addr(p),
+				PAGE_SIZE,
+				DMA_BIDIRECTIONAL);
+
+		pages[i] = PFN_PHYS(page_to_pfn(p));
+	}
+
+	return 0;
+
+err_out_roll_back:
+	while (i--) {
+		struct page *p;
+
+		p = pfn_to_page(PFN_DOWN(pages[i]));
+		pages[i] = (phys_addr_t)0;
+		kbase_carveout_put_page(p, allocator);
+	}
+
+	return -ENOMEM;
+}
+
+void kbase_mem_allocator_free(struct kbase_mem_allocator *allocator, u32 nr_pages, phys_addr_t *pages, bool sync_back)
+{
+	int i = 0;
+	int page_count = 0;
+	int tofree;
+
+	LIST_HEAD(new_free_list_items);
+
+	KBASE_DEBUG_ASSERT(NULL != allocator);
+
+	might_sleep();
+
+	/* Start by freeing any overspill. As this is done outside the lock
+	 * we may free slightly too many pages, or leave slightly too many on
+	 * the free list, but max_size is only a ballpark target, so that is
+	 * acceptable as long as tofree does not exceed nr_pages.
+	 */
+	tofree = MAX((int)allocator->free_list_max_size - atomic_read(&allocator->free_list_size), 0);
+	tofree = nr_pages - MIN(tofree, nr_pages);
+	for (; i < tofree; i++) {
+		if (likely(0 != pages[i])) {
+			struct page *p;
+
+			p = pfn_to_page(PFN_DOWN(pages[i]));
+			pages[i] = (phys_addr_t)0;
+			kbase_carveout_put_page(p, allocator);
+		}
+	}
+
+	for (; i < nr_pages; i++) {
+		if (likely(0 != pages[i])) {
+			struct page *p;
+
+			p = pfn_to_page(PFN_DOWN(pages[i]));
+			pages[i] = (phys_addr_t)0;
+			/* Sync back the memory to ensure that future cache
+			 * invalidations don't trample on memory.
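+			 * Without this sync a dirty CPU cache line could be
+			 * evicted later and overwrite data written by the
+			 * page's next owner.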
+ */ + if (sync_back) + kbase_sync_single_for_cpu(allocator->kbdev, + kbase_dma_addr(p), + PAGE_SIZE, + DMA_BIDIRECTIONAL); + list_add(&p->lru, &new_free_list_items); + page_count++; + } + } + mutex_lock(&allocator->free_list_lock); + list_splice(&new_free_list_items, &allocator->free_list_head); + atomic_add(page_count, &allocator->free_list_size); + mutex_unlock(&allocator->free_list_lock); +} +KBASE_EXPORT_TEST_API(kbase_mem_allocator_free); + diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c new file mode 100644 index 00000000000..1ddfe5c9737 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.c @@ -0,0 +1,1909 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_mem_linux.c + * Base kernel memory APIs, Linux implementation. + */ + +#include <linux/compat.h> +#include <linux/kernel.h> +#include <linux/bug.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/version.h> +#include <linux/dma-mapping.h> +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + #include <linux/dma-attrs.h> +#endif +#ifdef CONFIG_DMA_SHARED_BUFFER +#include <linux/dma-buf.h> +#endif /* defined(CONFIG_DMA_SHARED_BUFFER) */ + +#include <mali_kbase.h> +#include <mali_kbase_mem_linux.h> +#include <mali_kbase_config_defaults.h> +#include <mali_kbase_hwaccess_time.h> + +static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma); +static const struct vm_operations_struct kbase_vm_ops; + +struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment) +{ + int zone; + int gpu_pc_bits; + int cpu_va_bits; + struct kbase_va_region *reg; + struct device *dev; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(flags); + KBASE_DEBUG_ASSERT(gpu_va); + KBASE_DEBUG_ASSERT(va_alignment); + + dev = kctx->kbdev->dev; + *va_alignment = 0; /* no alignment by default */ + *gpu_va = 0; /* return 0 on failure */ + + gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; + cpu_va_bits = BITS_PER_LONG; + + if (0 == va_pages) { + dev_warn(dev, "kbase_mem_alloc called with 0 va_pages!"); + goto bad_size; + } + + if (va_pages > (U64_MAX / PAGE_SIZE)) + /* 64-bit address range is the max */ + goto bad_size; + +#if defined(CONFIG_64BIT) + if (kctx->is_compat) + cpu_va_bits = 32; + else + /* force SAME_VA if a 64-bit client */ + *flags |= BASE_MEM_SAME_VA; +#endif + + if (!kbase_check_alloc_flags(*flags)) { + dev_warn(dev, + "kbase_mem_alloc called with bad flags (%llx)", + (unsigned long long)*flags); + goto bad_flags; + } + + if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 && + kctx->kbdev->system_coherency != COHERENCY_ACE) { + dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable"); + goto bad_flags; + } + if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 && + kctx->kbdev->system_coherency != COHERENCY_ACE) { + /* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */ + 
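+		/* Unlike BASE_MEM_COHERENT_SYSTEM_REQUIRED above, plain
+		 * BASE_MEM_COHERENT_SYSTEM is only a preference, so the
+		 * allocation proceeds without coherency instead of failing.
+		 */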
*flags &= ~BASE_MEM_COHERENT_SYSTEM; + } + + /* Limit GPU executable allocs to GPU PC size */ + if ((*flags & BASE_MEM_PROT_GPU_EX) && + (va_pages > (1ULL << gpu_pc_bits >> PAGE_SHIFT))) + goto bad_ex_size; + + /* find out which VA zone to use */ + if (*flags & BASE_MEM_SAME_VA) + zone = KBASE_REG_ZONE_SAME_VA; + else if (*flags & BASE_MEM_PROT_GPU_EX) + zone = KBASE_REG_ZONE_EXEC; + else + zone = KBASE_REG_ZONE_CUSTOM_VA; + + reg = kbase_alloc_free_region(kctx, 0, va_pages, zone); + if (!reg) { + dev_err(dev, "Failed to allocate free region"); + goto no_region; + } + + kbase_update_region_flags(reg, *flags); + + if (kbase_reg_prepare_native(reg, kctx) != 0) { + dev_err(dev, "Failed to prepare region"); + goto prepare_failed; + } + + if (*flags & BASE_MEM_GROW_ON_GPF) + reg->extent = extent; + else + reg->extent = 0; + + if (kbase_alloc_phy_pages(reg, va_pages, commit_pages)) { + dev_warn(dev, "Failed to allocate %lld pages (va_pages=%lld)", + (unsigned long long)commit_pages, + (unsigned long long)va_pages); + goto no_mem; + } + + kbase_gpu_vm_lock(kctx); + + /* mmap needed to setup VA? */ + if (*flags & BASE_MEM_SAME_VA) { + /* Bind to a cookie */ + if (!kctx->cookies) { + dev_err(dev, "No cookies available for allocation!"); + goto no_cookie; + } + /* return a cookie */ + *gpu_va = __ffs(kctx->cookies); + kctx->cookies &= ~(1UL << *gpu_va); + BUG_ON(kctx->pending_regions[*gpu_va]); + kctx->pending_regions[*gpu_va] = reg; + + /* relocate to correct base */ + *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); + *gpu_va <<= PAGE_SHIFT; + + /* See if we must align memory due to GPU PC bits vs CPU VA */ + if ((*flags & BASE_MEM_PROT_GPU_EX) && + (cpu_va_bits > gpu_pc_bits)) { + *va_alignment = gpu_pc_bits; + reg->flags |= KBASE_REG_ALIGNED; + } + } else /* we control the VA */ { + if (kbase_gpu_mmap(kctx, reg, 0, va_pages, 1) != 0) { + dev_warn(dev, "Failed to map memory on GPU"); + goto no_mmap; + } + /* return real GPU VA */ + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } + + kbase_gpu_vm_unlock(kctx); + return reg; + +no_mmap: +no_cookie: + kbase_gpu_vm_unlock(kctx); +no_mem: + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); +prepare_failed: + kfree(reg); +no_region: +bad_ex_size: +bad_flags: +bad_size: + return NULL; +} + +int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 * const out) +{ + struct kbase_va_region *reg; + int ret = -EINVAL; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(out); + + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (!reg || (reg->flags & KBASE_REG_FREE)) + goto out_unlock; + + switch (query) { + case KBASE_MEM_QUERY_COMMIT_SIZE: + if (reg->cpu_alloc->type != KBASE_MEM_TYPE_ALIAS) { + *out = kbase_reg_current_backed_size(reg); + } else { + size_t i; + struct kbase_aliased *aliased; + *out = 0; + aliased = reg->cpu_alloc->imported.alias.aliased; + for (i = 0; i < reg->cpu_alloc->imported.alias.nents; i++) + *out += aliased[i].length; + } + break; + case KBASE_MEM_QUERY_VA_SIZE: + *out = reg->nr_pages; + break; + case KBASE_MEM_QUERY_FLAGS: + { + *out = 0; + if (KBASE_REG_CPU_WR & reg->flags) + *out |= BASE_MEM_PROT_CPU_WR; + if (KBASE_REG_CPU_RD & reg->flags) + *out |= BASE_MEM_PROT_CPU_RD; + if (KBASE_REG_CPU_CACHED & reg->flags) + *out |= BASE_MEM_CACHED_CPU; + if (KBASE_REG_GPU_WR & reg->flags) + *out |= BASE_MEM_PROT_GPU_WR; + if (KBASE_REG_GPU_RD & reg->flags) + *out |= BASE_MEM_PROT_GPU_RD; + if (!(KBASE_REG_GPU_NX & 
reg->flags)) + *out |= BASE_MEM_PROT_GPU_EX; + if (KBASE_REG_SHARE_BOTH & reg->flags) + *out |= BASE_MEM_COHERENT_SYSTEM; + if (KBASE_REG_SHARE_IN & reg->flags) + *out |= BASE_MEM_COHERENT_LOCAL; + break; + } + default: + *out = 0; + goto out_unlock; + } + + ret = 0; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + return ret; +} + +int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask) +{ + struct kbase_va_region *reg; + int ret = -EINVAL; + unsigned int real_flags = 0; + unsigned int prev_flags = 0; + + KBASE_DEBUG_ASSERT(kctx); + + if (!gpu_addr) + return -EINVAL; + + /* nuke other bits */ + flags &= mask; + + /* check for only supported flags */ + if (flags & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + goto out; + + /* mask covers bits we don't support? */ + if (mask & ~(BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL)) + goto out; + + /* convert flags */ + if (BASE_MEM_COHERENT_SYSTEM & flags) + real_flags |= KBASE_REG_SHARE_BOTH; + else if (BASE_MEM_COHERENT_LOCAL & flags) + real_flags |= KBASE_REG_SHARE_IN; + + /* now we can lock down the context, and find the region */ + kbase_gpu_vm_lock(kctx); + + /* Validate the region */ + reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr); + if (!reg || (reg->flags & KBASE_REG_FREE)) + goto out_unlock; + + /* limit to imported memory */ + if ((reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMP) && + (reg->gpu_alloc->type != KBASE_MEM_TYPE_IMPORTED_UMM)) + goto out_unlock; + + /* no change? */ + if (real_flags == (reg->flags & (KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH))) { + ret = 0; + goto out_unlock; + } + + /* save for roll back */ + prev_flags = reg->flags; + reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH); + reg->flags |= real_flags; + + /* Currently supporting only imported memory */ + switch (reg->gpu_alloc->type) { +#ifdef CONFIG_UMP + case KBASE_MEM_TYPE_IMPORTED_UMP: + ret = kbase_mmu_update_pages(kctx, reg->start_pfn, kbase_get_cpu_phy_pages(reg), reg->gpu_alloc->nents, reg->flags); + break; +#endif +#ifdef CONFIG_DMA_SHARED_BUFFER + case KBASE_MEM_TYPE_IMPORTED_UMM: + /* Future use will use the new flags, existing mapping will NOT be updated + * as memory should not be in use by the GPU when updating the flags. + */ + ret = 0; + WARN_ON(reg->gpu_alloc->imported.umm.current_mapping_usage_count); + break; +#endif + default: + break; + } + + /* roll back on error, i.e. 
not UMP */ + if (ret) + reg->flags = prev_flags; + +out_unlock: + kbase_gpu_vm_unlock(kctx); +out: + return ret; +} + +#define KBASE_MEM_IMPORT_HAVE_PAGES (1UL << BASE_MEM_FLAGS_NR_BITS) + +#ifdef CONFIG_UMP +static struct kbase_va_region *kbase_mem_from_ump(struct kbase_context *kctx, ump_secure_id id, u64 *va_pages, u64 *flags) +{ + struct kbase_va_region *reg; + ump_dd_handle umph; + u64 block_count; + const ump_dd_physical_block_64 *block_array; + u64 i, j; + int page = 0; + ump_alloc_flags ump_flags; + ump_alloc_flags cpu_flags; + ump_alloc_flags gpu_flags; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(va_pages); + KBASE_DEBUG_ASSERT(flags); + + if (*flags & BASE_MEM_SECURE) + goto bad_flags; + + umph = ump_dd_from_secure_id(id); + if (UMP_DD_INVALID_MEMORY_HANDLE == umph) + goto bad_id; + + ump_flags = ump_dd_allocation_flags_get(umph); + cpu_flags = (ump_flags >> UMP_DEVICE_CPU_SHIFT) & UMP_DEVICE_MASK; + gpu_flags = (ump_flags >> DEFAULT_UMP_GPU_DEVICE_SHIFT) & + UMP_DEVICE_MASK; + + *va_pages = ump_dd_size_get_64(umph); + *va_pages >>= PAGE_SHIFT; + + if (!*va_pages) + goto bad_size; + + if (*va_pages > (U64_MAX / PAGE_SIZE)) + /* 64-bit address range is the max */ + goto bad_size; + + if (*flags & BASE_MEM_SAME_VA) + reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); + else + reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); + + if (!reg) + goto no_region; + + /* we've got pages to map now, and support SAME_VA */ + *flags |= KBASE_MEM_IMPORT_HAVE_PAGES; + + reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMP); + if (IS_ERR_OR_NULL(reg->gpu_alloc)) + goto no_alloc_obj; + + reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + reg->gpu_alloc->imported.ump_handle = umph; + + reg->flags &= ~KBASE_REG_FREE; + reg->flags |= KBASE_REG_GPU_NX; /* UMP is always No eXecute */ + reg->flags &= ~KBASE_REG_GROWABLE; /* UMP cannot be grown */ + + /* Override import flags based on UMP flags */ + *flags &= ~(BASE_MEM_CACHED_CPU); + *flags &= ~(BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR); + *flags &= ~(BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR); + + if ((cpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) == + (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) { + reg->flags |= KBASE_REG_CPU_CACHED; + *flags |= BASE_MEM_CACHED_CPU; + } + + if (cpu_flags & UMP_PROT_CPU_WR) { + reg->flags |= KBASE_REG_CPU_WR; + *flags |= BASE_MEM_PROT_CPU_WR; + } + + if (cpu_flags & UMP_PROT_CPU_RD) { + reg->flags |= KBASE_REG_CPU_RD; + *flags |= BASE_MEM_PROT_CPU_RD; + } + + if ((gpu_flags & (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) == + (UMP_HINT_DEVICE_RD | UMP_HINT_DEVICE_WR)) + reg->flags |= KBASE_REG_GPU_CACHED; + + if (gpu_flags & UMP_PROT_DEVICE_WR) { + reg->flags |= KBASE_REG_GPU_WR; + *flags |= BASE_MEM_PROT_GPU_WR; + } + + if (gpu_flags & UMP_PROT_DEVICE_RD) { + reg->flags |= KBASE_REG_GPU_RD; + *flags |= BASE_MEM_PROT_GPU_RD; + } + + /* ump phys block query */ + ump_dd_phys_blocks_get_64(umph, &block_count, &block_array); + + for (i = 0; i < block_count; i++) { + for (j = 0; j < (block_array[i].size >> PAGE_SHIFT); j++) { + reg->gpu_alloc->pages[page] = block_array[i].addr + (j << PAGE_SHIFT); + page++; + } + } + reg->gpu_alloc->nents = *va_pages; + reg->extent = 0; + + return reg; + +no_alloc_obj: + kfree(reg); +no_region: +bad_size: + ump_dd_release(umph); +bad_id: +bad_flags: + return NULL; +} +#endif /* CONFIG_UMP */ + +#ifdef CONFIG_DMA_SHARED_BUFFER +static struct kbase_va_region *kbase_mem_from_umm(struct 
kbase_context *kctx, int fd, u64 *va_pages, u64 *flags) +{ + struct kbase_va_region *reg; + struct dma_buf *dma_buf; + struct dma_buf_attachment *dma_attachment; + + dma_buf = dma_buf_get(fd); + if (IS_ERR_OR_NULL(dma_buf)) + goto no_buf; + + dma_attachment = dma_buf_attach(dma_buf, kctx->kbdev->dev); + if (!dma_attachment) + goto no_attachment; + + *va_pages = PAGE_ALIGN(dma_buf->size) >> PAGE_SHIFT; + if (!*va_pages) + goto bad_size; + + if (*va_pages > (U64_MAX / PAGE_SIZE)) + /* 64-bit address range is the max */ + goto bad_size; + + /* ignore SAME_VA */ + *flags &= ~BASE_MEM_SAME_VA; + +#ifdef CONFIG_64BIT + if (!kctx->is_compat) { + /* 64-bit tasks must MMAP anyway, but not expose this address to clients */ + *flags |= BASE_MEM_NEED_MMAP; + reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_SAME_VA); + } else { +#else + if (1) { +#endif + reg = kbase_alloc_free_region(kctx, 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA); + } + + if (!reg) + goto no_region; + + reg->gpu_alloc = kbase_alloc_create(*va_pages, KBASE_MEM_TYPE_IMPORTED_UMM); + if (IS_ERR_OR_NULL(reg->gpu_alloc)) + goto no_alloc_obj; + + reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + /* No pages to map yet */ + reg->gpu_alloc->nents = 0; + + reg->flags &= ~KBASE_REG_FREE; + reg->flags |= KBASE_REG_GPU_NX; /* UMM is always No eXecute */ + reg->flags &= ~KBASE_REG_GROWABLE; /* UMM cannot be grown */ + reg->flags |= KBASE_REG_GPU_CACHED; + + if (*flags & BASE_MEM_PROT_CPU_WR) + reg->flags |= KBASE_REG_CPU_WR; + + if (*flags & BASE_MEM_PROT_CPU_RD) + reg->flags |= KBASE_REG_CPU_RD; + + if (*flags & BASE_MEM_PROT_GPU_WR) + reg->flags |= KBASE_REG_GPU_WR; + + if (*flags & BASE_MEM_PROT_GPU_RD) + reg->flags |= KBASE_REG_GPU_RD; + + if (*flags & BASE_MEM_SECURE) + reg->flags |= KBASE_REG_SECURE; + + /* no read or write permission given on import, only on run do we give the right permissions */ + + reg->gpu_alloc->type = BASE_TMEM_IMPORT_TYPE_UMM; + reg->gpu_alloc->imported.umm.sgt = NULL; + reg->gpu_alloc->imported.umm.dma_buf = dma_buf; + reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; + reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; + reg->extent = 0; + + return reg; + +no_alloc_obj: + kfree(reg); +no_region: +bad_size: + dma_buf_detach(dma_buf, dma_attachment); +no_attachment: + dma_buf_put(dma_buf); +no_buf: + return NULL; +} +#endif /* CONFIG_DMA_SHARED_BUFFER */ + +u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, + u64 nents, struct base_mem_aliasing_info *ai, + u64 *num_pages) +{ + struct kbase_va_region *reg; + u64 gpu_va; + size_t i; + bool coherent; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(flags); + KBASE_DEBUG_ASSERT(ai); + KBASE_DEBUG_ASSERT(num_pages); + + /* mask to only allowed flags */ + *flags &= (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | + BASE_MEM_COHERENT_SYSTEM | BASE_MEM_COHERENT_LOCAL | + BASE_MEM_COHERENT_SYSTEM_REQUIRED); + + if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) { + dev_warn(kctx->kbdev->dev, + "kbase_mem_alias called with bad flags (%llx)", + (unsigned long long)*flags); + goto bad_flags; + } + coherent = (*flags & BASE_MEM_COHERENT_SYSTEM) != 0 || + (*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0; + + if (!stride) + goto bad_stride; + + if (!nents) + goto bad_nents; + + if ((nents * stride) > (U64_MAX / PAGE_SIZE)) + /* 64-bit address range is the max */ + goto bad_size; + + /* calculate the number of pages this alias will cover */ + *num_pages = nents * stride; + +#ifdef CONFIG_64BIT + if 
(!kctx->is_compat) { + /* 64-bit tasks must MMAP anyway, but not expose this address to + * clients */ + *flags |= BASE_MEM_NEED_MMAP; + reg = kbase_alloc_free_region(kctx, 0, *num_pages, + KBASE_REG_ZONE_SAME_VA); + } else { +#else + if (1) { +#endif + reg = kbase_alloc_free_region(kctx, 0, *num_pages, + KBASE_REG_ZONE_CUSTOM_VA); + } + + if (!reg) + goto no_reg; + + /* zero-sized page array, as we don't need one/can support one */ + reg->gpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_ALIAS); + if (IS_ERR_OR_NULL(reg->gpu_alloc)) + goto no_alloc_obj; + + reg->cpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + + kbase_update_region_flags(reg, *flags); + + reg->gpu_alloc->imported.alias.nents = nents; + reg->gpu_alloc->imported.alias.stride = stride; + reg->gpu_alloc->imported.alias.aliased = vzalloc(sizeof(*reg->gpu_alloc->imported.alias.aliased) * nents); + if (!reg->gpu_alloc->imported.alias.aliased) + goto no_aliased_array; + + kbase_gpu_vm_lock(kctx); + + /* validate and add src handles */ + for (i = 0; i < nents; i++) { + if (ai[i].handle < BASE_MEM_FIRST_FREE_ADDRESS) { + if (ai[i].handle != BASE_MEM_WRITE_ALLOC_PAGES_HANDLE) + goto bad_handle; /* unsupported magic handle */ + if (!ai[i].length) + goto bad_handle; /* must be > 0 */ + if (ai[i].length > stride) + goto bad_handle; /* can't be larger than the + stride */ + reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; + } else { + struct kbase_va_region *aliasing_reg; + struct kbase_mem_phy_alloc *alloc; + + aliasing_reg = kbase_region_tracker_find_region_base_address(kctx, (ai[i].handle >> PAGE_SHIFT) << PAGE_SHIFT); + + /* validate found region */ + if (!aliasing_reg) + goto bad_handle; /* Not found */ + if (aliasing_reg->flags & KBASE_REG_FREE) + goto bad_handle; /* Free region */ + if (!aliasing_reg->gpu_alloc) + goto bad_handle; /* No alloc */ + if (aliasing_reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) + goto bad_handle; /* Not a native alloc */ + if (coherent != ((aliasing_reg->flags & KBASE_REG_SHARE_BOTH) != 0)) + goto bad_handle; + /* Non-coherent memory cannot alias + coherent memory, and vice versa.*/ + + /* check size against stride */ + if (!ai[i].length) + goto bad_handle; /* must be > 0 */ + if (ai[i].length > stride) + goto bad_handle; /* can't be larger than the + stride */ + + alloc = aliasing_reg->gpu_alloc; + + /* check against the alloc's size */ + if (ai[i].offset > alloc->nents) + goto bad_handle; /* beyond end */ + if (ai[i].offset + ai[i].length > alloc->nents) + goto bad_handle; /* beyond end */ + + reg->gpu_alloc->imported.alias.aliased[i].alloc = kbase_mem_phy_alloc_get(alloc); + reg->gpu_alloc->imported.alias.aliased[i].length = ai[i].length; + reg->gpu_alloc->imported.alias.aliased[i].offset = ai[i].offset; + } + } + +#ifdef CONFIG_64BIT + if (!kctx->is_compat) { + /* Bind to a cookie */ + if (!kctx->cookies) { + dev_err(kctx->kbdev->dev, "No cookies available for allocation!"); + goto no_cookie; + } + /* return a cookie */ + gpu_va = __ffs(kctx->cookies); + kctx->cookies &= ~(1UL << gpu_va); + BUG_ON(kctx->pending_regions[gpu_va]); + kctx->pending_regions[gpu_va] = reg; + + /* relocate to correct base */ + gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); + gpu_va <<= PAGE_SHIFT; + } else /* we control the VA */ { +#else + if (1) { +#endif + if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1) != 0) { + dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU"); + goto no_mmap; + } + /* return real GPU VA */ + gpu_va = reg->start_pfn << PAGE_SHIFT; + } + + reg->flags &= ~KBASE_REG_FREE; + 
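+	/* An alias is a fixed window onto its backing allocations, so the
+	 * region must never grow.
+	 */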
reg->flags &= ~KBASE_REG_GROWABLE; + + kbase_gpu_vm_unlock(kctx); + + return gpu_va; + +#ifdef CONFIG_64BIT +no_cookie: +#endif +no_mmap: +bad_handle: + kbase_gpu_vm_unlock(kctx); +no_aliased_array: + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); +no_alloc_obj: + kfree(reg); +no_reg: +bad_size: +bad_nents: +bad_stride: +bad_flags: + return 0; +} + +int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags) +{ + struct kbase_va_region *reg; + + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(gpu_va); + KBASE_DEBUG_ASSERT(va_pages); + KBASE_DEBUG_ASSERT(flags); + +#ifdef CONFIG_64BIT + if (!kctx->is_compat) + *flags |= BASE_MEM_SAME_VA; +#endif + + if (!kbase_check_import_flags(*flags)) { + dev_warn(kctx->kbdev->dev, + "kbase_mem_import called with bad flags (%llx)", + (unsigned long long)*flags); + goto bad_flags; + } + + switch (type) { +#ifdef CONFIG_UMP + case BASE_MEM_IMPORT_TYPE_UMP: + reg = kbase_mem_from_ump(kctx, (ump_secure_id)handle, va_pages, flags); + break; +#endif /* CONFIG_UMP */ +#ifdef CONFIG_DMA_SHARED_BUFFER + case BASE_MEM_IMPORT_TYPE_UMM: + reg = kbase_mem_from_umm(kctx, handle, va_pages, flags); + break; +#endif /* CONFIG_DMA_SHARED_BUFFER */ + default: + reg = NULL; + break; + } + + if (!reg) + goto no_reg; + + kbase_gpu_vm_lock(kctx); + + /* mmap needed to setup VA? */ + if (*flags & (BASE_MEM_SAME_VA | BASE_MEM_NEED_MMAP)) { + /* Bind to a cookie */ + if (!kctx->cookies) + goto no_cookie; + /* return a cookie */ + *gpu_va = __ffs(kctx->cookies); + kctx->cookies &= ~(1UL << *gpu_va); + BUG_ON(kctx->pending_regions[*gpu_va]); + kctx->pending_regions[*gpu_va] = reg; + + /* relocate to correct base */ + *gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE); + *gpu_va <<= PAGE_SHIFT; + + } else if (*flags & KBASE_MEM_IMPORT_HAVE_PAGES) { + /* we control the VA, mmap now to the GPU */ + if (kbase_gpu_mmap(kctx, reg, 0, *va_pages, 1) != 0) + goto no_gpu_va; + /* return real GPU VA */ + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } else { + /* we control the VA, but nothing to mmap yet */ + if (kbase_add_va_region(kctx, reg, 0, *va_pages, 1) != 0) + goto no_gpu_va; + /* return real GPU VA */ + *gpu_va = reg->start_pfn << PAGE_SHIFT; + } + + /* clear out private flags */ + *flags &= ((1UL << BASE_MEM_FLAGS_NR_BITS) - 1); + + kbase_gpu_vm_unlock(kctx); + + return 0; + +no_gpu_va: +no_cookie: + kbase_gpu_vm_unlock(kctx); + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); + kfree(reg); +no_reg: +bad_flags: + *gpu_va = 0; + *va_pages = 0; + *flags = 0; + return -ENOMEM; +} + + +static int zap_range_nolock(struct mm_struct *mm, + const struct vm_operations_struct *vm_ops, + unsigned long start, unsigned long end) +{ + struct vm_area_struct *vma; + int err = -EINVAL; /* in case end < start */ + + while (start < end) { + unsigned long local_end; + + vma = find_vma_intersection(mm, start, end); + if (!vma) + break; + + /* is it ours? 
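+		 * Only zap VMAs that kbase itself created; they are
+		 * recognised by the kbase_vm_ops table installed at mmap
+		 * time.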
*/
+		if (vma->vm_ops != vm_ops)
+			goto try_next;
+
+		local_end = vma->vm_end;
+
+		if (end < local_end)
+			local_end = end;
+
+		err = zap_vma_ptes(vma, start, local_end - start);
+		if (unlikely(err))
+			break;
+
+try_next:
+		/* go to next vma, if any */
+		start = vma->vm_end;
+	}
+
+	return err;
+}
+
+int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason)
+{
+	u64 old_pages;
+	u64 delta;
+	int res = -EINVAL;
+	struct kbase_va_region *reg;
+	phys_addr_t *phy_pages;
+
+	KBASE_DEBUG_ASSERT(kctx);
+	KBASE_DEBUG_ASSERT(failure_reason);
+	KBASE_DEBUG_ASSERT(gpu_addr != 0);
+
+	down_read(&current->mm->mmap_sem);
+	kbase_gpu_vm_lock(kctx);
+
+	/* Validate the region */
+	reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
+	if (!reg || (reg->flags & KBASE_REG_FREE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	KBASE_DEBUG_ASSERT(reg->cpu_alloc);
+	KBASE_DEBUG_ASSERT(reg->gpu_alloc);
+
+	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (0 == (reg->flags & KBASE_REG_GROWABLE)) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages > reg->nr_pages) {
+		/* Would overflow the VA region */
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_INVALID_ARGUMENTS;
+		goto out_unlock;
+	}
+
+	/* can't be mapped more than once on the GPU */
+	if (atomic_read(&reg->gpu_alloc->gpu_mappings) > 1) {
+		*failure_reason = BASE_BACKING_THRESHOLD_ERROR_NOT_GROWABLE;
+		goto out_unlock;
+	}
+
+	if (new_pages == reg->gpu_alloc->nents) {
+		/* no change */
+		res = 0;
+		goto out_unlock;
+	}
+
+	phy_pages = kbase_get_gpu_phy_pages(reg);
+	old_pages = kbase_reg_current_backed_size(reg);
+
+	if (new_pages > old_pages) {
+		/* growing */
+		int err;
+
+		delta = new_pages - old_pages;
+		/* Allocate some more pages */
+		if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, delta) != 0) {
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+		if (reg->cpu_alloc != reg->gpu_alloc) {
+			if (kbase_alloc_phy_pages_helper(
+					reg->gpu_alloc, delta) != 0) {
+				*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+				kbase_free_phy_pages_helper(reg->cpu_alloc,
+						delta);
+				goto out_unlock;
+			}
+		}
+		err = kbase_mmu_insert_pages(kctx, reg->start_pfn + old_pages,
+				phy_pages + old_pages, delta, reg->flags);
+		if (err) {
+			kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
+			if (reg->cpu_alloc != reg->gpu_alloc)
+				kbase_free_phy_pages_helper(reg->gpu_alloc,
+						delta);
+			*failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM;
+			goto out_unlock;
+		}
+	} else {
+		/* shrinking */
+		struct kbase_cpu_mapping *mapping;
+		int err;
+
+		/* first, unmap from any mappings affected */
+		list_for_each_entry(mapping, &reg->cpu_alloc->mappings, mappings_list) {
+			unsigned long mapping_size = (mapping->vm_end - mapping->vm_start) >> PAGE_SHIFT;
+
+			/* is this mapping affected? */
+			if ((mapping->page_off + mapping_size) > new_pages) {
+				unsigned long first_bad = 0;
+				int zap_res;
+
+				if (new_pages > mapping->page_off)
+					first_bad = new_pages - mapping->page_off;
+
+				zap_res = zap_range_nolock(current->mm,
+						&kbase_vm_ops,
+						mapping->vm_start +
+						(first_bad << PAGE_SHIFT),
+						mapping->vm_end);
+				WARN(zap_res,
+						"Failed to zap VA range (0x%lx - 0x%lx);\n",
+						mapping->vm_start +
+						(first_bad << PAGE_SHIFT),
+						mapping->vm_end
+						);
+			}
+		}
+
+		/* Free some pages */
+		delta = old_pages -
new_pages; + err = kbase_mmu_teardown_pages(kctx, reg->start_pfn + new_pages, + delta); + if (err) { + *failure_reason = BASE_BACKING_THRESHOLD_ERROR_OOM; + goto out_unlock; + } +#ifndef CONFIG_MALI_NO_MALI + if (kbase_hw_has_issue(kctx->kbdev, BASE_HW_ISSUE_6367)) { + /* Wait for GPU to flush write buffer before freeing physical pages */ + kbase_wait_write_flush(kctx); + } +#endif + kbase_free_phy_pages_helper(reg->cpu_alloc, delta); + if (reg->cpu_alloc != reg->gpu_alloc) + kbase_free_phy_pages_helper(reg->gpu_alloc, delta); + } + + res = 0; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + up_read(¤t->mm->mmap_sem); + + return res; +} + +static void kbase_cpu_vm_open(struct vm_area_struct *vma) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + /* non-atomic as we're under Linux' mm lock */ + map->count++; +} + +static void kbase_cpu_vm_close(struct vm_area_struct *vma) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + + /* non-atomic as we're under Linux' mm lock */ + if (--map->count) + return; + + KBASE_DEBUG_ASSERT(map->kctx); + KBASE_DEBUG_ASSERT(map->alloc); + + kbase_gpu_vm_lock(map->kctx); + + if (map->region) { + KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == + KBASE_REG_ZONE_SAME_VA); + /* Avoid freeing memory on the process death which results in + * GPU Page Fault. Memory will be freed in kbase_destroy_context + */ + if (!(current->flags & PF_EXITING)) + kbase_mem_free_region(map->kctx, map->region); + } + + list_del(&map->mappings_list); + + kbase_gpu_vm_unlock(map->kctx); + + kbase_mem_phy_alloc_put(map->alloc); + kfree(map); +} + +KBASE_EXPORT_TEST_API(kbase_cpu_vm_close); + + +static int kbase_cpu_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct kbase_cpu_mapping *map = vma->vm_private_data; + pgoff_t rel_pgoff; + size_t i; + + KBASE_DEBUG_ASSERT(map); + KBASE_DEBUG_ASSERT(map->count > 0); + KBASE_DEBUG_ASSERT(map->kctx); + KBASE_DEBUG_ASSERT(map->alloc); + + /* we don't use vmf->pgoff as it's affected by our mmap with + * offset being a GPU VA or a cookie */ + rel_pgoff = ((unsigned long)vmf->virtual_address - map->vm_start) + >> PAGE_SHIFT; + + kbase_gpu_vm_lock(map->kctx); + if (map->page_off + rel_pgoff >= map->alloc->nents) + goto locked_bad_fault; + + /* insert all valid pages from the fault location */ + for (i = rel_pgoff; + i < MIN((vma->vm_end - vma->vm_start) >> PAGE_SHIFT, + map->alloc->nents - map->page_off); i++) { + int ret = vm_insert_pfn(vma, map->vm_start + (i << PAGE_SHIFT), + PFN_DOWN(map->alloc->pages[map->page_off + i])); + if (ret < 0 && ret != -EBUSY) + goto locked_bad_fault; + } + + kbase_gpu_vm_unlock(map->kctx); + /* we resolved it, nothing for VM to do */ + return VM_FAULT_NOPAGE; + +locked_bad_fault: + kbase_gpu_vm_unlock(map->kctx); + send_sig(SIGSEGV, current, 1); + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct kbase_vm_ops = { + .open = kbase_cpu_vm_open, + .close = kbase_cpu_vm_close, + .fault = kbase_cpu_vm_fault +}; + +static int kbase_cpu_mmap(struct kbase_va_region *reg, struct vm_area_struct *vma, void *kaddr, size_t nr_pages, unsigned long aligned_offset, int free_on_close) +{ + struct kbase_cpu_mapping *map; + u64 start_off = vma->vm_pgoff - reg->start_pfn; + phys_addr_t *page_array; + int err = 0; + int i; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + + if (!map) { + WARN_ON(1); + err = -ENOMEM; + goto out; + } + + /* 
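+	 * Pin down how userspace may manipulate this mapping with the
+	 * following VMA flags: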
+	 * VM_DONTCOPY - don't make this mapping available in fork'ed processes
+	 * VM_DONTEXPAND - disable mremap on this region
+	 * VM_IO - disables paging
+	 * VM_DONTDUMP - Don't include in core dumps (3.7 only)
+	 * VM_MIXEDMAP - Support mixing struct page*s and raw pfns.
+	 *               This is needed to support using the dedicated and
+	 *               the OS based memory backends together.
+	 */
+	/*
+	 * This will need updating to propagate coherency flags
+	 * See MIDBASE-1057
+	 */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0))
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+#else
+	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
+#endif
+	vma->vm_ops = &kbase_vm_ops;
+	vma->vm_private_data = map;
+
+	page_array = kbase_get_cpu_phy_pages(reg);
+
+	if (!(reg->flags & KBASE_REG_CPU_CACHED) &&
+	    (reg->flags & (KBASE_REG_CPU_WR|KBASE_REG_CPU_RD))) {
+		/* We can't map vmalloc'd memory uncached.
+		 * Other memory will have been returned from
+		 * kbase_mem_allocator_alloc which would be
+		 * suitable for mapping uncached.
+		 */
+		BUG_ON(kaddr);
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	if (!kaddr) {
+		vma->vm_flags |= VM_PFNMAP;
+		for (i = 0; i < nr_pages; i++) {
+			err = vm_insert_pfn(vma, vma->vm_start + (i << PAGE_SHIFT), page_array[i + start_off] >> PAGE_SHIFT);
+			if (WARN_ON(err))
+				break;
+		}
+	} else {
+		/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
+		vma->vm_flags |= VM_MIXEDMAP;
+		/* vmalloc remapping is easy... */
+		err = remap_vmalloc_range(vma, kaddr, 0);
+		WARN_ON(err);
+	}
+
+	if (err) {
+		kfree(map);
+		goto out;
+	}
+
+	map->page_off = start_off;
+	map->region = free_on_close ? reg : NULL;
+	map->kctx = reg->kctx;
+	map->vm_start = vma->vm_start + aligned_offset;
+	if (aligned_offset) {
+		KBASE_DEBUG_ASSERT(!start_off);
+		map->vm_end = map->vm_start + (reg->nr_pages << PAGE_SHIFT);
+	} else {
+		map->vm_end = vma->vm_end;
+	}
+	map->alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc);
+	map->count = 1; /* start with one ref */
+
+	if (reg->flags & KBASE_REG_CPU_CACHED)
+		map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
+
+	list_add(&map->mappings_list, &map->alloc->mappings);
+
+ out:
+	return err;
+}
+
+static int kbase_trace_buffer_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kaddr)
+{
+	struct kbase_va_region *new_reg;
+	u32 nr_pages;
+	size_t size;
+	int err = 0;
+	u32 *tb;
+	int owns_tb = 1;
+
+	dev_dbg(kctx->kbdev->dev, "in %s\n", __func__);
+	size = (vma->vm_end - vma->vm_start);
+	nr_pages = size >> PAGE_SHIFT;
+
+	if (!kctx->jctx.tb) {
+		KBASE_DEBUG_ASSERT(0 != size);
+		tb = vmalloc_user(size);
+
+		if (NULL == tb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		kbase_device_trace_buffer_install(kctx, tb, size);
+	} else {
+		err = -EINVAL;
+		goto out;
+	}
+
+	*kaddr = kctx->jctx.tb;
+
+	new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA);
+	if (!new_reg) {
+		err = -ENOMEM;
+		WARN_ON(1);
+		goto out_no_region;
+	}
+
+	new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_TB);
+	if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) {
+		err = -ENOMEM;
+		new_reg->cpu_alloc = NULL;
+		WARN_ON(1);
+		goto out_no_alloc;
+	}
+
+	new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc);
+
+	new_reg->cpu_alloc->imported.kctx = kctx;
+	new_reg->flags &= ~KBASE_REG_FREE;
+	new_reg->flags |= KBASE_REG_CPU_CACHED;
+
+	/* alloc now owns the tb */
+	owns_tb = 0;
+
+	if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages,
1) != 0) { + err = -ENOMEM; + WARN_ON(1); + goto out_no_va_region; + } + + *reg = new_reg; + + /* map read only, noexec */ + vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); + /* the rest of the flags is added by the cpu_mmap handler */ + + dev_dbg(kctx->kbdev->dev, "%s done\n", __func__); + return 0; + +out_no_va_region: +out_no_alloc: + kbase_free_alloced_region(new_reg); +out_no_region: + if (owns_tb) { + kbase_device_trace_buffer_uninstall(kctx); + vfree(tb); + } +out: + return err; +} + +static int kbase_mmu_dump_mmap(struct kbase_context *kctx, struct vm_area_struct *vma, struct kbase_va_region **const reg, void **const kmap_addr) +{ + struct kbase_va_region *new_reg; + void *kaddr; + u32 nr_pages; + size_t size; + int err = 0; + + dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n"); + size = (vma->vm_end - vma->vm_start); + nr_pages = size >> PAGE_SHIFT; + + kaddr = kbase_mmu_dump(kctx, nr_pages); + + if (!kaddr) { + err = -ENOMEM; + goto out; + } + + new_reg = kbase_alloc_free_region(kctx, 0, nr_pages, KBASE_REG_ZONE_SAME_VA); + if (!new_reg) { + err = -ENOMEM; + WARN_ON(1); + goto out; + } + + new_reg->cpu_alloc = kbase_alloc_create(0, KBASE_MEM_TYPE_RAW); + if (IS_ERR_OR_NULL(new_reg->cpu_alloc)) { + err = -ENOMEM; + new_reg->cpu_alloc = NULL; + WARN_ON(1); + goto out_no_alloc; + } + + new_reg->gpu_alloc = kbase_mem_phy_alloc_get(new_reg->cpu_alloc); + + new_reg->flags &= ~KBASE_REG_FREE; + new_reg->flags |= KBASE_REG_CPU_CACHED; + if (kbase_add_va_region(kctx, new_reg, vma->vm_start, nr_pages, 1) != 0) { + err = -ENOMEM; + WARN_ON(1); + goto out_va_region; + } + + *kmap_addr = kaddr; + *reg = new_reg; + + dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n"); + return 0; + +out_no_alloc: +out_va_region: + kbase_free_alloced_region(new_reg); +out: + return err; +} + + +void kbase_os_mem_map_lock(struct kbase_context *kctx) +{ + struct mm_struct *mm = current->mm; + (void)kctx; + down_read(&mm->mmap_sem); +} + +void kbase_os_mem_map_unlock(struct kbase_context *kctx) +{ + struct mm_struct *mm = current->mm; + (void)kctx; + up_read(&mm->mmap_sem); +} + +int kbase_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct kbase_context *kctx = file->private_data; + struct kbase_va_region *reg; + void *kaddr = NULL; + size_t nr_pages; + int err = 0; + int free_on_close = 0; + struct device *dev = kctx->kbdev->dev; + size_t aligned_offset = 0; + + dev_dbg(dev, "kbase_mmap\n"); + nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + + /* strip away corresponding VM_MAY% flags to the VM_% flags requested */ + vma->vm_flags &= ~((vma->vm_flags & (VM_READ | VM_WRITE)) << 4); + + if (0 == nr_pages) { + err = -EINVAL; + goto out; + } + + if (!(vma->vm_flags & VM_SHARED)) { + err = -EINVAL; + goto out; + } + + kbase_gpu_vm_lock(kctx); + + if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MAP_TRACKING_HANDLE)) { + /* The non-mapped tracking helper page */ + err = kbase_tracking_page_setup(kctx, vma); + goto out_unlock; + } + + /* if not the MTP, verify that the MTP has been mapped */ + rcu_read_lock(); + /* catches both when the special page isn't present or + * when we've forked */ + if (rcu_dereference(kctx->process_mm) != current->mm) { + err = -EINVAL; + rcu_read_unlock(); + goto out_unlock; + } + rcu_read_unlock(); + + switch (vma->vm_pgoff) { + case PFN_DOWN(BASE_MEM_INVALID_HANDLE): + case PFN_DOWN(BASE_MEM_WRITE_ALLOC_PAGES_HANDLE): + /* Illegal handle for direct map */ + err = -EINVAL; + goto out_unlock; + case PFN_DOWN(BASE_MEM_TRACE_BUFFER_HANDLE): + err = 
kbase_trace_buffer_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		dev_dbg(dev, "kbase_trace_buffer_mmap ok\n");
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	case PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE):
+		/* MMU dump */
+		err = kbase_mmu_dump_mmap(kctx, vma, &reg, &kaddr);
+		if (0 != err)
+			goto out_unlock;
+		/* free the region on munmap */
+		free_on_close = 1;
+		goto map;
+	case PFN_DOWN(BASE_MEM_COOKIE_BASE) ...
+	     PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) - 1: {
+		/* SAME_VA stuff, fetch the right region */
+		int gpu_pc_bits;
+		int cookie = vma->vm_pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+
+		gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
+		reg = kctx->pending_regions[cookie];
+		if (NULL != reg) {
+			if (reg->flags & KBASE_REG_ALIGNED) {
+				/* nr_pages must be able to hold alignment pages
+				 * plus actual pages */
+				if (nr_pages != ((1UL << gpu_pc_bits >>
+							PAGE_SHIFT) +
+							reg->nr_pages)) {
+					/* incorrect mmap size */
+					/* leave the cookie for a potential
+					 * later mapping, or to be reclaimed
+					 * later when the context is freed */
+					err = -ENOMEM;
+					goto out_unlock;
+				}
+
+				aligned_offset = (vma->vm_start +
+						(1UL << gpu_pc_bits) - 1) &
+						~((1UL << gpu_pc_bits) - 1);
+				aligned_offset -= vma->vm_start;
+			} else if (reg->nr_pages != nr_pages) {
+				/* incorrect mmap size */
+				/* leave the cookie for a potential later
+				 * mapping, or to be reclaimed later when the
+				 * context is freed */
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			if ((vma->vm_flags & VM_READ &&
+					!(reg->flags & KBASE_REG_CPU_RD)) ||
+					(vma->vm_flags & VM_WRITE &&
+					!(reg->flags & KBASE_REG_CPU_WR))) {
+				/* VM flags inconsistent with region flags */
+				err = -EPERM;
+				dev_err(dev, "%s:%d inconsistent VM flags\n",
+						__FILE__, __LINE__);
+				goto out_unlock;
+			}
+
+			/* adjust down nr_pages to what we have physically */
+			nr_pages = kbase_reg_current_backed_size(reg);
+
+			if (kbase_gpu_mmap(kctx, reg,
+					vma->vm_start + aligned_offset,
+					reg->nr_pages, 1) != 0) {
+				dev_err(dev, "%s:%d\n", __FILE__, __LINE__);
+				/* Unable to map in GPU space. */
+				WARN_ON(1);
+				err = -ENOMEM;
+				goto out_unlock;
+			}
+
+			/* no need for the cookie anymore */
+			kctx->pending_regions[cookie] = NULL;
+			kctx->cookies |= (1UL << cookie);
+
+			/*
+			 * Overwrite the offset with the
+			 * region start_pfn, so we effectively
+			 * map from offset 0 in the region.
+			 */
+			vma->vm_pgoff = reg->start_pfn;
+
+			/* free the region on munmap */
+			free_on_close = 1;
+			goto map;
+		}
+
+		err = -ENOMEM;
+		goto out_unlock;
+	}
+	default: {
+		reg = kbase_region_tracker_find_region_enclosing_address(kctx, (u64)vma->vm_pgoff << PAGE_SHIFT);
+
+		if (reg && !(reg->flags & KBASE_REG_FREE)) {
+			/* will this mapping overflow the size of the region?
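+			 * vm_pgoff holds a GPU PFN here, so the offset into
+			 * the region is (vm_pgoff - start_pfn); e.g. asking
+			 * for 8 pages at an offset of 4 into a 10-page
+			 * region overflows, as only 6 pages remain.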
*/ + if (nr_pages > (reg->nr_pages - (vma->vm_pgoff - reg->start_pfn))) + goto overflow; + + if ((vma->vm_flags & VM_READ && + !(reg->flags & KBASE_REG_CPU_RD)) || + (vma->vm_flags & VM_WRITE && + !(reg->flags & KBASE_REG_CPU_WR))) { + /* VM flags inconsistent with region flags */ + err = -EPERM; + dev_err(dev, "%s:%d inconsistent VM flags\n", + __FILE__, __LINE__); + goto out_unlock; + } + +#ifdef CONFIG_DMA_SHARED_BUFFER + if (reg->cpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) + goto dma_map; +#endif /* CONFIG_DMA_SHARED_BUFFER */ + + /* limit what we map to the amount currently backed */ + if (reg->cpu_alloc->nents < (vma->vm_pgoff - reg->start_pfn + nr_pages)) { + if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents) + nr_pages = 0; + else + nr_pages = reg->cpu_alloc->nents - (vma->vm_pgoff - reg->start_pfn); + } + + goto map; + } + +overflow: + err = -ENOMEM; + goto out_unlock; + } /* default */ + } /* switch */ +map: + err = kbase_cpu_mmap(reg, vma, kaddr, nr_pages, aligned_offset, free_on_close); + + if (vma->vm_pgoff == PFN_DOWN(BASE_MEM_MMU_DUMP_HANDLE)) { + /* MMU dump - userspace should now have a reference on + * the pages, so we can now free the kernel mapping */ + vfree(kaddr); + } + goto out_unlock; + +#ifdef CONFIG_DMA_SHARED_BUFFER +dma_map: + err = dma_buf_mmap(reg->cpu_alloc->imported.umm.dma_buf, vma, vma->vm_pgoff - reg->start_pfn); +#endif /* CONFIG_DMA_SHARED_BUFFER */ +out_unlock: + kbase_gpu_vm_unlock(kctx); +out: + if (err) + dev_err(dev, "mmap failed %d\n", err); + + return err; +} + +KBASE_EXPORT_TEST_API(kbase_mmap); + +void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, + struct kbase_vmap_struct *map) +{ + struct kbase_va_region *reg; + unsigned long page_index; + unsigned int offset = gpu_addr & ~PAGE_MASK; + size_t page_count = PFN_UP(offset + size); + phys_addr_t *page_array; + struct page **pages; + void *cpu_addr = NULL; + pgprot_t prot; + size_t i; + bool sync_needed; + + if (!size || !map) + return NULL; + + /* check if page_count calculation will wrap */ + if (size > ((size_t)-1 / PAGE_SIZE)) + return NULL; + + kbase_gpu_vm_lock(kctx); + + reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr); + if (!reg || (reg->flags & KBASE_REG_FREE)) + goto out_unlock; + + page_index = (gpu_addr >> PAGE_SHIFT) - reg->start_pfn; + + /* check if page_index + page_count will wrap */ + if (-1UL - page_count < page_index) + goto out_unlock; + + if (page_index + page_count > kbase_reg_current_backed_size(reg)) + goto out_unlock; + + page_array = kbase_get_cpu_phy_pages(reg); + if (!page_array) + goto out_unlock; + + pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto out_unlock; + + for (i = 0; i < page_count; i++) + pages[i] = pfn_to_page(PFN_DOWN(page_array[page_index + i])); + + prot = PAGE_KERNEL; + if (!(reg->flags & KBASE_REG_CPU_CACHED)) { + /* Map uncached */ + prot = pgprot_writecombine(prot); + } + + cpu_addr = vmap(pages, page_count, VM_MAP, prot); + + kfree(pages); + + if (!cpu_addr) + goto out_unlock; + + map->gpu_addr = gpu_addr; + map->cpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + map->cpu_pages = &kbase_get_cpu_phy_pages(reg)[page_index]; + map->gpu_alloc = kbase_mem_phy_alloc_get(reg->gpu_alloc); + map->gpu_pages = &kbase_get_gpu_phy_pages(reg)[page_index]; + map->addr = (void *)((uintptr_t)cpu_addr + offset); + map->size = size; + map->is_cached = (reg->flags & KBASE_REG_CPU_CACHED) != 0; + sync_needed = map->is_cached; + + if (sync_needed) { + /* Sync 
first page */ + size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); + phys_addr_t cpu_pa = map->cpu_pages[0]; + phys_addr_t gpu_pa = map->gpu_pages[0]; + + kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, + KBASE_SYNC_TO_CPU); + + /* Sync middle pages (if any) */ + for (i = 1; page_count > 2 && i < page_count - 1; i++) { + cpu_pa = map->cpu_pages[i]; + gpu_pa = map->gpu_pages[i]; + kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, + KBASE_SYNC_TO_CPU); + } + + /* Sync last page (if any) */ + if (page_count > 1) { + cpu_pa = map->cpu_pages[page_count - 1]; + gpu_pa = map->gpu_pages[page_count - 1]; + sz = ((offset + size - 1) & ~PAGE_MASK) + 1; + kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, + KBASE_SYNC_TO_CPU); + } + } + kbase_gpu_vm_unlock(kctx); + + return map->addr; + +out_unlock: + kbase_gpu_vm_unlock(kctx); + return NULL; +} + +void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map) +{ + void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK); + bool sync_needed = map->is_cached; + vunmap(addr); + if (sync_needed) { + off_t offset = (uintptr_t)map->addr & ~PAGE_MASK; + size_t size = map->size; + size_t page_count = PFN_UP(offset + size); + size_t i; + + /* Sync first page */ + size_t sz = MIN(((size_t) PAGE_SIZE - offset), size); + phys_addr_t cpu_pa = map->cpu_pages[0]; + phys_addr_t gpu_pa = map->gpu_pages[0]; + + kbase_sync_single(kctx, cpu_pa, gpu_pa, offset, sz, + KBASE_SYNC_TO_DEVICE); + + /* Sync middle pages (if any) */ + for (i = 1; page_count > 2 && i < page_count - 1; i++) { + cpu_pa = map->cpu_pages[i]; + gpu_pa = map->gpu_pages[i]; + kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, PAGE_SIZE, + KBASE_SYNC_TO_DEVICE); + } + + /* Sync last page (if any) */ + if (page_count > 1) { + cpu_pa = map->cpu_pages[page_count - 1]; + gpu_pa = map->gpu_pages[page_count - 1]; + sz = ((offset + size - 1) & ~PAGE_MASK) + 1; + kbase_sync_single(kctx, cpu_pa, gpu_pa, 0, sz, + KBASE_SYNC_TO_DEVICE); + } + } + map->gpu_addr = 0; + map->cpu_alloc = kbase_mem_phy_alloc_put(map->cpu_alloc); + map->gpu_alloc = kbase_mem_phy_alloc_put(map->gpu_alloc); + map->cpu_pages = NULL; + map->gpu_pages = NULL; + map->addr = NULL; + map->size = 0; + map->is_cached = false; +} + +void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) +{ + struct mm_struct *mm; + + rcu_read_lock(); + mm = rcu_dereference(kctx->process_mm); + if (mm) { + atomic_add(pages, &kctx->nonmapped_pages); +#ifdef SPLIT_RSS_COUNTING + add_mm_counter(mm, MM_FILEPAGES, pages); +#else + spin_lock(&mm->page_table_lock); + add_mm_counter(mm, MM_FILEPAGES, pages); + spin_unlock(&mm->page_table_lock); +#endif + } + rcu_read_unlock(); +} + +static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx) +{ + int pages; + struct mm_struct *mm; + + spin_lock(&kctx->mm_update_lock); + mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock)); + if (!mm) { + spin_unlock(&kctx->mm_update_lock); + return; + } + + rcu_assign_pointer(kctx->process_mm, NULL); + spin_unlock(&kctx->mm_update_lock); + synchronize_rcu(); + + pages = atomic_xchg(&kctx->nonmapped_pages, 0); +#ifdef SPLIT_RSS_COUNTING + add_mm_counter(mm, MM_FILEPAGES, -pages); +#else + spin_lock(&mm->page_table_lock); + add_mm_counter(mm, MM_FILEPAGES, -pages); + spin_unlock(&mm->page_table_lock); +#endif +} + +static void kbase_special_vm_close(struct vm_area_struct *vma) +{ + struct kbase_context *kctx; + + kctx = vma->vm_private_data; + kbasep_os_process_page_usage_drain(kctx); +} + +static const 
struct vm_operations_struct kbase_vm_special_ops = { + .close = kbase_special_vm_close, +}; + +static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) +{ + /* check that this is the only tracking page */ + spin_lock(&kctx->mm_update_lock); + if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) { + spin_unlock(&kctx->mm_update_lock); + return -EFAULT; + } + + rcu_assign_pointer(kctx->process_mm, current->mm); + + spin_unlock(&kctx->mm_update_lock); + + /* no real access */ + vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; +#else + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO; +#endif + vma->vm_ops = &kbase_vm_special_ops; + vma->vm_private_data = kctx; + + return 0; +} +void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle) +{ + int i; + int res; + void *va; + dma_addr_t dma_pa; + struct kbase_va_region *reg; + phys_addr_t *page_array; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + DEFINE_DMA_ATTRS(attrs); +#endif + + u32 pages = ((size - 1) >> PAGE_SHIFT) + 1; + u32 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR | + BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR; + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(0 != size); + KBASE_DEBUG_ASSERT(0 != pages); + + if (size == 0) + goto err; + + /* All the alloc calls return zeroed memory */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + va = dma_alloc_attrs(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL, &attrs); +#else + va = dma_alloc_writecombine(kctx->kbdev->dev, size, &dma_pa, GFP_KERNEL); +#endif + if (!va) + goto err; + + /* Store the state so we can free it later. 
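+	 * kbase_va_free() needs the CPU VA, the DMA address and the size to
+	 * release the mapping and the underlying DMA allocation.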
*/ + handle->cpu_va = va; + handle->dma_pa = dma_pa; + handle->size = size; + + + reg = kbase_alloc_free_region(kctx, 0, pages, KBASE_REG_ZONE_SAME_VA); + if (!reg) + goto no_reg; + + reg->flags &= ~KBASE_REG_FREE; + kbase_update_region_flags(reg, flags); + + reg->cpu_alloc = kbase_alloc_create(pages, KBASE_MEM_TYPE_RAW); + if (IS_ERR_OR_NULL(reg->cpu_alloc)) + goto no_alloc; + + reg->gpu_alloc = kbase_mem_phy_alloc_get(reg->cpu_alloc); + + page_array = kbase_get_cpu_phy_pages(reg); + + for (i = 0; i < pages; i++) + page_array[i] = dma_pa + (i << PAGE_SHIFT); + + reg->cpu_alloc->nents = pages; + + kbase_gpu_vm_lock(kctx); + res = kbase_gpu_mmap(kctx, reg, (uintptr_t) va, pages, 1); + kbase_gpu_vm_unlock(kctx); + if (res) + goto no_mmap; + + return va; + +no_mmap: + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); +no_alloc: + kfree(reg); +no_reg: +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + dma_free_attrs(kctx->kbdev->dev, size, va, dma_pa, &attrs); +#else + dma_free_writecombine(kctx->kbdev->dev, size, va, dma_pa); +#endif +err: + return NULL; +} +KBASE_EXPORT_SYMBOL(kbase_va_alloc); + +void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle) +{ + struct kbase_va_region *reg; + int err; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + DEFINE_DMA_ATTRS(attrs); +#endif + + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(handle->cpu_va != NULL); + + kbase_gpu_vm_lock(kctx); + reg = kbase_region_tracker_find_region_base_address(kctx, (uintptr_t)handle->cpu_va); + KBASE_DEBUG_ASSERT(reg); + err = kbase_gpu_munmap(kctx, reg); + kbase_gpu_vm_unlock(kctx); + KBASE_DEBUG_ASSERT(!err); + + kbase_mem_phy_alloc_put(reg->cpu_alloc); + kbase_mem_phy_alloc_put(reg->gpu_alloc); + kfree(reg); + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + dma_free_attrs(kctx->kbdev->dev, handle->size, + handle->cpu_va, handle->dma_pa, &attrs); +#else + dma_free_writecombine(kctx->kbdev->dev, handle->size, + handle->cpu_va, handle->dma_pa); +#endif +} +KBASE_EXPORT_SYMBOL(kbase_va_free); + diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h new file mode 100644 index 00000000000..1d854152704 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_linux.h @@ -0,0 +1,73 @@ +/* + * + * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_mem_linux.h + * Base kernel memory APIs, Linux implementation. 
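+ *
+ * Declares the allocation, query, flags-change, import, alias, commit and
+ * mmap entry points implemented in mali_kbase_mem_linux.c.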
+ */ + +#ifndef _KBASE_MEM_LINUX_H_ +#define _KBASE_MEM_LINUX_H_ + +/** A HWC dump mapping */ +struct kbase_hwc_dma_mapping { + void *cpu_va; + dma_addr_t dma_pa; + size_t size; +}; + +struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages, u64 extent, u64 *flags, u64 *gpu_va, u16 *va_alignment); +int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, int query, u64 *const pages); +int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type, int handle, u64 *gpu_va, u64 *va_pages, u64 *flags); +u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, u64 nents, struct base_mem_aliasing_info *ai, u64 *num_pages); +int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned int flags, unsigned int mask); +int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages, enum base_backing_threshold_status *failure_reason); +int kbase_mmap(struct file *file, struct vm_area_struct *vma); + +struct kbase_vmap_struct { + u64 gpu_addr; + struct kbase_mem_phy_alloc *cpu_alloc; + struct kbase_mem_phy_alloc *gpu_alloc; + phys_addr_t *cpu_pages; + phys_addr_t *gpu_pages; + void *addr; + size_t size; + bool is_cached; +}; +void *kbase_vmap(struct kbase_context *kctx, u64 gpu_addr, size_t size, + struct kbase_vmap_struct *map); +void kbase_vunmap(struct kbase_context *kctx, struct kbase_vmap_struct *map); + +/** @brief Allocate memory from kernel space and map it onto the GPU + * + * @param kctx The context used for the allocation/mapping + * @param size The size of the allocation in bytes + * @param handle An opaque structure used to contain the state needed to free the memory + * @return the VA for kernel space and GPU MMU + */ +void *kbase_va_alloc(struct kbase_context *kctx, u32 size, struct kbase_hwc_dma_mapping *handle); + +/** @brief Free/unmap memory allocated by kbase_va_alloc + * + * @param kctx The context used for the allocation/mapping + * @param handle An opaque structure returned by the kbase_va_alloc function. + */ +void kbase_va_free(struct kbase_context *kctx, struct kbase_hwc_dma_mapping *handle); + +#endif /* _KBASE_MEM_LINUX_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h new file mode 100644 index 00000000000..441180b64d5 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_lowlevel.h @@ -0,0 +1,45 @@ +/* + * + * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#ifndef _KBASE_MEM_LOWLEVEL_H +#define _KBASE_MEM_LOWLEVEL_H + +#ifndef _KBASE_H_ +#error "Don't include this file directly, use mali_kbase.h instead" +#endif + +#include <linux/dma-mapping.h> + +/** + * @brief Flags for kbase_phy_allocator_pages_alloc + */ +#define KBASE_PHY_PAGES_FLAG_DEFAULT (0) /** Default allocation flag */ +#define KBASE_PHY_PAGES_FLAG_CLEAR (1 << 0) /** Clear the pages after allocation */ +#define KBASE_PHY_PAGES_FLAG_POISON (1 << 1) /** Fill the memory with a poison value */ + +#define KBASE_PHY_PAGES_SUPPORTED_FLAGS (KBASE_PHY_PAGES_FLAG_DEFAULT|KBASE_PHY_PAGES_FLAG_CLEAR|KBASE_PHY_PAGES_FLAG_POISON) + +#define KBASE_PHY_PAGES_POISON_VALUE 0xFD /** Value to fill the memory with when KBASE_PHY_PAGES_FLAG_POISON is set */ + +enum kbase_sync_type { + KBASE_SYNC_TO_CPU, + KBASE_SYNC_TO_DEVICE +}; + +#endif /* _KBASE_MEM_LOWLEVEL_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c new file mode 100644 index 00000000000..bf60c192029 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.c @@ -0,0 +1,105 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase_gpu_memory_debugfs.h> + +#ifdef CONFIG_DEBUG_FS + +/* mem_profile file name max length 22 based on format <int>_<int>\0 */ +#define KBASEP_DEBUGFS_FNAME_SIZE_MAX (10+1+10+1) + +void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) +{ + spin_lock(&kctx->mem_profile_lock); + kfree(kctx->mem_profile_data); + kctx->mem_profile_data = data; + kctx->mem_profile_size = size; + spin_unlock(&kctx->mem_profile_lock); +} + +/** Show callback for the @c mem_profile debugfs file. + * + * This function is called to get the contents of the @c mem_profile debugfs + * file. This is a report of current memory usage and distribution in userspace.
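+ * + * A usage sketch, hedged: once a context has published a report via + * kbasep_mem_profile_debugfs_insert(), it can be read back from user space. + * The debugfs mount point and context directory name below are illustrative + * assumptions, not fixed by this file: + * + *   cat /sys/kernel/debug/mali/<pid>_<id>/mem_profile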
+ * + * @param sfile The debugfs entry + * @param data Data associated with the entry + * + * @return 0 if the data was successfully written to the debugfs entry file + * -1 if an error was encountered + */ +static int kbasep_mem_profile_seq_show(struct seq_file *sfile, void *data) +{ + struct kbase_context *kctx = sfile->private; + + KBASE_DEBUG_ASSERT(kctx != NULL); + + spin_lock(&kctx->mem_profile_lock); + seq_write(sfile, kctx->mem_profile_data, kctx->mem_profile_size); + seq_putc(sfile, '\n'); + spin_unlock(&kctx->mem_profile_lock); + + return 0; +} + +/* + * File operations related to debugfs entry for mem_profile + */ +static int kbasep_mem_profile_debugfs_open(struct inode *in, struct file *file) +{ + return single_open(file, kbasep_mem_profile_seq_show, in->i_private); +} + +static const struct file_operations kbasep_mem_profile_debugfs_fops = { + .open = kbasep_mem_profile_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + + spin_lock_init(&kctx->mem_profile_lock); + + debugfs_create_file("mem_profile", S_IRUGO, kctx->kctx_dentry, kctx, + &kbasep_mem_profile_debugfs_fops); +} + +void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(kctx != NULL); + + spin_lock(&kctx->mem_profile_lock); + kfree(kctx->mem_profile_data); + kctx->mem_profile_data = NULL; + spin_unlock(&kctx->mem_profile_lock); +} + +#else /* CONFIG_DEBUG_FS */ + +/** + * @brief Stub function for when debugfs is disabled + */ +void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size) +{ + kfree(data); +} +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h new file mode 100644 index 00000000000..205bd378c8e --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs.h @@ -0,0 +1,58 @@ +/* + * + * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_mem_profile_debugfs.h + * Header file for mem profile entries in debugfs + * + */ + +#ifndef _KBASE_MEM_PROFILE_DEBUGFS_H +#define _KBASE_MEM_PROFILE_DEBUGFS_H + +#include <mali_kbase.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +/** + * @brief Add new entry to Mali memory profile debugfs + */ +void kbasep_mem_profile_debugfs_add(struct kbase_context *kctx); + +/** + * @brief Remove entry from Mali memory profile debugfs + */ +void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx); + +/** + * @brief Insert data into the debugfs file, so it can be read by userspace + * + * The function takes ownership of @c data and frees it later when new data + * are inserted.
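+ * + * A minimal call sketch (hypothetical caller; nr_pages is an assumed + * variable, and the buffer must be heap-allocated since ownership passes + * to this function): + * + *   char *buf = kmalloc(64, GFP_KERNEL); + * + *   if (buf) { + *           int n = scnprintf(buf, 64, "heap pages: %d", nr_pages); + * + *           kbasep_mem_profile_debugfs_insert(kctx, buf, n); + *   }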
+ * + * @param kctx Context to which file data should be inserted + * @param data NULL-terminated string to be inserted into the mem_profile file, + * without a trailing newline character + * @param size Length of @c data in bytes + */ +void kbasep_mem_profile_debugfs_insert(struct kbase_context *kctx, char *data, + size_t size); + +#endif /*_KBASE_MEM_PROFILE_DEBUGFS_H*/ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h new file mode 100644 index 00000000000..82f0702974c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mem_profile_debugfs_buf_size.h @@ -0,0 +1,33 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_mem_profile_debugfs_buf_size.h + * Header file for the size of the buffer to accumulate the histogram report text in + */ + +#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ +#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_ + +/** + * The size of the buffer to accumulate the histogram report text in + * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT + */ +#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t) (64 + ((80 + (56 * 64)) * 15) + 56)) + +#endif /*_KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_*/ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu.c b/drivers/gpu/arm/midgard/mali_kbase_mmu.c new file mode 100644 index 00000000000..325b79082eb --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu.c @@ -0,0 +1,1653 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_mmu.c + * Base kernel MMU management. + */ + +/* #define DEBUG 1 */ +#include <linux/kernel.h> +#include <linux/dma-mapping.h> +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#if defined(CONFIG_MALI_GATOR_SUPPORT) +#include <mali_kbase_gator.h> +#endif +#if defined(CONFIG_MALI_MIPE_ENABLED) +#include <mali_kbase_tlstream.h> +#endif +#include <mali_kbase_debug.h> + +#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) + +#include <mali_kbase_defs.h> +#include <mali_kbase_hw.h> +#include <mali_kbase_mmu_hw.h> +#include <mali_kbase_hwaccess_jm.h> + +#define KBASE_MMU_PAGE_ENTRIES 512 + +/** + * kbase_mmu_sync_pgd - sync page directory to memory + * @dev: Device pointer. + * @handle: Address of DMA region. + * @size: Size of the region to sync. + * + * This should be called after each page directory update.
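+ * + * A typical invocation, mirroring the call sites later in this file + * (sketch only; p is the struct page backing the PGD): + * + *   kbase_mmu_sync_pgd(kctx->kbdev->dev, kbase_dma_addr(p), PAGE_SIZE);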
+ */ + +static void kbase_mmu_sync_pgd(struct device *dev, + dma_addr_t handle, size_t size) +{ + dma_sync_single_for_device(dev, handle, size, DMA_TO_DEVICE); +} + +/* + * Definitions: + * - PGD: Page Directory. + * - PTE: Page Table Entry. A 64bit value pointing to the next + * level of translation + * - ATE: Address Translation Entry. A 64bit value pointing to + * a 4kB physical page. + */ + +static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, const char *reason_str); + + +static size_t make_multiple(size_t minimum, size_t multiple) +{ + size_t remainder = minimum % multiple; + + if (remainder == 0) + return minimum; + + return minimum + multiple - remainder; +} + +void page_fault_worker(struct work_struct *data) +{ + u64 fault_pfn; + u32 fault_status; + size_t new_pages; + size_t fault_rel_pfn; + struct kbase_as *faulting_as; + int as_no; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct kbase_va_region *region; + int err; + bool grown = false; + + faulting_as = container_of(data, struct kbase_as, work_pagefault); + fault_pfn = faulting_as->fault_addr >> PAGE_SHIFT; + as_no = faulting_as->number; + + kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + + /* Grab the context that was already refcounted in kbase_mmu_interrupt(). + * Therefore, it cannot be scheduled out of this AS until we explicitly release it + * + * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */ + kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); + + if (kctx == NULL) { + /* Only handle this if not already suspended */ + if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + /* Address space has no context, terminate the work */ + + /* AS transaction begin */ + mutex_lock(&faulting_as->transaction_mutex); + + kbase_mmu_disable_as(kbdev, as_no); + + mutex_unlock(&faulting_as->transaction_mutex); + /* AS transaction end */ + + kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_pm_context_idle(kbdev); + } + atomic_dec(&kbdev->faults_pending); + return; + } + + KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); + + fault_status = faulting_as->fault_status; + switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) { + + case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT: + /* need to check against the region to handle this one */ + break; + + case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Permission failure"); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Translation table bus fault"); + goto fault_done; + + case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG: + /* nothing to do, but we don't expect this fault currently */ + dev_warn(kbdev->dev, "Access flag unexpectedly set"); + goto fault_done; + + + default: + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Unknown fault code"); + goto fault_done; + } + + /* so we have a translation fault, let's see if it is for growable + * memory */ + kbase_gpu_vm_lock(kctx); + + region = kbase_region_tracker_find_region_enclosing_address(kctx, + faulting_as->fault_addr); + if (!region || region->flags & KBASE_REG_FREE) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory is not mapped on the GPU"); + goto
fault_done; + } + + if ((region->flags & GROWABLE_FLAGS_REQUIRED) + != GROWABLE_FLAGS_REQUIRED) { + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Memory is not growable"); + goto fault_done; + } + + /* find the size we need to grow it by */ + /* we know the result fits in a size_t due to kbase_region_tracker_find_region_enclosing_address + * validating the fault_address to be within a size_t from the start_pfn */ + fault_rel_pfn = fault_pfn - region->start_pfn; + + if (fault_rel_pfn < kbase_reg_current_backed_size(region)) { + dev_dbg(kbdev->dev, "Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring", + faulting_as->fault_addr, region->start_pfn, + region->start_pfn + + kbase_reg_current_backed_size(region)); + + kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE); + /* [1] in case another page fault occurred while we were + * handling the (duplicate) page fault we need to ensure we + * don't lose the other page fault as a result of us clearing + * the MMU IRQ. Therefore, after we clear the MMU IRQ we send + * an UNLOCK command that will retry any stalled memory + * transaction (which should cause the other page fault to be + * raised again). + */ + kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, + AS_COMMAND_UNLOCK, 1); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_gpu_vm_unlock(kctx); + + goto fault_done; + } + + new_pages = make_multiple(fault_rel_pfn - + kbase_reg_current_backed_size(region) + 1, + region->extent); + + /* cap to max vsize */ + if (new_pages + kbase_reg_current_backed_size(region) > + region->nr_pages) + new_pages = region->nr_pages - + kbase_reg_current_backed_size(region); + + if (0 == new_pages) { + /* Duplicate of a fault we've already handled, nothing to do */ + kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE); + /* See comment [1] about UNLOCK usage */ + kbase_mmu_hw_do_operation(kbdev, faulting_as, NULL, 0, 0, + AS_COMMAND_UNLOCK, 1); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_gpu_vm_unlock(kctx); + goto fault_done; + } + + if (kbase_alloc_phy_pages_helper(region->gpu_alloc, new_pages) == 0) { + if (region->gpu_alloc != region->cpu_alloc) { + if (kbase_alloc_phy_pages_helper( + region->cpu_alloc, new_pages) == 0) { + grown = true; + } else { + kbase_free_phy_pages_helper(region->gpu_alloc, + new_pages); + } + } else { + grown = true; + } + } + + + if (grown) { + u32 op; + + /* alloc success */ + KBASE_DEBUG_ASSERT(kbase_reg_current_backed_size(region) <= region->nr_pages); + + /* AS transaction begin */ + mutex_lock(&faulting_as->transaction_mutex); + + /* set up the new pages */ + err = kbase_mmu_insert_pages(kctx, region->start_pfn + kbase_reg_current_backed_size(region) - new_pages, &kbase_get_gpu_phy_pages(region)[kbase_reg_current_backed_size(region) - new_pages], new_pages, region->flags); + if (err) { + /* failed to insert pages, handle as a normal PF */ + mutex_unlock(&faulting_as->transaction_mutex); + kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); + if (region->gpu_alloc != region->cpu_alloc) + kbase_free_phy_pages_helper(region->cpu_alloc, + new_pages); + kbase_gpu_vm_unlock(kctx); + /* The locked VA region will be unlocked and the cache invalidated in here */ + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Page table update failure"); + goto fault_done; + } +#if defined(CONFIG_MALI_GATOR_SUPPORT) +
kbase_trace_mali_page_fault_insert_pages(as_no, new_pages); +#endif +#if defined(CONFIG_MALI_MIPE_ENABLED) + kbase_tlstream_aux_pagefault( + as_no, + atomic_read(&kctx->used_pages)); +#endif + + /* flush L2 and unlock the VA (resumes the MMU) */ + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_6367)) + op = AS_COMMAND_FLUSH; + else + op = AS_COMMAND_FLUSH_PT; + + /* clear MMU interrupt - this needs to be done after updating + * the page tables but before issuing a FLUSH command. The + * FLUSH cmd has a side effect that it restarts stalled memory + * transactions in other address spaces which may cause + * another fault to occur. If we didn't clear the interrupt at + * this stage a new IRQ might not be raised when the GPU finds + * a MMU IRQ is already pending. + */ + kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE); + + kbase_mmu_hw_do_operation(kbdev, faulting_as, kctx, + faulting_as->fault_addr >> PAGE_SHIFT, + new_pages, + op, 1); + + mutex_unlock(&faulting_as->transaction_mutex); + /* AS transaction end */ + + /* reenable this in the mask */ + kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_PAGE); + kbase_gpu_vm_unlock(kctx); + } else { + /* failed to extend, handle as a normal PF */ + kbase_gpu_vm_unlock(kctx); + kbase_mmu_report_fault_and_kill(kctx, faulting_as, + "Page allocation failure"); + } + +fault_done: + /* + * By this point, the fault was handled in some way, + * so release the ctx refcount + */ + kbasep_js_runpool_release_ctx(kbdev, kctx); + + atomic_dec(&kbdev->faults_pending); +} + +phys_addr_t kbase_mmu_alloc_pgd(struct kbase_context *kctx) +{ + phys_addr_t pgd; + u64 *page; + int i; + struct page *p; + + KBASE_DEBUG_ASSERT(NULL != kctx); + kbase_atomic_add_pages(1, &kctx->used_pages); + kbase_atomic_add_pages(1, &kctx->kbdev->memdev.used_pages); + + if (kbase_mem_allocator_alloc(kctx->pgd_allocator, 1, &pgd) != 0) + goto sub_pages; + + p = pfn_to_page(PFN_DOWN(pgd)); + page = kmap(p); + if (NULL == page) + goto alloc_free; + + kbase_process_page_usage_inc(kctx, 1); + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) + kctx->kbdev->mmu_mode->entry_invalidate(&page[i]); + + kbase_mmu_sync_pgd(kctx->kbdev->dev, kbase_dma_addr(p), PAGE_SIZE); + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + return pgd; + +alloc_free: + kbase_mem_allocator_free(kctx->pgd_allocator, 1, &pgd, false); +sub_pages: + kbase_atomic_sub_pages(1, &kctx->used_pages); + kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_mmu_alloc_pgd); + +/* Given PGD PFN for level N, return PGD PFN for level N+1 */ +static phys_addr_t mmu_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level) +{ + u64 *page; + phys_addr_t target_pgd; + struct page *p; + + KBASE_DEBUG_ASSERT(pgd); + KBASE_DEBUG_ASSERT(NULL != kctx); + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Architecture spec defines level-0 as being the top-most. + * This is a bit unfortunate here, but we keep the same convention. 
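+ * + * As a worked example of the index extraction performed just below + * (vpfn >>= (3 - level) * 9; vpfn &= 0x1FF), each level consumes a + * 9-bit slice of the virtual PFN: + * + *   level 0 (top):    index = (vpfn >> 27) & 0x1FF + *   level 1:          index = (vpfn >> 18) & 0x1FF + *   level 2:          index = (vpfn >> 9) & 0x1FF + *   level 3 (bottom): index = vpfn & 0x1FF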
+ */ + vpfn >>= (3 - level) * 9; + vpfn &= 0x1FF; + + p = pfn_to_page(PFN_DOWN(pgd)); + page = kmap(p); + if (NULL == page) { + dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kmap failure\n"); + return 0; + } + + target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); + + if (!target_pgd) { + target_pgd = kbase_mmu_alloc_pgd(kctx); + if (!target_pgd) { + dev_warn(kctx->kbdev->dev, "mmu_get_next_pgd: kbase_mmu_alloc_pgd failure\n"); + kunmap(p); + return 0; + } + + kctx->kbdev->mmu_mode->entry_set_pte(&page[vpfn], target_pgd); + + kbase_mmu_sync_pgd(kctx->kbdev->dev, + kbase_dma_addr(p), PAGE_SIZE); + /* Rely on the caller to update the address space flags. */ + } + + kunmap(p); + return target_pgd; +} + +static phys_addr_t mmu_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) +{ + phys_addr_t pgd; + int l; + + pgd = kctx->pgd; + + for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) { + pgd = mmu_get_next_pgd(kctx, pgd, vpfn, l); + /* Handle failure condition */ + if (!pgd) { + dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd: mmu_get_next_pgd failure\n"); + return 0; + } + } + + return pgd; +} + +static phys_addr_t mmu_insert_pages_recover_get_next_pgd(struct kbase_context *kctx, phys_addr_t pgd, u64 vpfn, int level) +{ + u64 *page; + phys_addr_t target_pgd; + + KBASE_DEBUG_ASSERT(pgd); + KBASE_DEBUG_ASSERT(NULL != kctx); + + lockdep_assert_held(&kctx->reg_lock); + + /* + * Architecture spec defines level-0 as being the top-most. + * This is a bit unfortunate here, but we keep the same convention. + */ + vpfn >>= (3 - level) * 9; + vpfn &= 0x1FF; + + page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + /* kmap_atomic should NEVER fail */ + KBASE_DEBUG_ASSERT(NULL != page); + + target_pgd = kctx->kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]); + /* As we are recovering from what has already been set up, we should have a target_pgd */ + KBASE_DEBUG_ASSERT(0 != target_pgd); + kunmap_atomic(page); + return target_pgd; +} + +static phys_addr_t mmu_insert_pages_recover_get_bottom_pgd(struct kbase_context *kctx, u64 vpfn) +{ + phys_addr_t pgd; + int l; + + pgd = kctx->pgd; + + for (l = MIDGARD_MMU_TOPLEVEL; l < 3; l++) { + pgd = mmu_insert_pages_recover_get_next_pgd(kctx, pgd, vpfn, l); + /* Should never fail */ + KBASE_DEBUG_ASSERT(0 != pgd); + } + + return pgd; +} + +static void mmu_insert_pages_failure_recovery(struct kbase_context *kctx, u64 vpfn, + size_t nr) +{ + phys_addr_t pgd; + u64 *pgd_page; + struct kbase_mmu_mode const *mmu_mode; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(0 != vpfn); + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + + lockdep_assert_held(&kctx->reg_lock); + + mmu_mode = kctx->kbdev->mmu_mode; + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + struct page *p; + + if (count > nr) + count = nr; + + pgd = mmu_insert_pages_recover_get_bottom_pgd(kctx, vpfn); + KBASE_DEBUG_ASSERT(0 != pgd); + + p = pfn_to_page(PFN_DOWN(pgd)); + + pgd_page = kmap_atomic(p); + KBASE_DEBUG_ASSERT(NULL != pgd_page); + + /* Invalidate the entries we added */ + for (i = 0; i < count; i++) + mmu_mode->entry_invalidate(&pgd_page[index + i]); + + vpfn += count; + nr -= count; + + kbase_mmu_sync_pgd(kctx->kbdev->dev, + kbase_dma_addr(p), + PAGE_SIZE); + + kunmap_atomic(pgd_page); + } +} + +/* + * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' + */ +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, + phys_addr_t phys, size_t 
nr, + unsigned long flags) +{ + phys_addr_t pgd; + u64 *pgd_page; + /* In case the insert_single_page only partially completes we need to be + * able to recover */ + bool recover_required = false; + u64 recover_vpfn = vpfn; + size_t recover_count = 0; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(0 != vpfn); + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + + lockdep_assert_held(&kctx->reg_lock); + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + struct page *p; + + if (count > nr) + count = nr; + + /* + * Repeatedly calling mmu_get_bottom_pgd() is clearly + * suboptimal. We don't have to re-parse the whole tree + * each time (just cache the l0-l2 sequence). + * On the other hand, it's only a gain when we map more than + * 256 pages at once (on average). Do we really care? + */ + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { + dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_single_page: mmu_get_bottom_pgd failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed */ + mmu_insert_pages_failure_recovery(kctx, + recover_vpfn, + recover_count); + } + return -EINVAL; + } + + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap(p); + if (!pgd_page) { + dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_single_page: kmap failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed */ + mmu_insert_pages_failure_recovery(kctx, + recover_vpfn, + recover_count); + } + return -ENOMEM; + } + + for (i = 0; i < count; i++) { + unsigned int ofs = index + i; + + KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); + kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs], + phys, flags); + } + + vpfn += count; + nr -= count; + + kbase_mmu_sync_pgd(kctx->kbdev->dev, + kbase_dma_addr(p) + + (index * sizeof(u64)), + count * sizeof(u64)); + + kunmap(p); + /* We have started modifying the page table. + * If further pages need inserting and fail we need to undo what + * has already taken place */ + recover_required = true; + recover_count += count; + } + return 0; +} + +/* + * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' + */ +int kbase_mmu_insert_pages(struct kbase_context *kctx, u64 vpfn, + phys_addr_t *phys, size_t nr, + unsigned long flags) +{ + phys_addr_t pgd; + u64 *pgd_page; + /* In case the insert_pages only partially completes we need to be able + * to recover */ + bool recover_required = false; + u64 recover_vpfn = vpfn; + size_t recover_count = 0; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(0 != vpfn); + /* 64-bit address range is the max */ + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + + lockdep_assert_held(&kctx->reg_lock); + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + struct page *p; + + if (count > nr) + count = nr; + + /* + * Repeatedly calling mmu_get_bottom_pgd() is clearly + * suboptimal. We don't have to re-parse the whole tree + * each time (just cache the l0-l2 sequence). + * On the other hand, it's only a gain when we map more than + * 256 pages at once (on average). Do we really care?
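+ * + * Illustrative cost, with hypothetical numbers: an unaligned map of + * 600 pages starting at bottom-level index 100 covers 412 entries on + * the first pass and 188 on the second, i.e. two full tree walks from + * the top, where caching the l0-l2 sequence would make the second + * lookup nearly free.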
+ */ + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { + dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: mmu_get_bottom_pgd failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed */ + mmu_insert_pages_failure_recovery(kctx, + recover_vpfn, + recover_count); + } + return -EINVAL; + } + + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap(p); + if (!pgd_page) { + dev_warn(kctx->kbdev->dev, "kbase_mmu_insert_pages: kmap failure\n"); + if (recover_required) { + /* Invalidate the pages we have partially + * completed */ + mmu_insert_pages_failure_recovery(kctx, + recover_vpfn, + recover_count); + } + return -ENOMEM; + } + + for (i = 0; i < count; i++) { + unsigned int ofs = index + i; + + KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); + kctx->kbdev->mmu_mode->entry_set_ate(&pgd_page[ofs], + phys[i], flags); + } + + phys += count; + vpfn += count; + nr -= count; + + kbase_mmu_sync_pgd(kctx->kbdev->dev, + kbase_dma_addr(p) + + (index * sizeof(u64)), + count * sizeof(u64)); + + kunmap(p); + /* We have started modifying the page table. If further pages + * need inserting and fail we need to undo what has already + * taken place */ + recover_required = true; + recover_count += count; + } + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); + +/** + * This function is responsible for validating the MMU PTs and + * triggering the required flushes. + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. + */ +static void kbase_mmu_flush(struct kbase_context *kctx, u64 vpfn, size_t nr) +{ + struct kbase_device *kbdev; + bool ctx_is_in_runpool; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + kbdev = kctx->kbdev; + + /* We must flush if we're currently running jobs. At the very least, we need to retain the + * context to ensure it doesn't schedule out whilst we're trying to flush it */ + ctx_is_in_runpool = kbasep_js_runpool_retain_ctx(kbdev, kctx); + + if (ctx_is_in_runpool) { + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + /* Second level check is to try to only do this when jobs are running. The refcount is + * a heuristic for this. */ + if (kbdev->js_data.runpool_irq.per_as_data[kctx->as_nr].as_busy_refcount >= 2) { + if (!kbase_pm_context_active_handle_suspend(kbdev, + KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + int ret; + u32 op; + + /* AS transaction begin */ + mutex_lock(&kbdev->as[ + kctx->as_nr].transaction_mutex); + + if (kbase_hw_has_issue(kbdev, + BASE_HW_ISSUE_6367)) + op = AS_COMMAND_FLUSH; + else + op = AS_COMMAND_FLUSH_MEM; + + ret = kbase_mmu_hw_do_operation(kbdev, + &kbdev->as[kctx->as_nr], + kctx, vpfn, nr, + op, 0); +#if KBASE_GPU_RESET_EN + if (ret) { + /* Flush failed to complete, assume the + * GPU has hung and perform a reset to + * recover */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + if (kbase_prepare_to_reset_gpu(kbdev)) + kbase_reset_gpu(kbdev); + } +#endif /* KBASE_GPU_RESET_EN */ + + mutex_unlock(&kbdev->as[ + kctx->as_nr].transaction_mutex); + /* AS transaction end */ + + kbase_pm_context_idle(kbdev); + } + } + kbasep_js_runpool_release_ctx(kbdev, kctx); + } +} + +/* + * We actually only discard the ATE, and not the page table + * pages.
There is a potential DoS here, as we'll leak memory by + * having PTEs that are potentially unused. Fixing this will require + * physical page accounting, so MMU pages are part of the process allocation. + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. + */ +int kbase_mmu_teardown_pages(struct kbase_context *kctx, u64 vpfn, size_t nr) +{ + phys_addr_t pgd; + u64 *pgd_page; + struct kbase_device *kbdev; + size_t requested_nr = nr; + struct kbase_mmu_mode const *mmu_mode; + + KBASE_DEBUG_ASSERT(NULL != kctx); + beenthere(kctx, "kctx %p vpfn %lx nr %zd", (void *)kctx, (unsigned long)vpfn, nr); + + lockdep_assert_held(&kctx->reg_lock); + + if (0 == nr) { + /* early out if nothing to do */ + return 0; + } + + kbdev = kctx->kbdev; + mmu_mode = kbdev->mmu_mode; + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + struct page *p; + + if (count > nr) + count = nr; + + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { + dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: mmu_get_bottom_pgd failure\n"); + return -EINVAL; + } + + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap(p); + if (!pgd_page) { + dev_warn(kbdev->dev, "kbase_mmu_teardown_pages: kmap failure\n"); + return -ENOMEM; + } + + for (i = 0; i < count; i++) + mmu_mode->entry_invalidate(&pgd_page[index + i]); + + vpfn += count; + nr -= count; + + kbase_mmu_sync_pgd(kctx->kbdev->dev, + kbase_dma_addr(p) + + (index * sizeof(u64)), + count * sizeof(u64)); + + kunmap(p); + } + + kbase_mmu_flush(kctx, vpfn, requested_nr); + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); + +/** + * Update the entries for the specified number of pages pointed to by 'phys' at GPU PFN 'vpfn'. + * This call is triggered in response to a change of the memory attributes + * + * @pre The caller is responsible for validating the memory attributes + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information.
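+ * + * A sketch of the expected call pattern (hypothetical caller; assumes + * kctx->reg_lock is held and the new attributes in flags were already + * validated): + * + *   err = kbase_mmu_update_pages(kctx, reg->start_pfn, + *                                kbase_get_gpu_phy_pages(reg), + *                                kbase_reg_current_backed_size(reg), + *                                reg->flags);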
+ */ +int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, phys_addr_t *phys, size_t nr, unsigned long flags) +{ + phys_addr_t pgd; + u64 *pgd_page; + size_t requested_nr = nr; + struct kbase_mmu_mode const *mmu_mode; + + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(0 != vpfn); + KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + + lockdep_assert_held(&kctx->reg_lock); + + mmu_mode = kctx->kbdev->mmu_mode; + + dev_warn(kctx->kbdev->dev, "kbase_mmu_update_pages(): updating page share flags on GPU PFN 0x%llx from phys %p, %zu pages", + vpfn, phys, nr); + + while (nr) { + unsigned int i; + unsigned int index = vpfn & 0x1FF; + size_t count = KBASE_MMU_PAGE_ENTRIES - index; + struct page *p; + + if (count > nr) + count = nr; + + pgd = mmu_get_bottom_pgd(kctx, vpfn); + if (!pgd) { + dev_warn(kctx->kbdev->dev, "mmu_get_bottom_pgd failure\n"); + return -EINVAL; + } + + p = pfn_to_page(PFN_DOWN(pgd)); + pgd_page = kmap(p); + if (!pgd_page) { + dev_warn(kctx->kbdev->dev, "kmap failure\n"); + return -ENOMEM; + } + + for (i = 0; i < count; i++) + mmu_mode->entry_set_ate(&pgd_page[index + i], phys[i], + flags); + + phys += count; + vpfn += count; + nr -= count; + + kbase_mmu_sync_pgd(kctx->kbdev->dev, + kbase_dma_addr(p) + + (index * sizeof(u64)), + count * sizeof(u64)); + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + } + + kbase_mmu_flush(kctx, vpfn, requested_nr); + + return 0; +} + +/* This is a debug feature only */ +static void mmu_check_unused(struct kbase_context *kctx, phys_addr_t pgd) +{ + u64 *page; + int i; + + page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + /* kmap_atomic should NEVER fail. */ + KBASE_DEBUG_ASSERT(NULL != page); + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + if (kctx->kbdev->mmu_mode->ate_is_valid(page[i])) + beenthere(kctx, "live pte %016lx", (unsigned long)page[i]); + } + kunmap_atomic(page); +} + +static void mmu_teardown_level(struct kbase_context *kctx, phys_addr_t pgd, int level, int zap, u64 *pgd_page_buffer) +{ + phys_addr_t target_pgd; + u64 *pgd_page; + int i; + struct kbase_mmu_mode const *mmu_mode; + + KBASE_DEBUG_ASSERT(NULL != kctx); + lockdep_assert_held(&kctx->reg_lock); + + pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + /* kmap_atomic should NEVER fail. */ + KBASE_DEBUG_ASSERT(NULL != pgd_page); + /* Copy the page to our preallocated buffer so that we can minimize kmap_atomic usage */ + memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); + kunmap_atomic(pgd_page); + pgd_page = pgd_page_buffer; + + mmu_mode = kctx->kbdev->mmu_mode; + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); + + if (target_pgd) { + if (level < 2) { + mmu_teardown_level(kctx, target_pgd, level + 1, zap, pgd_page_buffer + (PAGE_SIZE / sizeof(u64))); + } else { + /* + * So target_pgd is a level-3 page. + * As a leaf, it is safe to free it. + * Unless we have live pages attached to it!
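+ * + * Note on the recursion above: each level of the (at most 4-level) + * walk consumes one page of the preallocated kctx->mmu_teardown_pages + * buffer, with level N+1 handed pgd_page_buffer + (PAGE_SIZE / + * sizeof(u64)), so the teardown path never has to allocate and + * therefore cannot fail midway.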
+ */ + mmu_check_unused(kctx, target_pgd); + } + + beenthere(kctx, "pte %lx level %d", (unsigned long)target_pgd, level + 1); + if (zap) { + kbase_mem_allocator_free(kctx->pgd_allocator, 1, &target_pgd, true); + kbase_process_page_usage_dec(kctx, 1); + kbase_atomic_sub_pages(1, &kctx->used_pages); + kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); + } + } + } +} + +int kbase_mmu_init(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL == kctx->mmu_teardown_pages); + + /* Preallocate MMU depth of four pages for mmu_teardown_level to use */ + kctx->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); + + if (NULL == kctx->mmu_teardown_pages) + return -ENOMEM; + + return 0; +} + +void kbase_mmu_term(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); + + kfree(kctx->mmu_teardown_pages); + kctx->mmu_teardown_pages = NULL; +} + +void kbase_mmu_free_pgd(struct kbase_context *kctx) +{ + KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(NULL != kctx->mmu_teardown_pages); + + lockdep_assert_held(&kctx->reg_lock); + + mmu_teardown_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 1, kctx->mmu_teardown_pages); + + beenthere(kctx, "pgd %lx", (unsigned long)kctx->pgd); + kbase_mem_allocator_free(kctx->pgd_allocator, 1, &kctx->pgd, true); + kbase_process_page_usage_dec(kctx, 1); + kbase_atomic_sub_pages(1, &kctx->used_pages); + kbase_atomic_sub_pages(1, &kctx->kbdev->memdev.used_pages); +} + +KBASE_EXPORT_TEST_API(kbase_mmu_free_pgd); + +static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, int level, char ** const buffer, size_t *size_left) +{ + phys_addr_t target_pgd; + u64 *pgd_page; + int i; + size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64); + size_t dump_size; + struct kbase_mmu_mode const *mmu_mode; + + KBASE_DEBUG_ASSERT(NULL != kctx); + lockdep_assert_held(&kctx->reg_lock); + + mmu_mode = kctx->kbdev->mmu_mode; + + pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + if (!pgd_page) { + dev_warn(kctx->kbdev->dev, "kbasep_mmu_dump_level: kmap failure\n"); + return 0; + } + + if (*size_left >= size) { + /* A modified physical address that contains the page table level */ + u64 m_pgd = pgd | level; + + /* Put the modified physical address in the output buffer */ + memcpy(*buffer, &m_pgd, sizeof(m_pgd)); + *buffer += sizeof(m_pgd); + + /* Followed by the page table itself */ + memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES); + *buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES; + + *size_left -= size; + } + + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) { + if (mmu_mode->pte_is_valid(pgd_page[i])) { + target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]); + + dump_size = kbasep_mmu_dump_level(kctx, target_pgd, level + 1, buffer, size_left); + if (!dump_size) { + kunmap(pfn_to_page(PFN_DOWN(pgd))); + return 0; + } + size += dump_size; + } + } + + kunmap(pfn_to_page(PFN_DOWN(pgd))); + + return size; +} + +void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages) +{ + void *kaddr; + size_t size_left; + + KBASE_DEBUG_ASSERT(kctx); + + lockdep_assert_held(&kctx->reg_lock); + + if (0 == nr_pages) { + /* can't find in a 0 sized buffer, early out */ + return NULL; + } + + size_left = nr_pages * PAGE_SIZE; + + KBASE_DEBUG_ASSERT(0 != size_left); + kaddr = vmalloc_user(size_left); + + if (kaddr) { + u64 end_marker = 0xFFULL; + char *buffer = (char *)kaddr; + size_t size = kbasep_mmu_dump_level(kctx, kctx->pgd, MIDGARD_MMU_TOPLEVEL, 
&buffer, &size_left); + + if (!size) { + vfree(kaddr); + return NULL; + } + + /* Add on the size for the end marker */ + size += sizeof(u64); + + if (size > nr_pages * PAGE_SIZE || size_left < sizeof(u64)) { + /* The buffer isn't big enough - free the memory and return failure */ + vfree(kaddr); + return NULL; + } + + /* Add the end marker */ + memcpy(buffer, &end_marker, sizeof(u64)); + } + + return kaddr; +} +KBASE_EXPORT_TEST_API(kbase_mmu_dump); + +void bus_fault_worker(struct work_struct *data) +{ + struct kbase_as *faulting_as; + int as_no; + struct kbase_context *kctx; + struct kbase_device *kbdev; +#if KBASE_GPU_RESET_EN + bool reset_status = false; +#endif /* KBASE_GPU_RESET_EN */ + + faulting_as = container_of(data, struct kbase_as, work_busfault); + + as_no = faulting_as->number; + + kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); + + /* Grab the context that was already refcounted in kbase_mmu_interrupt(). + * Therefore, it cannot be scheduled out of this AS until we explicitly release it + * + * NOTE: NULL can be returned here if we're gracefully handling a spurious interrupt */ + kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as_no); +#if KBASE_GPU_RESET_EN + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { + /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. + * We start the reset before switching to UNMAPPED to ensure that unrelated jobs + * are evicted from the GPU before the switch. + */ + dev_err(kbdev->dev, "GPU bus error occurred. For this GPU version we now soft-reset as part of bus error recovery\n"); + reset_status = kbase_prepare_to_reset_gpu(kbdev); + } +#endif /* KBASE_GPU_RESET_EN */ + /* NOTE: If GPU already powered off for suspend, we don't need to switch to unmapped */ + if (!kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) { + + /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ + /* AS transaction begin */ + mutex_lock(&kbdev->as[as_no].transaction_mutex); + + /* Set the MMU into unmapped mode */ + kbase_mmu_disable_as(kbdev, as_no); + + mutex_unlock(&kbdev->as[as_no].transaction_mutex); + /* AS transaction end */ + + kbase_mmu_hw_clear_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_BUS); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, kctx, + KBASE_MMU_FAULT_TYPE_BUS); + + kbase_pm_context_idle(kbdev); + } +#if KBASE_GPU_RESET_EN + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) + kbase_reset_gpu(kbdev); +#endif /* KBASE_GPU_RESET_EN */ + /* By this point, the fault was handled in some way, so release the ctx refcount */ + if (kctx != NULL) + kbasep_js_runpool_release_ctx(kbdev, kctx); + + atomic_dec(&kbdev->faults_pending); +} + +const char *kbase_exception_name(struct kbase_device *kbdev, u32 exception_code) +{ + const char *e; + + switch (exception_code) { + /* Non-Fault Status code */ + case 0x00: + e = "NOT_STARTED/IDLE/OK"; + break; + case 0x01: + e = "DONE"; + break; + case 0x02: + e = "INTERRUPTED"; + break; + case 0x03: + e = "STOPPED"; + break; + case 0x04: + e = "TERMINATED"; + break; + case 0x08: + e = "ACTIVE"; + break; + /* Job exceptions */ + case 0x40: + e = "JOB_CONFIG_FAULT"; + break; + case 0x41: + e = "JOB_POWER_FAULT"; + break; + case 0x42: + e = "JOB_READ_FAULT"; + break; + case 0x43: + e = "JOB_WRITE_FAULT"; + break; + case 0x44: + e = "JOB_AFFINITY_FAULT"; + break; + case 0x48: + e = "JOB_BUS_FAULT"; + break; + case 0x50: + e = "INSTR_INVALID_PC"; + break; + case 0x51: + e = 
"INSTR_INVALID_ENC"; + break; + case 0x52: + e = "INSTR_TYPE_MISMATCH"; + break; + case 0x53: + e = "INSTR_OPERAND_FAULT"; + break; + case 0x54: + e = "INSTR_TLS_FAULT"; + break; + case 0x55: + e = "INSTR_BARRIER_FAULT"; + break; + case 0x56: + e = "INSTR_ALIGN_FAULT"; + break; + case 0x58: + e = "DATA_INVALID_FAULT"; + break; + case 0x59: + e = "TILE_RANGE_FAULT"; + break; + case 0x5A: + e = "ADDR_RANGE_FAULT"; + break; + case 0x60: + e = "OUT_OF_MEMORY"; + break; + /* GPU exceptions */ + case 0x80: + e = "DELAYED_BUS_FAULT"; + break; + case 0x88: + e = "SHAREABILITY_FAULT"; + break; + /* MMU exceptions */ + case 0xC0: + case 0xC1: + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + e = "TRANSLATION_FAULT"; + break; + case 0xC8: + e = "PERMISSION_FAULT"; + break; + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + e = "TRANSTAB_BUS_FAULT"; + break; + case 0xD8: + e = "ACCESS_FLAG"; + break; + break; + default: + e = "UNKNOWN"; + break; + }; + + return e; +} + +static const char *access_type_name(struct kbase_device *kbdev, + u32 fault_status) +{ + switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { + return "UNKNOWN"; + case AS_FAULTSTATUS_ACCESS_TYPE_READ: + return "READ"; + case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: + return "WRITE"; + case AS_FAULTSTATUS_ACCESS_TYPE_EX: + return "EXECUTE"; + default: + KBASE_DEBUG_ASSERT(0); + return NULL; + } +} + +/** + * The caller must ensure it's retained the ctx to prevent it from being scheduled out whilst it's being worked on. + */ +static void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, + struct kbase_as *as, const char *reason_str) +{ + unsigned long flags; + int exception_type; + int access_type; + int source_id; + int as_no; + struct kbase_device *kbdev; + struct kbasep_js_device_data *js_devdata; + +#if KBASE_GPU_RESET_EN + bool reset_status = false; +#endif + + as_no = as->number; + kbdev = kctx->kbdev; + js_devdata = &kbdev->js_data; + + /* ASSERT that the context won't leave the runpool */ + KBASE_DEBUG_ASSERT(kbasep_js_debug_check_ctx_refcount(kbdev, kctx) > 0); + + /* decode the fault status */ + exception_type = as->fault_status & 0xFF; + access_type = (as->fault_status >> 8) & 0x3; + source_id = (as->fault_status >> 16); + + /* terminal fault, print info about the fault */ + dev_err(kbdev->dev, + "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status 0x%X\n" + "decoded fault status: %s\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n" + "pid: %d\n", + as_no, as->fault_addr, + reason_str, + as->fault_status, + (as->fault_status & (1 << 10) ? 
"DECODER FAULT" : "SLAVE FAULT"), + exception_type, kbase_exception_name(kbdev, exception_type), + access_type, access_type_name(kbdev, as->fault_status), + source_id, + kctx->pid); + + /* hardware counters dump fault handling */ + if ((kbdev->hwcnt.kctx) && (kbdev->hwcnt.kctx->as_nr == as_no) && + (kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_DUMPING)) { + unsigned int num_core_groups = kbdev->gpu_props.num_core_groups; + + if ((as->fault_addr >= kbdev->hwcnt.addr) && + (as->fault_addr < (kbdev->hwcnt.addr + + (num_core_groups * 2048)))) + kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT; + } + + /* Stop the kctx from submitting more jobs and cause it to be scheduled + * out/rescheduled - this will occur on releasing the context's refcount */ + spin_lock_irqsave(&js_devdata->runpool_irq.lock, flags); + kbasep_js_clear_submit_allowed(js_devdata, kctx); + spin_unlock_irqrestore(&js_devdata->runpool_irq.lock, flags); + + /* Kill any running jobs from the context. Submit is disallowed, so no more jobs from this + * context can appear in the job slots from this point on */ + kbase_backend_jm_kill_jobs_from_kctx(kctx); + /* AS transaction begin */ + mutex_lock(&as->transaction_mutex); +#if KBASE_GPU_RESET_EN + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245)) { + /* Due to H/W issue 8245 we need to reset the GPU after using UNMAPPED mode. + * We start the reset before switching to UNMAPPED to ensure that unrelated jobs + * are evicted from the GPU before the switch. + */ + dev_err(kbdev->dev, "Unhandled page fault. For this GPU version we now soft-reset the GPU as part of page fault recovery."); + reset_status = kbase_prepare_to_reset_gpu(kbdev); + } +#endif /* KBASE_GPU_RESET_EN */ + /* switch to UNMAPPED mode, will abort all jobs and stop any hw counter dumping */ + kbase_mmu_disable_as(kbdev, as_no); + + mutex_unlock(&as->transaction_mutex); + /* AS transaction end */ + /* Clear down the fault */ + kbase_mmu_hw_clear_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); + kbase_mmu_hw_enable_fault(kbdev, as, kctx, KBASE_MMU_FAULT_TYPE_PAGE); + +#if KBASE_GPU_RESET_EN + if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8245) && reset_status) + kbase_reset_gpu(kbdev); +#endif /* KBASE_GPU_RESET_EN */ +} + +void kbasep_as_do_poke(struct work_struct *work) +{ + struct kbase_as *as; + struct kbase_device *kbdev; + struct kbase_context *kctx; + unsigned long flags; + + KBASE_DEBUG_ASSERT(work); + as = container_of(work, struct kbase_as, poke_work); + kbdev = container_of(as, struct kbase_device, as[as->number]); + KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); + + /* GPU power will already be active by virtue of the caller holding a JS + * reference on the address space, and will not release it until this worker + * has finished */ + + /* Further to the comment above, we know that while this function is running + * the AS will not be released as before the atom is released this workqueue + * is flushed (in kbase_as_poking_timer_release_atom) + */ + kctx = kbasep_js_runpool_lookup_ctx_noretain(kbdev, as->number); + + /* AS transaction begin */ + mutex_lock(&as->transaction_mutex); + /* Force a uTLB invalidate */ + kbase_mmu_hw_do_operation(kbdev, as, kctx, 0, 0, + AS_COMMAND_UNLOCK, 0); + mutex_unlock(&as->transaction_mutex); + /* AS transaction end */ + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + if (as->poke_refcount && + !(as->poke_state & KBASE_AS_POKE_STATE_KILLING_POKE)) { + /* Only queue up the timer if we need it, and we're not trying to kill it */ 
+ hrtimer_start(&as->poke_timer, HR_TIMER_DELAY_MSEC(5), HRTIMER_MODE_REL); + } + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); +} + +enum hrtimer_restart kbasep_as_poke_timer_callback(struct hrtimer *timer) +{ + struct kbase_as *as; + int queue_work_ret; + + KBASE_DEBUG_ASSERT(NULL != timer); + as = container_of(timer, struct kbase_as, poke_timer); + KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); + + queue_work_ret = queue_work(as->poke_wq, &as->poke_work); + KBASE_DEBUG_ASSERT(queue_work_ret); + return HRTIMER_NORESTART; +} + +/** + * Retain the poking timer on an atom's context (if the atom hasn't already + * done so), and start the timer (if it's not already started). + * + * This must only be called on a context that's scheduled in, and an atom + * that's running on the GPU. + * + * The caller must hold kbasep_js_device_data::runpool_irq::lock + * + * This can be called safely from atomic context + */ +void kbase_as_poking_timer_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + struct kbase_as *as; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + if (katom->poking) + return; + + katom->poking = 1; + + /* It's safe to work on the as/as_nr without an explicit reference, + * because the caller holds the runpool_irq lock, and the atom itself + * was also running and had already taken a reference */ + as = &kbdev->as[kctx->as_nr]; + + if (++(as->poke_refcount) == 1) { + /* First refcount for poke needed: check if not already in flight */ + if (!as->poke_state) { + /* need to start poking */ + as->poke_state |= KBASE_AS_POKE_STATE_IN_FLIGHT; + queue_work(as->poke_wq, &as->poke_work); + } + } +} + +/** + * If an atom holds a poking timer, release it and wait for it to finish + * + * This must only be called on a context that's scheduled in, and an atom + * that still has a JS reference on the context + * + * This must \b not be called from atomic context, since it can sleep. 
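+ * + * Pairs with kbase_as_poking_timer_retain_atom(). A sketch of the intended + * lifetime (not a literal call site): + * + *   (IRQ-safe context, under kbdev->js_data.runpool_irq.lock) + *   kbase_as_poking_timer_retain_atom(kbdev, kctx, katom); + * + *   (... atom runs on the GPU ...) + * + *   (process context, may sleep) + *   kbase_as_poking_timer_release_atom(kbdev, kctx, katom);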
+ */ +void kbase_as_poking_timer_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom) +{ + struct kbase_as *as; + unsigned long flags; + + KBASE_DEBUG_ASSERT(kbdev); + KBASE_DEBUG_ASSERT(kctx); + KBASE_DEBUG_ASSERT(katom); + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + if (!katom->poking) + return; + + as = &kbdev->as[kctx->as_nr]; + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + KBASE_DEBUG_ASSERT(as->poke_refcount > 0); + KBASE_DEBUG_ASSERT(as->poke_state & KBASE_AS_POKE_STATE_IN_FLIGHT); + + if (--(as->poke_refcount) == 0) { + as->poke_state |= KBASE_AS_POKE_STATE_KILLING_POKE; + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + + hrtimer_cancel(&as->poke_timer); + flush_workqueue(as->poke_wq); + + spin_lock_irqsave(&kbdev->js_data.runpool_irq.lock, flags); + + /* Re-check whether it's still needed */ + if (as->poke_refcount) { + int queue_work_ret; + /* Poking still needed: + * - Another retain will not be starting the timer or queueing work, + * because it's still marked as in-flight + * - The hrtimer has finished, and has not started a new timer or + * queued work because it's been marked as killing + * + * So whatever happens now, just queue the work again */ + as->poke_state &= ~((kbase_as_poke_state)KBASE_AS_POKE_STATE_KILLING_POKE); + queue_work_ret = queue_work(as->poke_wq, &as->poke_work); + KBASE_DEBUG_ASSERT(queue_work_ret); + } else { + /* It isn't - so mark it as not in flight, and not killing */ + as->poke_state = 0u; + + /* The poke associated with the atom has now finished. If this is + * also the last atom on the context, then we can guarantee no more + * pokes (and thus no more poking register accesses) will occur on + * the context until new atoms are run */ + } + } + spin_unlock_irqrestore(&kbdev->js_data.runpool_irq.lock, flags); + + katom->poking = 0; +} + +void kbase_mmu_interrupt_process(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_as *as) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + if (kctx == NULL) { + dev_warn(kbdev->dev, "%s in AS%d at 0x%016llx with no context present! Spurious IRQ or SW design error?\n", + kbase_as_has_bus_fault(as) ?
"Bus error" : "Page fault", + as->number, as->fault_addr); + } + + if (kbase_as_has_bus_fault(as)) { + if (kctx) { + /* + * hw counters dumping in progress, signal the + * other thread that it failed + */ + if ((kbdev->hwcnt.kctx == kctx) && + (kbdev->hwcnt.backend.state == + KBASE_INSTR_STATE_DUMPING)) + kbdev->hwcnt.backend.state = + KBASE_INSTR_STATE_FAULT; + + /* + * Stop the kctx from submitting more jobs and cause it + * to be scheduled out/rescheduled when all references + * to it are released + */ + kbasep_js_clear_submit_allowed(js_devdata, kctx); + + dev_warn(kbdev->dev, "Bus error in AS%d at 0x%016llx\n", + as->number, as->fault_addr); + + } + + /* + * We need to switch to UNMAPPED mode - but we do this in a + * worker so that we can sleep + */ + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_busfault)); + WARN_ON(work_pending(&as->work_busfault)); + INIT_WORK(&as->work_busfault, bus_fault_worker); + queue_work(as->pf_wq, &as->work_busfault); + atomic_inc(&kbdev->faults_pending); + } else { + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&as->work_pagefault)); + WARN_ON(work_pending(&as->work_pagefault)); + INIT_WORK(&as->work_pagefault, page_fault_worker); + queue_work(as->pf_wq, &as->work_pagefault); + atomic_inc(&kbdev->faults_pending); + } +} + +void kbase_flush_mmu_wqs(struct kbase_device *kbdev) +{ + int i; + + for (i = 0; i < kbdev->nr_hw_address_spaces; i++) { + struct kbase_as *as = &kbdev->as[i]; + + flush_workqueue(as->pf_wq); + } +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h new file mode 100644 index 00000000000..c6478b2e0a6 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_hw.h @@ -0,0 +1,121 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file + * Interface file for accessing MMU hardware functionality + */ + +/** + * @page mali_kbase_mmu_hw_page MMU hardware interface + * + * @section mali_kbase_mmu_hw_intro_sec Introduction + * This module provides an abstraction for accessing the functionality provided + * by the midgard MMU and thus allows all MMU HW access to be contained within + * one common place and allows for different backends (implementations) to + * be provided. + */ + +#ifndef _MALI_KBASE_MMU_HW_H_ +#define _MALI_KBASE_MMU_HW_H_ + +/* Forward declarations */ +struct kbase_device; +struct kbase_as; +struct kbase_context; + +/** + * @addtogroup base_kbase_api + * @{ + */ + +/** + * @addtogroup mali_kbase_mmu_hw MMU access APIs + * @{ + */ + +/** @brief MMU fault type descriptor. + */ +enum kbase_mmu_fault_type { + KBASE_MMU_FAULT_TYPE_UNKNOWN = 0, + KBASE_MMU_FAULT_TYPE_PAGE, + KBASE_MMU_FAULT_TYPE_BUS +}; + +/** @brief Configure an address space for use. + * + * Configure the MMU using the address space details setup in the + * @ref kbase_context structure. + * + * @param[in] kbdev kbase device to configure. + * @param[in] as address space to configure. + * @param[in] kctx kbase context to configure. 
+ */ +void kbase_mmu_hw_configure(struct kbase_device *kbdev, + struct kbase_as *as, struct kbase_context *kctx); + +/** @brief Issue an operation to the MMU. + * + * Issue an operation (MMU invalidate, MMU flush, etc) on the address space that + * is associated with the provided @ref kbase_context over the specified range + * + * @param[in] kbdev kbase device to issue the MMU operation on. + * @param[in] as address space to issue the MMU operation on. + * @param[in] kctx kbase context to issue the MMU operation on. + * @param[in] vpfn MMU Virtual Page Frame Number to start the + * operation on. + * @param[in] nr Number of pages to work on. + * @param[in] type Operation type (written to ASn_COMMAND). + * @param[in] handling_irq Is this operation being called during the handling + * of an interrupt? + * + * @return Zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, u64 vpfn, u32 nr, u32 type, + unsigned int handling_irq); + +/** @brief Clear a fault that has been previously reported by the MMU. + * + * Clear a bus error or page fault that has been reported by the MMU. + * + * @param[in] kbdev kbase device to clear the fault from. + * @param[in] as address space to clear the fault from. + * @param[in] kctx kbase context to clear the fault from or NULL. + * @param[in] type The type of fault that needs to be cleared. + */ +void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, enum kbase_mmu_fault_type type); + +/** @brief Enable fault that has been previously reported by the MMU. + * + * After a page fault or bus error has been reported by the MMU these + * will be disabled. After these are handled this function needs to be + * called to enable the page fault or bus error fault again. + * + * @param[in] kbdev kbase device to again enable the fault from. + * @param[in] as address space to again enable the fault from. + * @param[in] kctx kbase context to again enable the fault from. + * @param[in] type The type of fault that needs to be enabled again. + */ +void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as, + struct kbase_context *kctx, enum kbase_mmu_fault_type type); + +/** @} *//* end group mali_kbase_mmu_hw */ +/** @} *//* end group base_kbase_api */ + +#endif /* _MALI_KBASE_MMU_HW_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h new file mode 100644 index 00000000000..26e683e2960 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode.h @@ -0,0 +1,44 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#ifndef _MALI_KBASE_MMU_MODE_ +#define _MALI_KBASE_MMU_MODE_ + +#include <linux/types.h> + +/* Forward declarations */ +struct kbase_context; +struct kbase_device; +struct kbase_as; + +struct kbase_mmu_mode { + void (*update)(struct kbase_context *kctx); + void (*disable_as)(struct kbase_device *kbdev, int as_nr); + phys_addr_t (*pte_to_phy_addr)(u64 entry); + int (*ate_is_valid)(u64 ate); + int (*pte_is_valid)(u64 pte); + void (*entry_set_ate)(u64 *entry, phys_addr_t phy, unsigned long flags); + void (*entry_set_pte)(u64 *entry, phys_addr_t phy); + void (*entry_invalidate)(u64 *entry); +}; + +struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void); +struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void); + +#endif /* _MALI_KBASE_MMU_MODE_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c new file mode 100644 index 00000000000..2211a1dcf01 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_mmu_mode_lpae.c @@ -0,0 +1,188 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include "mali_kbase_mmu_mode.h" + +#include "mali_kbase.h" +#include "mali_midg_regmap.h" + +#define ENTRY_TYPE_MASK 3ULL +#define ENTRY_IS_ATE 1ULL +#define ENTRY_IS_INVAL 2ULL +#define ENTRY_IS_PTE 3ULL + +#define ENTRY_ATTR_BITS (7ULL << 2) /* bits 4:2 */ +#define ENTRY_RD_BIT (1ULL << 6) +#define ENTRY_WR_BIT (1ULL << 7) +#define ENTRY_SHARE_BITS (3ULL << 8) /* bits 9:8 */ +#define ENTRY_ACCESS_BIT (1ULL << 10) +#define ENTRY_NX_BIT (1ULL << 54) + +#define ENTRY_FLAGS_MASK (ENTRY_ATTR_BITS | ENTRY_RD_BIT | ENTRY_WR_BIT | \ + ENTRY_SHARE_BITS | ENTRY_ACCESS_BIT | ENTRY_NX_BIT) + +/* Helper Function to perform assignment of page table entries, to + * ensure the use of strd, which is required on LPAE systems. + */ +static inline void page_table_entry_set(u64 *pte, u64 phy) +{ +#ifdef CONFIG_64BIT + *pte = phy; +#elif defined(CONFIG_ARM) + /* + * In order to prevent the compiler keeping cached copies of + * memory, we have to explicitly say that we have updated + * memory. + * + * Note: We could manually move the data ourselves into R0 and + * R1 by specifying register variables that are explicitly + * given registers assignments, the down side of this is that + * we have to assume cpu endianness. To avoid this we can use + * the ldrd to read the data from memory into R0 and R1 which + * will respect the cpu endianness, we then use strd to make + * the 64 bit assignment to the page table entry. 
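+ *
+ * Without this, a plain 64-bit C assignment on a 32-bit ARM kernel may
+ * be compiled into two separate 32-bit stores, leaving a window in
+ * which the GPU MMU could observe a half-written entry; strd performs
+ * the update as a single 64-bit store.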
+ */ + asm volatile("ldrd r0, r1, [%[ptemp]]\n\t" + "strd r0, r1, [%[pte]]\n\t" + : "=m" (*pte) + : [ptemp] "r" (&phy), [pte] "r" (pte), "m" (phy) + : "r0", "r1"); +#else +#error "64-bit atomic write must be implemented for your architecture" +#endif +} + +static void mmu_update(struct kbase_context *kctx) +{ + struct kbase_device * const kbdev = kctx->kbdev; + struct kbase_as * const as = &kbdev->as[kctx->as_nr]; + struct kbase_mmu_setup * const current_setup = &as->current_setup; + + /* Set up the required caching policies at the correct indices + * in the memattr register. */ + current_setup->memattr = + (AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY << + (AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY * 8)) | + (AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL << + (AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL * 8)) | + (AS_MEMATTR_LPAE_WRITE_ALLOC << + (AS_MEMATTR_INDEX_WRITE_ALLOC * 8)) | + 0; /* The other indices are unused for now */ + + current_setup->transtab = (u64)kctx->pgd & + ((0xFFFFFFFFULL << 32) | AS_TRANSTAB_LPAE_ADDR_SPACE_MASK); + + current_setup->transtab |= AS_TRANSTAB_LPAE_ADRMODE_TABLE; + current_setup->transtab |= AS_TRANSTAB_LPAE_READ_INNER; + + + /* Apply the address space setting */ + kbase_mmu_hw_configure(kbdev, as, kctx); +} + +static void mmu_disable_as(struct kbase_device *kbdev, int as_nr) +{ + struct kbase_as * const as = &kbdev->as[as_nr]; + struct kbase_mmu_setup * const current_setup = &as->current_setup; + + current_setup->transtab = AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED; + + + /* Apply the address space setting */ + kbase_mmu_hw_configure(kbdev, as, NULL); +} + +static phys_addr_t pte_to_phy_addr(u64 entry) +{ + if (!(entry & 1)) + return 0; + + return entry & ~0xFFF; +} + +static int ate_is_valid(u64 ate) +{ + return ((ate & ENTRY_TYPE_MASK) == ENTRY_IS_ATE); +} + +static int pte_is_valid(u64 pte) +{ + return ((pte & ENTRY_TYPE_MASK) == ENTRY_IS_PTE); +} + +/* + * Map KBASE_REG flags to MMU flags + */ +static u64 get_mmu_flags(unsigned long flags) +{ + u64 mmu_flags; + + /* store mem_attr index as 4:2 (macro called ensures 3 bits already) */ + mmu_flags = KBASE_REG_MEMATTR_VALUE(flags) << 2; + + /* write perm if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_WR) ? ENTRY_WR_BIT : 0; + /* read perm if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_RD) ? ENTRY_RD_BIT : 0; + /* nx if requested */ + mmu_flags |= (flags & KBASE_REG_GPU_NX) ? 
ENTRY_NX_BIT : 0; + + if (flags & KBASE_REG_SHARE_BOTH) { + /* inner and outer shareable */ + mmu_flags |= SHARE_BOTH_BITS; + } else if (flags & KBASE_REG_SHARE_IN) { + /* inner shareable coherency */ + mmu_flags |= SHARE_INNER_BITS; + } + + return mmu_flags; +} + +static void entry_set_ate(u64 *entry, phys_addr_t phy, unsigned long flags) +{ + page_table_entry_set(entry, (phy & ~0xFFF) | + get_mmu_flags(flags) | + ENTRY_IS_ATE); +} + +static void entry_set_pte(u64 *entry, phys_addr_t phy) +{ + page_table_entry_set(entry, (phy & ~0xFFF) | ENTRY_IS_PTE); +} + +static void entry_invalidate(u64 *entry) +{ + page_table_entry_set(entry, ENTRY_IS_INVAL); +} + +static struct kbase_mmu_mode const lpae_mode = { + .update = mmu_update, + .disable_as = mmu_disable_as, + .pte_to_phy_addr = pte_to_phy_addr, + .ate_is_valid = ate_is_valid, + .pte_is_valid = pte_is_valid, + .entry_set_ate = entry_set_ate, + .entry_set_pte = entry_set_pte, + .entry_invalidate = entry_invalidate +}; + +struct kbase_mmu_mode const *kbase_mmu_mode_get_lpae(void) +{ + return &lpae_mode; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c new file mode 100644 index 00000000000..5bbd6d48563 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_platform_fake.c @@ -0,0 +1,126 @@ +/* + * + * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifdef CONFIG_MALI_PLATFORM_FAKE + +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/ioport.h> +#include <linux/platform_device.h> +#include <linux/string.h> + +#ifdef CONFIG_MACH_MANTA +#include <plat/devs.h> +#endif + +/* + * This file is included only for type definitions and functions belonging to + * specific platform folders. Do not add dependencies with symbols that are + * defined somewhere else. + */ +#include <mali_kbase_config.h> + +#define PLATFORM_CONFIG_RESOURCE_COUNT 4 +#define PLATFORM_CONFIG_IRQ_RES_COUNT 3 + +static struct platform_device *mali_device; + +#ifndef CONFIG_OF +/** + * @brief Convert data in struct kbase_io_resources struct to Linux-specific resources + * + * Function converts data in struct kbase_io_resources struct to an array of Linux resource structures. Note that function + * assumes that size of linux_resource array is at least PLATFORM_CONFIG_RESOURCE_COUNT. + * Resources are put in fixed order: I/O memory region, job IRQ, MMU IRQ, GPU IRQ. 
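+ *
+ * For example, with illustrative values only (an IO region at
+ * 0xFC010000-0xFC013FFF and job/MMU/GPU IRQs 68/69/70), the output is:
+ *   linux_resources[0] = { .start = 0xFC010000, .end = 0xFC013FFF, .flags = IORESOURCE_MEM }
+ *   linux_resources[1] = { .start = 68, .end = 68, .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL }
+ *   ...and likewise for the MMU and GPU IRQs in slots 2 and 3.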
+ * + * @param[in] io_resource Input IO resource data + * @param[out] linux_resources Pointer to output array of Linux resource structures + */ +static void kbasep_config_parse_io_resources(const struct kbase_io_resources *io_resources, struct resource *const linux_resources) +{ + if (!io_resources || !linux_resources) { + pr_err("%s: couldn't find proper resources\n", __func__); + return; + } + + memset(linux_resources, 0, PLATFORM_CONFIG_RESOURCE_COUNT * sizeof(struct resource)); + + linux_resources[0].start = io_resources->io_memory_region.start; + linux_resources[0].end = io_resources->io_memory_region.end; + linux_resources[0].flags = IORESOURCE_MEM; + linux_resources[1].start = io_resources->job_irq_number; + linux_resources[1].end = io_resources->job_irq_number; + linux_resources[1].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; + + linux_resources[2].start = io_resources->mmu_irq_number; + linux_resources[2].end = io_resources->mmu_irq_number; + linux_resources[2].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; + + linux_resources[3].start = io_resources->gpu_irq_number; + linux_resources[3].end = io_resources->gpu_irq_number; + linux_resources[3].flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL; +} +#endif /* CONFIG_OF */ + +int kbase_platform_fake_register(void) +{ + struct kbase_platform_config *config; +#ifndef CONFIG_OF + struct resource resources[PLATFORM_CONFIG_RESOURCE_COUNT]; +#endif + int err; + + config = kbase_get_platform_config(); /* declared in midgard/mali_kbase_config.h but defined in platform folder */ + if (config == NULL) { + pr_err("%s: couldn't get platform config\n", __func__); + return -ENODEV; + } + + mali_device = platform_device_alloc("mali", 0); + if (mali_device == NULL) + return -ENOMEM; + +#ifndef CONFIG_OF + kbasep_config_parse_io_resources(config->io_resources, resources); + err = platform_device_add_resources(mali_device, resources, PLATFORM_CONFIG_RESOURCE_COUNT); + if (err) { + platform_device_put(mali_device); + mali_device = NULL; + return err; + } +#endif /* CONFIG_OF */ + + err = platform_device_add(mali_device); + if (err) { + platform_device_unregister(mali_device); + mali_device = NULL; + return err; + } + + return 0; +} +EXPORT_SYMBOL(kbase_platform_fake_register); + +void kbase_platform_fake_unregister(void) +{ + if (mali_device) + platform_device_unregister(mali_device); +} +EXPORT_SYMBOL(kbase_platform_fake_unregister); + +#endif /* CONFIG_MALI_PLATFORM_FAKE */ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.c b/drivers/gpu/arm/midgard/mali_kbase_pm.c new file mode 100644 index 00000000000..261441fa145 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm.c @@ -0,0 +1,204 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +/** + * @file mali_kbase_pm.c + * Base kernel power management APIs + */ +#include <mali_kbase.h> +#include <mali_midg_regmap.h> +#include <mali_kbase_config_defaults.h> +#include <mali_kbase_instr.h> + +#include <mali_kbase_pm.h> + +int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) +{ + return kbase_hwaccess_pm_powerup(kbdev, flags); +} + +void kbase_pm_halt(struct kbase_device *kbdev) +{ + kbase_hwaccess_pm_halt(kbdev); +} + +void kbase_pm_context_active(struct kbase_device *kbdev) +{ + (void)kbase_pm_context_active_handle_suspend(kbdev, KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE); +} + +int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + int c; + int old_count; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + /* Trace timeline information about how long it took to handle the decision + * to powerup. Sometimes the event might be missed due to reading the count + * outside of mutex, but this is necessary to get the trace timing + * correct. */ + old_count = kbdev->pm.active_count; + if (old_count == 0) + kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); + + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + if (kbase_pm_is_suspending(kbdev)) { + switch (suspend_handler) { + case KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE: + if (kbdev->pm.active_count != 0) + break; + /* FALLTHROUGH */ + case KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE: + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + if (old_count == 0) + kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); + return 1; + + case KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE: + /* FALLTHROUGH */ + default: + KBASE_DEBUG_ASSERT_MSG(false, "unreachable"); + break; + } + } + c = ++kbdev->pm.active_count; + KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c); + KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_ACTIVE, NULL, NULL, 0u, c); + + /* Trace the event being handled */ + if (old_count == 0) + kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_ACTIVE); + + if (c == 1) + /* First context active: Power on the GPU and any cores requested by + * the policy */ + kbase_hwaccess_pm_gpu_active(kbdev); + + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); + + return 0; +} + +KBASE_EXPORT_TEST_API(kbase_pm_context_active); + +void kbase_pm_context_idle(struct kbase_device *kbdev) +{ + struct kbasep_js_device_data *js_devdata = &kbdev->js_data; + int c; + int old_count; + + KBASE_DEBUG_ASSERT(kbdev != NULL); + + /* Trace timeline information about how long it took to handle the decision + * to powerdown. Sometimes the event might be missed due to reading the + * count outside of mutex, but this is necessary to get the trace timing + * correct. 
*/ + old_count = kbdev->pm.active_count; + if (old_count == 0) + kbase_timeline_pm_send_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE); + + mutex_lock(&js_devdata->runpool_mutex); + mutex_lock(&kbdev->pm.lock); + + c = --kbdev->pm.active_count; + KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, c); + KBASE_TRACE_ADD_REFCOUNT(kbdev, PM_CONTEXT_IDLE, NULL, NULL, 0u, c); + + KBASE_DEBUG_ASSERT(c >= 0); + + /* Trace the event being handled */ + if (old_count == 0) + kbase_timeline_pm_handle_event(kbdev, KBASE_TIMELINE_PM_EVENT_GPU_IDLE); + + if (c == 0) { + /* Last context has gone idle */ + kbase_hwaccess_pm_gpu_idle(kbdev); + + /* Wake up anyone waiting for this to become 0 (e.g. suspend). The + * waiters must synchronize with us by locking the pm.lock after + * waiting */ + wake_up(&kbdev->pm.zero_active_count_wait); + } + + mutex_unlock(&kbdev->pm.lock); + mutex_unlock(&js_devdata->runpool_mutex); +} + +KBASE_EXPORT_TEST_API(kbase_pm_context_idle); + +void kbase_pm_suspend(struct kbase_device *kbdev) +{ + KBASE_DEBUG_ASSERT(kbdev); + + mutex_lock(&kbdev->pm.lock); + KBASE_DEBUG_ASSERT(!kbase_pm_is_suspending(kbdev)); + kbdev->pm.suspending = true; + mutex_unlock(&kbdev->pm.lock); + + /* From now on, the active count will drop towards zero. Sometimes, it'll + * go up briefly before going down again. However, once it reaches zero it + * will stay there - guaranteeing that we've idled all pm references */ + + /* Suspend job scheduler and associated components, so that it releases all + * the PM active count references */ + kbasep_js_suspend(kbdev); + + /* Suspend any counter collection that might be happening */ + kbase_instr_hwcnt_suspend(kbdev); + + /* Wait for the active count to reach zero. This is not the same as + * waiting for a power down, since not all policies power down when this + * reaches zero. */ + wait_event(kbdev->pm.zero_active_count_wait, kbdev->pm.active_count == 0); + + /* NOTE: We synchronize with anything that was just finishing a + * kbase_pm_context_idle() call by locking the pm.lock below */ + + kbase_hwaccess_pm_suspend(kbdev); +} + +void kbase_pm_resume(struct kbase_device *kbdev) +{ + /* MUST happen before any pm_context_active calls occur */ + kbase_hwaccess_pm_resume(kbdev); + + /* Initial active call, to power on the GPU/cores if needed */ + kbase_pm_context_active(kbdev); + + /* Re-enable instrumentation, if it was previously disabled */ + kbase_instr_hwcnt_resume(kbdev); + + /* Resume any blocked atoms (which may cause contexts to be scheduled in + * and dependent atoms to run) */ + kbase_resume_suspended_soft_jobs(kbdev); + + /* Resume the Job Scheduler and associated components, and start running + * atoms */ + kbasep_js_resume(kbdev); + + /* Matching idle call, to power off the GPU/cores if we didn't actually + * need it and the policy doesn't want it on */ + kbase_pm_context_idle(kbdev); +} + diff --git a/drivers/gpu/arm/midgard/mali_kbase_pm.h b/drivers/gpu/arm/midgard/mali_kbase_pm.h new file mode 100644 index 00000000000..37fa2479df7 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_pm.h @@ -0,0 +1,171 @@ +/* + * + * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_kbase_pm.h
+ * Power management API definitions
+ */
+
+#ifndef _KBASE_PM_H_
+#define _KBASE_PM_H_
+
+#include "mali_kbase_hwaccess_pm.h"
+
+#define PM_ENABLE_IRQS 0x01
+#define PM_HW_ISSUES_DETECT 0x02
+
+
+/** Initialize the power management framework.
+ *
+ * Must be called before any other power management function.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @return 0 if the power management framework was successfully initialized.
+ */
+int kbase_pm_init(struct kbase_device *kbdev);
+
+/** Power up the GPU after all modules have been initialized and interrupt handlers installed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ *
+ * @param flags Flags to pass on to kbase_pm_init_hw
+ *
+ * @return 0 if powerup was successful.
+ */
+int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
+
+/**
+ * Halt the power management framework.
+ * This should ensure that no new interrupts are generated, but allow any
+ * currently running interrupt handlers to complete successfully. The GPU is
+ * forced off by the time this function returns, regardless of whether or not
+ * the active power policy asks for the GPU to be powered off.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_halt(struct kbase_device *kbdev);
+
+/** Terminate the power management framework.
+ *
+ * No power management functions may be called after this
+ * (except @ref kbase_pm_init).
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_term(struct kbase_device *kbdev);
+
+/** Increment the count of active contexts.
+ *
+ * This function should be called when a context is about to submit a job. It
+ * informs the active power policy that the GPU is going to be in use shortly,
+ * and the policy is expected to start turning on the GPU.
+ *
+ * This function will block until the GPU is available.
+ *
+ * This function ASSERTS if a suspend is occurring/has occurred whilst this is
+ * in use. Use kbase_pm_context_active_handle_suspend() with a suitable
+ * handler code instead.
+ *
+ * @note A suspend is only visible to kernel threads; user-space threads in a
+ * syscall cannot witness a suspend, because they are frozen before the suspend
+ * begins.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_active(struct kbase_device *kbdev);
+
+
+/** Handler codes for kbase_pm_context_active_handle_suspend() */
+enum kbase_pm_suspend_handler {
+ /** A suspend is not expected/not possible - this is the same as
+ * kbase_pm_context_active() */
+ KBASE_PM_SUSPEND_HANDLER_NOT_POSSIBLE,
+ /** If we're suspending, fail and don't increase the active count */
+ KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE,
+ /** If we're suspending, succeed and allow the active count to increase
+ * iff it didn't go from 0->1 (i.e., we didn't re-activate the GPU).
+ *
+ * This should only be used when there is a bounded time on the activation
+ * (e.g. the GPU is guaranteed to be idled again very soon after) */
+ KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE
+};
+
+/** Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * If a suspend is in progress, this allows for various different ways of
+ * handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
+ *
+ * This function returns a status code indicating whether the caller is allowed
+ * to keep the GPU active during the suspend, depending on the handler code. If
+ * the status code indicates a failure, the caller must abort whatever
+ * operation it was attempting, and potentially queue it up for after the OS
+ * has resumed.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ * @param suspend_handler The handler code for how to handle a suspend that might occur
+ * @return zero Indicates success
+ * @return non-zero Indicates failure because the system is suspending or suspended.
+ */
+int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
+
+/** Decrement the reference count of active contexts.
+ *
+ * This function should be called when a context becomes idle. After this call
+ * the GPU may be turned off by the power policy, so the calling code should
+ * ensure that it does not access the GPU's registers.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_context_idle(struct kbase_device *kbdev);
+
+/**
+ * Suspend the GPU and prevent any further register accesses to it from kernel
+ * threads.
+ *
+ * This is called in response to an OS suspend event, and calls into the various
+ * kbase components to complete the suspend.
+ *
+ * @note The mechanisms used here rely on all user-space threads being frozen
+ * by the OS before we suspend. Otherwise, an IOCTL could occur that powers up
+ * the GPU, e.g. via atom submission.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_suspend(struct kbase_device *kbdev);
+
+/**
+ * Resume the GPU, allow register accesses to it, and resume running atoms on
+ * the GPU.
+ *
+ * This is called in response to an OS resume event, and calls into the various
+ * kbase components to complete the resume.
+ *
+ * @param kbdev The kbase device structure for the device (must be a valid pointer)
+ */
+void kbase_pm_resume(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_vsync_callback - vsync callback
+ *
+ * @buffer_updated: 1 if a new frame was displayed, 0 otherwise
+ * @data: Pointer to the kbase device as returned by kbase_find_device()
+ *
+ * Callback function used to notify the power management code that a vsync has
+ * occurred on the display.
+ */
+void kbase_pm_vsync_callback(int buffer_updated, void *data);
+
+#endif /* _KBASE_PM_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
new file mode 100644
index 00000000000..7fb674eded3
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_profiling_gator_api.h
@@ -0,0 +1,40 @@
+/*
+ *
+ * (C) COPYRIGHT 2010, 2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_profiling_gator_api.h + * Model interface + */ + +#ifndef _KBASE_PROFILING_GATOR_API_H_ +#define _KBASE_PROFILING_GATOR_API_H_ + +/* + * List of possible actions to be controlled by Streamline. + * The following numbers are used by gator to control + * the frame buffer dumping and s/w counter reporting. + */ +#define FBDUMP_CONTROL_ENABLE (1) +#define FBDUMP_CONTROL_RATE (2) +#define SW_COUNTER_ENABLE (3) +#define FBDUMP_CONTROL_RESIZE_FACTOR (4) +#define FBDUMP_CONTROL_MAX (5) +#define FBDUMP_CONTROL_MIN FBDUMP_CONTROL_ENABLE + +void _mali_profiling_control(u32 action, u32 value); + +#endif /* _KBASE_PROFILING_GATOR_API */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_replay.c b/drivers/gpu/arm/midgard/mali_kbase_replay.c new file mode 100644 index 00000000000..b5c7758e1bb --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_replay.c @@ -0,0 +1,1134 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @file mali_kbase_replay.c + * Replay soft job handlers + */ + +#include <linux/dma-mapping.h> +#include <mali_kbase_config.h> +#include <mali_kbase.h> +#include <mali_kbase_mem.h> +#include <mali_kbase_mem_linux.h> + +#define JOB_NOT_STARTED 0 +#define JOB_TYPE_MASK 0xfe +#define JOB_TYPE_NULL (1 << 1) +#define JOB_TYPE_VERTEX (5 << 1) +#define JOB_TYPE_TILER (7 << 1) +#define JOB_TYPE_FUSED (8 << 1) +#define JOB_TYPE_FRAGMENT (9 << 1) + +#define JOB_FLAG_DESC_SIZE (1 << 0) +#define JOB_FLAG_PERFORM_JOB_BARRIER (1 << 8) + +#define JOB_HEADER_32_FBD_OFFSET (31*4) +#define JOB_HEADER_64_FBD_OFFSET (44*4) + +#define FBD_POINTER_MASK (~0x3f) + +#define SFBD_TILER_OFFSET (48*4) + +#define MFBD_TILER_OFFSET (14*4) + +#define FBD_HIERARCHY_WEIGHTS 8 +#define FBD_HIERARCHY_MASK_MASK 0x1fff + +#define FBD_TYPE 1 + +#define HIERARCHY_WEIGHTS 13 + +#define JOB_HEADER_ID_MAX 0xffff + +#define JOB_SOURCE_ID(status) (((status) >> 16) & 0xFFFF) +#define JOB_POLYGON_LIST (0x03) + +struct job_head { + u32 status; + u32 not_complete_index; + u64 fault_addr; + u16 flags; + u16 index; + u16 dependencies[2]; + union { + u64 _64; + u32 _32; + } next; + u32 x[2]; + union { + u64 _64; + u32 _32; + } fragment_fbd; +}; + +static void dump_job_head(struct kbase_context *kctx, char *head_str, + struct job_head *job) +{ +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kctx->kbdev->dev, "%s\n", head_str); + dev_dbg(kctx->kbdev->dev, "addr = %p\n" + "status = %x\n" + "not_complete_index = %x\n" + "fault_addr = %llx\n" + "flags = %x\n" + "index = %x\n" + "dependencies = %x,%x\n", + job, job->status, job->not_complete_index, + job->fault_addr, job->flags, job->index, + job->dependencies[0], + job->dependencies[1]); + + if (job->flags & JOB_FLAG_DESC_SIZE) + dev_dbg(kctx->kbdev->dev, "next = %llx\n", + job->next._64); + else + dev_dbg(kctx->kbdev->dev, "next = %x\n", + job->next._32); +#endif +} + +static int 
kbasep_replay_reset_sfbd(struct kbase_context *kctx, + u64 fbd_address, u64 tiler_heap_free, + u16 hierarchy_mask, u32 default_weight) +{ + struct { + u32 padding_1[1]; + u32 flags; + u64 padding_2[2]; + u64 heap_free_address; + u32 padding[8]; + u32 weights[FBD_HIERARCHY_WEIGHTS]; + } *fbd_tiler; + struct kbase_vmap_struct map; + + dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address); + + fbd_tiler = kbase_vmap(kctx, fbd_address + SFBD_TILER_OFFSET, + sizeof(*fbd_tiler), &map); + if (!fbd_tiler) { + dev_err(kctx->kbdev->dev, "kbasep_replay_reset_fbd: failed to map fbd\n"); + return -EINVAL; + } + +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kctx->kbdev->dev, + "FBD tiler:\n" + "flags = %x\n" + "heap_free_address = %llx\n", + fbd_tiler->flags, fbd_tiler->heap_free_address); +#endif + if (hierarchy_mask) { + u32 weights[HIERARCHY_WEIGHTS]; + u16 old_hierarchy_mask = fbd_tiler->flags & + FBD_HIERARCHY_MASK_MASK; + int i, j = 0; + + for (i = 0; i < HIERARCHY_WEIGHTS; i++) { + if (old_hierarchy_mask & (1 << i)) { + KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); + weights[i] = fbd_tiler->weights[j++]; + } else { + weights[i] = default_weight; + } + } + + + dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n", + old_hierarchy_mask, hierarchy_mask); + + for (i = 0; i < HIERARCHY_WEIGHTS; i++) + dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n", + i, weights[i]); + + j = 0; + + for (i = 0; i < HIERARCHY_WEIGHTS; i++) { + if (hierarchy_mask & (1 << i)) { + KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); + + dev_dbg(kctx->kbdev->dev, " Writing hierarchy level %02d (%08x) to %d\n", + i, weights[i], j); + + fbd_tiler->weights[j++] = weights[i]; + } + } + + for (; j < FBD_HIERARCHY_WEIGHTS; j++) + fbd_tiler->weights[j] = 0; + + fbd_tiler->flags = hierarchy_mask | (1 << 16); + } + + fbd_tiler->heap_free_address = tiler_heap_free; + + dev_dbg(kctx->kbdev->dev, "heap_free_address=%llx flags=%x\n", + fbd_tiler->heap_free_address, fbd_tiler->flags); + + kbase_vunmap(kctx, &map); + + return 0; +} + +static int kbasep_replay_reset_mfbd(struct kbase_context *kctx, + u64 fbd_address, u64 tiler_heap_free, + u16 hierarchy_mask, u32 default_weight) +{ + struct kbase_vmap_struct map; + struct { + u32 padding_0; + u32 flags; + u64 padding_1[2]; + u64 heap_free_address; + u64 padding_2; + u32 weights[FBD_HIERARCHY_WEIGHTS]; + } *fbd_tiler; + + dev_dbg(kctx->kbdev->dev, "fbd_address: %llx\n", fbd_address); + + fbd_tiler = kbase_vmap(kctx, fbd_address + MFBD_TILER_OFFSET, + sizeof(*fbd_tiler), &map); + if (!fbd_tiler) { + dev_err(kctx->kbdev->dev, + "kbasep_replay_reset_fbd: failed to map fbd\n"); + return -EINVAL; + } + +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kctx->kbdev->dev, "FBD tiler:\n" + "flags = %x\n" + "heap_free_address = %llx\n", + fbd_tiler->flags, + fbd_tiler->heap_free_address); +#endif + if (hierarchy_mask) { + u32 weights[HIERARCHY_WEIGHTS]; + u16 old_hierarchy_mask = (fbd_tiler->flags) & + FBD_HIERARCHY_MASK_MASK; + int i, j = 0; + + for (i = 0; i < HIERARCHY_WEIGHTS; i++) { + if (old_hierarchy_mask & (1 << i)) { + KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); + weights[i] = fbd_tiler->weights[j++]; + } else { + weights[i] = default_weight; + } + } + + + dev_dbg(kctx->kbdev->dev, "Old hierarchy mask=%x New hierarchy mask=%x\n", + old_hierarchy_mask, hierarchy_mask); + + for (i = 0; i < HIERARCHY_WEIGHTS; i++) + dev_dbg(kctx->kbdev->dev, " Hierarchy weight %02d: %08x\n", + i, weights[i]); + + j = 0; + + for (i = 0; i < HIERARCHY_WEIGHTS; i++) { + if (hierarchy_mask & (1 << i)) 
{ + KBASE_DEBUG_ASSERT(j < FBD_HIERARCHY_WEIGHTS); + + dev_dbg(kctx->kbdev->dev, + " Writing hierarchy level %02d (%08x) to %d\n", + i, weights[i], j); + + fbd_tiler->weights[j++] = weights[i]; + } + } + + for (; j < FBD_HIERARCHY_WEIGHTS; j++) + fbd_tiler->weights[j] = 0; + + fbd_tiler->flags = hierarchy_mask | (1 << 16); + } + + fbd_tiler->heap_free_address = tiler_heap_free; + + kbase_vunmap(kctx, &map); + + return 0; +} + +/** + * @brief Reset the status of an FBD pointed to by a tiler job + * + * This performs two functions : + * - Set the hierarchy mask + * - Reset the tiler free heap address + * + * @param[in] kctx Context pointer + * @param[in] job_header Address of job header to reset. + * @param[in] tiler_heap_free The value to reset Tiler Heap Free to + * @param[in] hierarchy_mask The hierarchy mask to use + * @param[in] default_weight Default hierarchy weight to write when no other + * weight is given in the FBD + * @param[in] job_64 true if this job is using 64-bit + * descriptors + * + * @return 0 on success, error code on failure + */ +static int kbasep_replay_reset_tiler_job(struct kbase_context *kctx, + u64 job_header, u64 tiler_heap_free, + u16 hierarchy_mask, u32 default_weight, bool job_64) +{ + struct kbase_vmap_struct map; + u64 fbd_address; + + if (job_64) { + u64 *job_ext; + + job_ext = kbase_vmap(kctx, + job_header + JOB_HEADER_64_FBD_OFFSET, + sizeof(*job_ext), &map); + + if (!job_ext) { + dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n"); + return -EINVAL; + } + + fbd_address = *job_ext; + + kbase_vunmap(kctx, &map); + } else { + u32 *job_ext; + + job_ext = kbase_vmap(kctx, + job_header + JOB_HEADER_32_FBD_OFFSET, + sizeof(*job_ext), &map); + + if (!job_ext) { + dev_err(kctx->kbdev->dev, "kbasep_replay_reset_tiler_job: failed to map jc\n"); + return -EINVAL; + } + + fbd_address = *job_ext; + + kbase_vunmap(kctx, &map); + } + + if (fbd_address & FBD_TYPE) { + return kbasep_replay_reset_mfbd(kctx, + fbd_address & FBD_POINTER_MASK, + tiler_heap_free, + hierarchy_mask, + default_weight); + } else { + return kbasep_replay_reset_sfbd(kctx, + fbd_address & FBD_POINTER_MASK, + tiler_heap_free, + hierarchy_mask, + default_weight); + } +} + +/** + * @brief Reset the status of a job + * + * This performs the following functions : + * + * - Reset the Job Status field of each job to NOT_STARTED. + * - Set the Job Type field of any Vertex Jobs to Null Job. + * - For any jobs using an FBD, set the Tiler Heap Free field to the value of + * the tiler_heap_free parameter, and set the hierarchy level mask to the + * hier_mask parameter. + * - Offset HW dependencies by the hw_job_id_offset parameter + * - Set the Perform Job Barrier flag if this job is the first in the chain + * - Read the address of the next job header + * + * @param[in] kctx Context pointer + * @param[in,out] job_header Address of job header to reset. Set to address + * of next job header on exit. + * @param[in] prev_jc Previous job chain to link to, if this job is + * the last in the chain. 
+ * @param[in] hw_job_id_offset Offset for HW job IDs
+ * @param[in] tiler_heap_free The value to reset Tiler Heap Free to
+ * @param[in] hierarchy_mask The hierarchy mask to use
+ * @param[in] default_weight Default hierarchy weight to write when no other
+ * weight is given in the FBD
+ * @param[in] first_in_chain true if this job is the first in the chain
+ * @param[in] fragment_chain true if this job is in the fragment chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_reset_job(struct kbase_context *kctx,
+ u64 *job_header, u64 prev_jc,
+ u64 tiler_heap_free, u16 hierarchy_mask,
+ u32 default_weight, u16 hw_job_id_offset,
+ bool first_in_chain, bool fragment_chain)
+{
+ struct job_head *job;
+ u64 new_job_header;
+ struct kbase_vmap_struct map;
+
+ job = kbase_vmap(kctx, *job_header, sizeof(*job), &map);
+ if (!job) {
+ dev_err(kctx->kbdev->dev,
+ "kbasep_replay_reset_job: failed to map jc\n");
+ return -EINVAL;
+ }
+
+ dump_job_head(kctx, "Job header:", job);
+
+ if (job->status == JOB_NOT_STARTED && !fragment_chain) {
+ dev_err(kctx->kbdev->dev, "Job already not started\n");
+ goto out_unmap;
+ }
+ job->status = JOB_NOT_STARTED;
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_VERTEX)
+ job->flags = (job->flags & ~JOB_TYPE_MASK) | JOB_TYPE_NULL;
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FUSED) {
+ dev_err(kctx->kbdev->dev, "Fused jobs cannot be replayed\n");
+ goto out_unmap;
+ }
+
+ if (first_in_chain)
+ job->flags |= JOB_FLAG_PERFORM_JOB_BARRIER;
+
+ if ((job->dependencies[0] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
+ (job->dependencies[1] + hw_job_id_offset) > JOB_HEADER_ID_MAX ||
+ (job->index + hw_job_id_offset) > JOB_HEADER_ID_MAX) {
+ dev_err(kctx->kbdev->dev,
+ "Job indices/dependencies out of valid range\n");
+ goto out_unmap;
+ }
+
+ if (job->dependencies[0])
+ job->dependencies[0] += hw_job_id_offset;
+ if (job->dependencies[1])
+ job->dependencies[1] += hw_job_id_offset;
+
+ job->index += hw_job_id_offset;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE) {
+ new_job_header = job->next._64;
+ if (!job->next._64)
+ job->next._64 = prev_jc;
+ } else {
+ new_job_header = job->next._32;
+ if (!job->next._32)
+ job->next._32 = prev_jc;
+ }
+ dump_job_head(kctx, "Updated to:", job);
+
+ if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_TILER) {
+ bool job_64 = (job->flags & JOB_FLAG_DESC_SIZE) != 0;
+
+ if (kbasep_replay_reset_tiler_job(kctx, *job_header,
+ tiler_heap_free, hierarchy_mask,
+ default_weight, job_64) != 0)
+ goto out_unmap;
+
+ } else if ((job->flags & JOB_TYPE_MASK) == JOB_TYPE_FRAGMENT) {
+ u64 fbd_address;
+
+ if (job->flags & JOB_FLAG_DESC_SIZE)
+ fbd_address = job->fragment_fbd._64;
+ else
+ fbd_address = (u64)job->fragment_fbd._32;
+
+ if (fbd_address & FBD_TYPE) {
+ if (kbasep_replay_reset_mfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight) != 0)
+ goto out_unmap;
+ } else {
+ if (kbasep_replay_reset_sfbd(kctx,
+ fbd_address & FBD_POINTER_MASK,
+ tiler_heap_free,
+ hierarchy_mask,
+ default_weight) != 0)
+ goto out_unmap;
+ }
+ }
+
+ kbase_vunmap(kctx, &map);
+
+ *job_header = new_job_header;
+
+ return 0;
+
+out_unmap:
+ kbase_vunmap(kctx, &map);
+ return -EINVAL;
+}
+
+/**
+ * @brief Find the highest job ID in a job chain
+ *
+ * @param[in] kctx Context pointer
+ * @param[in] jc Job chain start address
+ * @param[out] hw_job_id Highest job ID in chain
+ *
+ * @return 0 on success, error code on failure
+ */
+static int kbasep_replay_find_hw_job_id(struct kbase_context
*kctx, + u64 jc, u16 *hw_job_id) +{ + while (jc) { + struct job_head *job; + struct kbase_vmap_struct map; + + dev_dbg(kctx->kbdev->dev, + "kbasep_replay_find_hw_job_id: parsing jc=%llx\n", jc); + + job = kbase_vmap(kctx, jc, sizeof(*job), &map); + if (!job) { + dev_err(kctx->kbdev->dev, "failed to map jc\n"); + + return -EINVAL; + } + + if (job->index > *hw_job_id) + *hw_job_id = job->index; + + if (job->flags & JOB_FLAG_DESC_SIZE) + jc = job->next._64; + else + jc = job->next._32; + + kbase_vunmap(kctx, &map); + } + + return 0; +} + +/** + * @brief Reset the status of a number of jobs + * + * This function walks the provided job chain, and calls + * kbasep_replay_reset_job for each job. It also links the job chain to the + * provided previous job chain. + * + * The function will fail if any of the jobs passed already have status of + * NOT_STARTED. + * + * @param[in] kctx Context pointer + * @param[in] jc Job chain to be processed + * @param[in] prev_jc Job chain to be added to. May be NULL + * @param[in] tiler_heap_free The value to reset Tiler Heap Free to + * @param[in] hierarchy_mask The hierarchy mask to use + * @param[in] default_weight Default hierarchy weight to write when no other + * weight is given in the FBD + * @param[in] hw_job_id_offset Offset for HW job IDs + * @param[in] fragment_chain true if this chain is the fragment chain + * + * @return 0 on success, error code otherwise + */ +static int kbasep_replay_parse_jc(struct kbase_context *kctx, + u64 jc, u64 prev_jc, + u64 tiler_heap_free, u16 hierarchy_mask, + u32 default_weight, u16 hw_job_id_offset, + bool fragment_chain) +{ + bool first_in_chain = true; + int nr_jobs = 0; + + dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: jc=%llx hw_job_id=%x\n", + jc, hw_job_id_offset); + + while (jc) { + dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_jc: parsing jc=%llx\n", jc); + + if (kbasep_replay_reset_job(kctx, &jc, prev_jc, + tiler_heap_free, hierarchy_mask, + default_weight, hw_job_id_offset, + first_in_chain, fragment_chain) != 0) + return -EINVAL; + + first_in_chain = false; + + nr_jobs++; + if (fragment_chain && + nr_jobs >= BASE_JD_REPLAY_F_CHAIN_JOB_LIMIT) { + dev_err(kctx->kbdev->dev, + "Exceeded maximum number of jobs in fragment chain\n"); + return -EINVAL; + } + } + + return 0; +} + +/** + * @brief Reset the status of a replay job, and set up dependencies + * + * This performs the actions to allow the replay job to be re-run following + * completion of the passed dependency. + * + * @param[in] katom The atom to be reset + * @param[in] dep_atom The dependency to be attached to the atom + */ +static void kbasep_replay_reset_softjob(struct kbase_jd_atom *katom, + struct kbase_jd_atom *dep_atom) +{ + katom->status = KBASE_JD_ATOM_STATE_QUEUED; + kbase_jd_katom_dep_set(&katom->dep[0], dep_atom, BASE_JD_DEP_TYPE_DATA); + list_add_tail(&katom->dep_item[0], &dep_atom->dep_head[0]); +} + +/** + * @brief Allocate an unused katom + * + * This will search the provided context for an unused katom, and will mark it + * as KBASE_JD_ATOM_STATE_QUEUED. + * + * If no atoms are available then the function will fail. 
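+ *
+ * Typical caller pattern, as a sketch (this mirrors what
+ * kbasep_replay_create_atoms() below does, including the error check):
+ *   int atom_nr = kbasep_allocate_katom(kctx);
+ *
+ *   if (atom_nr < 0)
+ *           return -EINVAL; /* no unused atom slots available */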
+ * + * @param[in] kctx Context pointer + * @return An atom ID, or -1 on failure + */ +static int kbasep_allocate_katom(struct kbase_context *kctx) +{ + struct kbase_jd_context *jctx = &kctx->jctx; + int i; + + for (i = BASE_JD_ATOM_COUNT-1; i > 0; i--) { + if (jctx->atoms[i].status == KBASE_JD_ATOM_STATE_UNUSED) { + jctx->atoms[i].status = KBASE_JD_ATOM_STATE_QUEUED; + dev_dbg(kctx->kbdev->dev, + "kbasep_allocate_katom: Allocated atom %d\n", + i); + return i; + } + } + + return -1; +} + +/** + * @brief Release a katom + * + * This will mark the provided atom as available, and remove any dependencies. + * + * For use on error path. + * + * @param[in] kctx Context pointer + * @param[in] atom_id ID of atom to release + */ +static void kbasep_release_katom(struct kbase_context *kctx, int atom_id) +{ + struct kbase_jd_context *jctx = &kctx->jctx; + + dev_dbg(kctx->kbdev->dev, "kbasep_release_katom: Released atom %d\n", + atom_id); + + while (!list_empty(&jctx->atoms[atom_id].dep_head[0])) + list_del(jctx->atoms[atom_id].dep_head[0].next); + + while (!list_empty(&jctx->atoms[atom_id].dep_head[1])) + list_del(jctx->atoms[atom_id].dep_head[1].next); + + jctx->atoms[atom_id].status = KBASE_JD_ATOM_STATE_UNUSED; +} + +static void kbasep_replay_create_atom(struct kbase_context *kctx, + struct base_jd_atom_v2 *atom, + int atom_nr, + base_jd_prio prio) +{ + atom->nr_extres = 0; + atom->extres_list.value = NULL; + atom->device_nr = 0; + atom->prio = prio; + atom->atom_number = atom_nr; + + base_jd_atom_dep_set(&atom->pre_dep[0], 0 , BASE_JD_DEP_TYPE_INVALID); + base_jd_atom_dep_set(&atom->pre_dep[1], 0 , BASE_JD_DEP_TYPE_INVALID); + + atom->udata.blob[0] = 0; + atom->udata.blob[1] = 0; +} + +/** + * @brief Create two atoms for the purpose of replaying jobs + * + * Two atoms are allocated and created. The jc pointer is not set at this + * stage. The second atom has a dependency on the first. The remaining fields + * are set up as follows : + * + * - No external resources. Any required external resources will be held by the + * replay atom. + * - device_nr is set to 0. This is not relevant as + * BASE_JD_REQ_SPECIFIC_COHERENT_GROUP should not be set. + * - Priority is inherited from the replay job. 
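+ *
+ * Schematically (atom numbers come from kbasep_allocate_katom()):
+ *   t_atom carries the tiler chain and f_atom the fragment chain, with
+ *   f_atom->pre_dep[0] holding a BASE_JD_DEP_TYPE_DATA dependency on
+ *   t_atom, so the fragment jobs only run once the tiler jobs complete.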
+ * + * @param[out] t_atom Atom to use for tiler jobs + * @param[out] f_atom Atom to use for fragment jobs + * @param[in] prio Priority of new atom (inherited from replay soft + * job) + * @return 0 on success, error code on failure + */ +static int kbasep_replay_create_atoms(struct kbase_context *kctx, + struct base_jd_atom_v2 *t_atom, + struct base_jd_atom_v2 *f_atom, + base_jd_prio prio) +{ + int t_atom_nr, f_atom_nr; + + t_atom_nr = kbasep_allocate_katom(kctx); + if (t_atom_nr < 0) { + dev_err(kctx->kbdev->dev, "Failed to allocate katom\n"); + return -EINVAL; + } + + f_atom_nr = kbasep_allocate_katom(kctx); + if (f_atom_nr < 0) { + dev_err(kctx->kbdev->dev, "Failed to allocate katom\n"); + kbasep_release_katom(kctx, t_atom_nr); + return -EINVAL; + } + + kbasep_replay_create_atom(kctx, t_atom, t_atom_nr, prio); + kbasep_replay_create_atom(kctx, f_atom, f_atom_nr, prio); + + base_jd_atom_dep_set(&f_atom->pre_dep[0], t_atom_nr , BASE_JD_DEP_TYPE_DATA); + + return 0; +} + +#ifdef CONFIG_MALI_DEBUG +static void payload_dump(struct kbase_context *kctx, base_jd_replay_payload *payload) +{ + u64 next; + + dev_dbg(kctx->kbdev->dev, "Tiler jc list :\n"); + next = payload->tiler_jc_list; + + while (next) { + struct kbase_vmap_struct map; + base_jd_replay_jc *jc_struct; + + jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &map); + + if (!jc_struct) + return; + + dev_dbg(kctx->kbdev->dev, "* jc_struct=%p jc=%llx next=%llx\n", + jc_struct, jc_struct->jc, jc_struct->next); + + next = jc_struct->next; + + kbase_vunmap(kctx, &map); + } +} +#endif + +/** + * @brief Parse a base_jd_replay_payload provided by userspace + * + * This will read the payload from userspace, and parse the job chains. + * + * @param[in] kctx Context pointer + * @param[in] replay_atom Replay soft job atom + * @param[in] t_atom Atom to use for tiler jobs + * @param[in] f_atom Atom to use for fragment jobs + * @return 0 on success, error code on failure + */ +static int kbasep_replay_parse_payload(struct kbase_context *kctx, + struct kbase_jd_atom *replay_atom, + struct base_jd_atom_v2 *t_atom, + struct base_jd_atom_v2 *f_atom) +{ + base_jd_replay_payload *payload; + u64 next; + u64 prev_jc = 0; + u16 hw_job_id_offset = 0; + int ret = -EINVAL; + struct kbase_vmap_struct map; + + dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: replay_atom->jc = %llx sizeof(payload) = %zu\n", + replay_atom->jc, sizeof(payload)); + + payload = kbase_vmap(kctx, replay_atom->jc, sizeof(*payload), &map); + + if (!payload) { + dev_err(kctx->kbdev->dev, "kbasep_replay_parse_payload: failed to map payload into kernel space\n"); + return -EINVAL; + } + +#ifdef CONFIG_MALI_DEBUG + dev_dbg(kctx->kbdev->dev, "kbasep_replay_parse_payload: payload=%p\n", payload); + dev_dbg(kctx->kbdev->dev, "Payload structure:\n" + "tiler_jc_list = %llx\n" + "fragment_jc = %llx\n" + "tiler_heap_free = %llx\n" + "fragment_hierarchy_mask = %x\n" + "tiler_hierarchy_mask = %x\n" + "hierarchy_default_weight = %x\n" + "tiler_core_req = %x\n" + "fragment_core_req = %x\n", + payload->tiler_jc_list, + payload->fragment_jc, + payload->tiler_heap_free, + payload->fragment_hierarchy_mask, + payload->tiler_hierarchy_mask, + payload->hierarchy_default_weight, + payload->tiler_core_req, + payload->fragment_core_req); + payload_dump(kctx, payload); +#endif + + t_atom->core_req = payload->tiler_core_req | BASEP_JD_REQ_EVENT_NEVER; + f_atom->core_req = payload->fragment_core_req | BASEP_JD_REQ_EVENT_NEVER; + + /* Sanity check core requirements*/ + if ((t_atom->core_req & 
BASEP_JD_REQ_ATOM_TYPE & + ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_T || + (f_atom->core_req & BASEP_JD_REQ_ATOM_TYPE & + ~BASE_JD_REQ_COHERENT_GROUP) != BASE_JD_REQ_FS || + t_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES || + f_atom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) { + dev_err(kctx->kbdev->dev, "Invalid core requirements\n"); + goto out; + } + + /* Process tiler job chains */ + next = payload->tiler_jc_list; + if (!next) { + dev_err(kctx->kbdev->dev, "Invalid tiler JC list\n"); + goto out; + } + + while (next) { + base_jd_replay_jc *jc_struct; + struct kbase_vmap_struct jc_map; + u64 jc; + + jc_struct = kbase_vmap(kctx, next, sizeof(*jc_struct), &jc_map); + + if (!jc_struct) { + dev_err(kctx->kbdev->dev, "Failed to map jc struct\n"); + goto out; + } + + jc = jc_struct->jc; + next = jc_struct->next; + if (next) + jc_struct->jc = 0; + + kbase_vunmap(kctx, &jc_map); + + if (jc) { + u16 max_hw_job_id = 0; + + if (kbasep_replay_find_hw_job_id(kctx, jc, + &max_hw_job_id) != 0) + goto out; + + if (kbasep_replay_parse_jc(kctx, jc, prev_jc, + payload->tiler_heap_free, + payload->tiler_hierarchy_mask, + payload->hierarchy_default_weight, + hw_job_id_offset, false) != 0) { + goto out; + } + + hw_job_id_offset += max_hw_job_id; + + prev_jc = jc; + } + } + t_atom->jc = prev_jc; + + /* Process fragment job chain */ + f_atom->jc = payload->fragment_jc; + if (kbasep_replay_parse_jc(kctx, payload->fragment_jc, 0, + payload->tiler_heap_free, + payload->fragment_hierarchy_mask, + payload->hierarchy_default_weight, 0, + true) != 0) { + goto out; + } + + if (!t_atom->jc || !f_atom->jc) { + dev_err(kctx->kbdev->dev, "Invalid payload\n"); + goto out; + } + + dev_dbg(kctx->kbdev->dev, "t_atom->jc=%llx f_atom->jc=%llx\n", + t_atom->jc, f_atom->jc); + ret = 0; + +out: + kbase_vunmap(kctx, &map); + + return ret; +} + +static void kbase_replay_process_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom; + struct kbase_context *kctx; + struct kbase_jd_context *jctx; + bool need_to_try_schedule_context = false; + + struct base_jd_atom_v2 t_atom, f_atom; + struct kbase_jd_atom *t_katom, *f_katom; + base_jd_prio atom_prio; + + katom = container_of(data, struct kbase_jd_atom, work); + kctx = katom->kctx; + jctx = &kctx->jctx; + + mutex_lock(&jctx->lock); + + atom_prio = kbasep_js_sched_prio_to_atom_prio(katom->sched_priority); + + if (kbasep_replay_create_atoms( + kctx, &t_atom, &f_atom, atom_prio) != 0) { + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + goto out; + } + + t_katom = &jctx->atoms[t_atom.atom_number]; + f_katom = &jctx->atoms[f_atom.atom_number]; + + if (kbasep_replay_parse_payload(kctx, katom, &t_atom, &f_atom) != 0) { + kbasep_release_katom(kctx, t_atom.atom_number); + kbasep_release_katom(kctx, f_atom.atom_number); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + goto out; + } + + kbasep_replay_reset_softjob(katom, f_katom); + + need_to_try_schedule_context |= jd_submit_atom(kctx, &t_atom, t_katom); + if (t_katom->event_code == BASE_JD_EVENT_JOB_INVALID) { + dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n"); + kbasep_release_katom(kctx, f_atom.atom_number); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + goto out; + } + need_to_try_schedule_context |= jd_submit_atom(kctx, &f_atom, f_katom); + if (f_katom->event_code == BASE_JD_EVENT_JOB_INVALID) { + dev_err(kctx->kbdev->dev, "Replay failed to submit atom\n"); + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + goto out; + } + + katom->event_code = BASE_JD_EVENT_DONE; + +out: + if (katom->event_code != 
BASE_JD_EVENT_DONE) { + kbase_disjoint_state_down(kctx->kbdev); + + need_to_try_schedule_context |= jd_done_nolock(katom); + } + + if (need_to_try_schedule_context) + kbase_js_sched_all(kctx->kbdev); + + mutex_unlock(&jctx->lock); +} + +/** + * @brief Check job replay fault + * + * This will read the job payload, checks fault type and source, then decides + * whether replay is required. + * + * @param[in] katom The atom to be processed + * @return true (success) if replay required or false on failure. + */ +static bool kbase_replay_fault_check(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct device *dev = kctx->kbdev->dev; + base_jd_replay_payload *payload; + u64 job_header; + u64 job_loop_detect; + struct job_head *job; + struct kbase_vmap_struct job_map; + struct kbase_vmap_struct map; + bool err = false; + + /* Replay job if fault is of type BASE_JD_EVENT_JOB_WRITE_FAULT or + * if force_replay is enabled. + */ + if (BASE_JD_EVENT_TERMINATED == katom->event_code) { + return false; + } else if (BASE_JD_EVENT_JOB_WRITE_FAULT == katom->event_code) { + return true; + } else if (BASE_JD_EVENT_FORCE_REPLAY == katom->event_code) { + katom->event_code = BASE_JD_EVENT_DATA_INVALID_FAULT; + return true; + } else if (BASE_JD_EVENT_DATA_INVALID_FAULT != katom->event_code) { + /* No replay for faults of type other than + * BASE_JD_EVENT_DATA_INVALID_FAULT. + */ + return false; + } + + /* Job fault is BASE_JD_EVENT_DATA_INVALID_FAULT, now scan fragment jc + * to find out whether the source of exception is POLYGON_LIST. Replay + * is required if the source of fault is POLYGON_LIST. + */ + payload = kbase_vmap(kctx, katom->jc, sizeof(*payload), &map); + if (!payload) { + dev_err(dev, "kbase_replay_fault_check: failed to map payload.\n"); + return false; + } + +#ifdef CONFIG_MALI_DEBUG + dev_dbg(dev, "kbase_replay_fault_check: payload=%p\n", payload); + dev_dbg(dev, "\nPayload structure:\n" + "fragment_jc = 0x%llx\n" + "fragment_hierarchy_mask = 0x%x\n" + "fragment_core_req = 0x%x\n", + payload->fragment_jc, + payload->fragment_hierarchy_mask, + payload->fragment_core_req); +#endif + /* Process fragment job chain */ + job_header = (u64) payload->fragment_jc; + job_loop_detect = job_header; + while (job_header) { + job = kbase_vmap(kctx, job_header, sizeof(*job), &job_map); + if (!job) { + dev_err(dev, "failed to map jc\n"); + /* unmap payload*/ + kbase_vunmap(kctx, &map); + return false; + } + + +#ifdef CONFIG_MALI_DEBUG + dev_dbg(dev, "\njob_head structure:\n" + "Source ID:0x%x Access:0x%x Exception:0x%x\n" + "at job addr = %p\n" + "not_complete_index = 0x%x\n" + "fault_addr = 0x%llx\n" + "flags = 0x%x\n" + "index = 0x%x\n" + "dependencies = 0x%x,0x%x\n", + JOB_SOURCE_ID(job->status), + ((job->status >> 8) & 0x3), + (job->status & 0xFF), + job, + job->not_complete_index, + job->fault_addr, + job->flags, + job->index, + job->dependencies[0], + job->dependencies[1]); +#endif + + /* Replay only when the polygon list reader caused the + * DATA_INVALID_FAULT */ + if ((BASE_JD_EVENT_DATA_INVALID_FAULT == katom->event_code) && + (JOB_POLYGON_LIST == JOB_SOURCE_ID(job->status))) { + err = true; + kbase_vunmap(kctx, &job_map); + break; + } + + /* Move on to next fragment job in the list */ + if (job->flags & JOB_FLAG_DESC_SIZE) + job_header = job->next._64; + else + job_header = job->next._32; + + kbase_vunmap(kctx, &job_map); + + /* Job chain loop detected */ + if (job_header == job_loop_detect) + break; + } + + /* unmap payload*/ + kbase_vunmap(kctx, &map); + + return err; 
+} + + +/** + * @brief Process a replay job + * + * Called from kbase_process_soft_job. + * + * On exit, if the job has completed, katom->event_code will have been updated. + * If the job has not completed, and is replaying jobs, then the atom status + * will have been reset to KBASE_JD_ATOM_STATE_QUEUED. + * + * @param[in] katom The atom to be processed + * @return false if the atom has completed + * true if the atom is replaying jobs + */ +bool kbase_replay_process(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + struct kbase_jd_context *jctx = &kctx->jctx; + + if (katom->event_code == BASE_JD_EVENT_DONE) { + dev_dbg(kctx->kbdev->dev, "Previous job succeeded - not replaying\n"); + + if (katom->retry_count) + kbase_disjoint_state_down(kctx->kbdev); + + return false; + } + + if (jctx->sched_info.ctx.is_dying) { + dev_dbg(kctx->kbdev->dev, "Not replaying; context is dying\n"); + + if (katom->retry_count) + kbase_disjoint_state_down(kctx->kbdev); + + return false; + } + + /* Check job exception type and source before replaying. */ + if (!kbase_replay_fault_check(katom)) { + dev_dbg(kctx->kbdev->dev, + "Replay cancelled on event %x\n", katom->event_code); + /* katom->event_code is already set to the failure code of the + * previous job. + */ + return false; + } + + dev_warn(kctx->kbdev->dev, "Replaying jobs retry=%d\n", + katom->retry_count); + + katom->retry_count++; + + if (katom->retry_count > BASEP_JD_REPLAY_LIMIT) { + dev_err(kctx->kbdev->dev, "Replay exceeded limit - failing jobs\n"); + + kbase_disjoint_state_down(kctx->kbdev); + + /* katom->event_code is already set to the failure code of the + previous job */ + return false; + } + + /* only enter the disjoint state once for the whole time while the replay is ongoing */ + if (katom->retry_count == 1) + kbase_disjoint_state_up(kctx->kbdev); + + INIT_WORK(&katom->work, kbase_replay_process_worker); + queue_work(kctx->event_workq, &katom->work); + + return true; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.c b/drivers/gpu/arm/midgard/mali_kbase_security.c new file mode 100644 index 00000000000..a0bb3529baf --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_security.c @@ -0,0 +1,76 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_security.c + * Base kernel security capability API + */ + +#include <mali_kbase.h> + +static inline bool kbasep_am_i_root(void) +{ +#if KBASE_HWCNT_DUMP_BYPASS_ROOT + return true; +#else + /* Check if root */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0) + if (uid_eq(current_euid(), GLOBAL_ROOT_UID)) + return true; +#else + if (current_euid() == 0) + return true; +#endif /*LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)*/ + return false; +#endif /*KBASE_HWCNT_DUMP_BYPASS_ROOT*/ +} + +/** + * kbase_security_has_capability - see mali_kbase_caps.h for description. 
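+ *
+ * Illustrative call site (a sketch only; the capability and flag names
+ * are those defined in mali_kbase_security.h):
+ *   if (!kbase_security_has_capability(kctx,
+ *                   KBASE_SEC_INSTR_HW_COUNTERS_COLLECT,
+ *                   KBASE_SEC_FLAG_AUDIT))
+ *           return -EINVAL; /* denied; failure is audited via dev_warn */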
+ */ + +bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags) +{ + /* Assume failure */ + bool access_allowed = false; + bool audit = KBASE_SEC_FLAG_AUDIT & flags; + + KBASE_DEBUG_ASSERT(NULL != kctx); + CSTD_UNUSED(kctx); + + /* Detect unsupported flags */ + KBASE_DEBUG_ASSERT(((~KBASE_SEC_FLAG_MASK) & flags) == 0); + + /* Determine if access is allowed for the given cap */ + switch (cap) { + case KBASE_SEC_MODIFY_PRIORITY: + case KBASE_SEC_INSTR_HW_COUNTERS_COLLECT: + /* Access is granted only if the caller is privileged */ + access_allowed = kbasep_am_i_root(); + break; + } + + /* Report problem if requested */ + if (!access_allowed && audit) + dev_warn(kctx->kbdev->dev, "Security capability failure: %d, %p", cap, (void *)kctx); + + return access_allowed; +} + +KBASE_EXPORT_TEST_API(kbase_security_has_capability); diff --git a/drivers/gpu/arm/midgard/mali_kbase_security.h b/drivers/gpu/arm/midgard/mali_kbase_security.h new file mode 100644 index 00000000000..024a7ee1aab --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_security.h @@ -0,0 +1,52 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_security.h + * Base kernel security capability APIs + */ + +#ifndef _KBASE_SECURITY_H_ +#define _KBASE_SECURITY_H_ + +/* Security flags */ +#define KBASE_SEC_FLAG_NOAUDIT (0u << 0) /* Silently handle privilege failure */ +#define KBASE_SEC_FLAG_AUDIT (1u << 0) /* Write audit message on privilege failure */ +#define KBASE_SEC_FLAG_MASK (KBASE_SEC_FLAG_AUDIT) /* Mask of all valid flag bits */ + +/* List of unique capabilities that have security access privileges */ +enum kbase_security_capability { + /* Instrumentation Counters access privilege */ + KBASE_SEC_INSTR_HW_COUNTERS_COLLECT = 1, + KBASE_SEC_MODIFY_PRIORITY + /* Add additional access privileges here */ +}; + +/** + * kbase_security_has_capability - determine whether a task has a particular effective capability + * @param[in] kctx The task context. + * @param[in] cap The capability to check for. + * @param[in] flags Additional configuration information, + * such as whether to write an audit message or not. + * @return true if the capability is allowed, false otherwise. + */ + +bool kbase_security_has_capability(struct kbase_context *kctx, enum kbase_security_capability cap, u32 flags); + +#endif /* _KBASE_SECURITY_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.c b/drivers/gpu/arm/midgard/mali_kbase_smc.c new file mode 100644 index 00000000000..aa2da3ab7e9 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_smc.c @@ -0,0 +1,77 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence.
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifdef CONFIG_ARM64 + +#include <mali_kbase.h> +#include <mali_kbase_smc.h> + + +static noinline u32 invoke_smc_fid(u32 function_id, u64 arg0, u64 arg1, + u64 arg2, u64 *res0, u64 *res1, u64 *res2) +{ + /* 3 args and 3 returns are chosen arbitrarily, + see the SMC calling convention for limits */ + asm volatile( + "mov x0, %[fid]\n" + "mov x1, %[a0]\n" + "mov x2, %[a1]\n" + "mov x3, %[a2]\n" + "smc #0\n" + "str x0, [%[re0]]\n" + "str x1, [%[re1]]\n" + "str x2, [%[re2]]\n" + : [fid] "+r" (function_id), [a0] "+r" (arg0), + [a1] "+r" (arg1), [a2] "+r" (arg2) + : [re0] "r" (res0), [re1] "r" (res1), [re2] "r" (res2) + : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", + "x14", "x15", "x16", "x17"); + return function_id; +} + +void kbase_invoke_smc_fid(u32 fid) +{ + u64 res0, res1, res2; + + /* Is fast call (bit 31 set) */ + KBASE_DEBUG_ASSERT(fid & SMC_FAST_CALL); + /* bits 16-23 must be zero for fast calls */ + KBASE_DEBUG_ASSERT((fid & (0xFF << 16)) == 0); + + invoke_smc_fid(fid, 0, 0, 0, &res0, &res1, &res2); +} + +void kbase_invoke_smc(u32 oen, u16 function_number, u64 arg0, u64 arg1, + u64 arg2, u64 *res0, u64 *res1, u64 *res2) +{ + u32 fid = 0; + + /* Only the six allowed OEN bits should be used. */ + KBASE_DEBUG_ASSERT((oen & ~SMC_OEN_MASK) == 0); + + fid |= SMC_FAST_CALL; /* Bit 31: Fast call */ + /* Bit 30: 1=SMC64, 0=SMC32 */ + fid |= oen; /* Bit 29:24: OEN */ + /* Bit 23:16: Must be zero for fast calls */ + fid |= (function_number); /* Bit 15:0: function number */ + + invoke_smc_fid(fid, arg0, arg1, arg2, res0, res1, res2); +} + +#endif /* CONFIG_ARM64 */ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_smc.h b/drivers/gpu/arm/midgard/mali_kbase_smc.h new file mode 100644 index 00000000000..2d0f5086f75 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_smc.h @@ -0,0 +1,58 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _KBASE_SMC_H_ +#define _KBASE_SMC_H_ + +#ifdef CONFIG_ARM64 + +#include <mali_kbase.h> + +#define SMC_FAST_CALL (1 << 31) + +#define SMC_OEN_OFFSET 24 +#define SMC_OEN_MASK (0x3F << SMC_OEN_OFFSET) /* 6 bits */ +#define SMC_OEN_SIP (2 << SMC_OEN_OFFSET) +#define SMC_OEN_STD (4 << SMC_OEN_OFFSET) + + +/** + * kbase_invoke_smc_fid - Does a secure monitor call with the given function_id + * @function_id: The SMC function to call, see SMC Calling convention. + */ +void kbase_invoke_smc_fid(u32 function_id); + +/** + * kbase_invoke_smc - Does a secure monitor call with the given parameters; + * see the SMC Calling Convention for details. + * @oen: Owning Entity number (SIP, STD etc). + * @function_number: ID specifying which function within the OEN. + * @arg0: argument 0 to pass in the SMC call. + * @arg1: argument 1 to pass in the SMC call. + * @arg2: argument 2 to pass in the SMC call. + * @res0: result 0 returned from the SMC call.
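As a reference for the encoding used by kbase_invoke_smc(): per the SMC Calling Convention, a fast-call function ID sets bit 31, carries the OEN in bits 29:24, keeps bits 23:16 zero, and puts the function number in bits 15:0. A small host-side sketch (hypothetical, no actual smc instruction) that reproduces the arithmetic:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Values mirrored from mali_kbase_smc.h. */
#define SMC_FAST_CALL  (1u << 31)
#define SMC_OEN_OFFSET 24
#define SMC_OEN_MASK   (0x3Fu << SMC_OEN_OFFSET)
#define SMC_OEN_SIP    (2u << SMC_OEN_OFFSET)

/* Assemble a fast-call function ID the way kbase_invoke_smc() does. */
static uint32_t make_fast_call_fid(uint32_t oen, uint16_t function_number)
{
	uint32_t fid = 0;

	assert((oen & ~SMC_OEN_MASK) == 0);  /* OEN must fit in bits 29:24 */

	fid |= SMC_FAST_CALL;        /* bit 31: fast call                */
	fid |= oen;                  /* bits 29:24: owning entity number */
	/* bits 23:16 stay zero, as required for fast calls */
	fid |= function_number;      /* bits 15:0: function number       */
	return fid;
}

int main(void)
{
	uint32_t fid = make_fast_call_fid(SMC_OEN_SIP, 0x42);

	assert(fid & SMC_FAST_CALL);
	assert((fid & (0xFFu << 16)) == 0);   /* fast-call reserved bits   */
	printf("fid = 0x%08x\n", fid);        /* expect 0x82000042         */
	return 0;
}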
+ * @res1: result 1 returned from the SMC call. + * @res2: result 2 returned from the SMC call. + */ +void kbase_invoke_smc(u32 oen, u16 function_number, u64 arg0, u64 arg1, + u64 arg2, u64 *res0, u64 *res1, u64 *res2); + +#endif /* CONFIG_ARM64 */ + +#endif /* _KBASE_SMC_H_ */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_softjobs.c b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c new file mode 100644 index 00000000000..36a61308dc8 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_softjobs.c @@ -0,0 +1,442 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include <mali_kbase.h> + +#include <linux/dma-mapping.h> +#ifdef CONFIG_SYNC +#include "sync.h" +#include <linux/syscalls.h> +#include "mali_kbase_sync.h" +#endif +#include <mali_kbase_hwaccess_time.h> +#include <linux/version.h> + +/* Mask to check cache alignment of data structures */ +#define KBASE_CACHE_ALIGNMENT_MASK ((1<<L1_CACHE_SHIFT)-1) + +/** + * @file mali_kbase_softjobs.c + * + * This file implements the logic behind software only jobs that are + * executed within the driver rather than being handed over to the GPU. + */ + +static int kbase_dump_cpu_gpu_time(struct kbase_jd_atom *katom) +{ + struct kbase_va_region *reg; + phys_addr_t addr = 0; + u64 pfn; + u32 offset; + char *page; + struct timespec ts; + struct base_dump_cpu_gpu_counters data; + u64 system_time; + u64 cycle_counter; + u64 jc = katom->jc; + struct kbase_context *kctx = katom->kctx; + int pm_active_err; + + memset(&data, 0, sizeof(data)); + + /* Take the PM active reference as late as possible - otherwise, it could + * delay suspend until we process the atom (which may be at the end of a + * long chain of dependencies */ + pm_active_err = kbase_pm_context_active_handle_suspend(kctx->kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE); + if (pm_active_err) { + struct kbasep_js_device_data *js_devdata = &kctx->kbdev->js_data; + + /* We're suspended - queue this on the list of suspended jobs + * Use dep_item[1], because dep_item[0] is in use for 'waiting_soft_jobs' */ + mutex_lock(&js_devdata->runpool_mutex); + list_add_tail(&katom->dep_item[1], &js_devdata->suspended_soft_jobs_list); + mutex_unlock(&js_devdata->runpool_mutex); + + return pm_active_err; + } + + kbase_backend_get_gpu_time(kctx->kbdev, &cycle_counter, &system_time, + &ts); + + kbase_pm_context_idle(kctx->kbdev); + + data.sec = ts.tv_sec; + data.usec = ts.tv_nsec / 1000; + data.system_time = system_time; + data.cycle_counter = cycle_counter; + + pfn = jc >> PAGE_SHIFT; + offset = jc & ~PAGE_MASK; + + /* Assume this atom will be cancelled until we know otherwise */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + if (offset > 0x1000 - sizeof(data)) { + /* Wouldn't fit in the page */ + return 0; + } + + kbase_gpu_vm_lock(kctx); + reg = kbase_region_tracker_find_region_enclosing_address(kctx, jc); + if (reg && + (reg->flags & KBASE_REG_GPU_WR) && + reg->cpu_alloc && reg->cpu_alloc->pages) + addr = reg->cpu_alloc->pages[pfn - reg->start_pfn]; + + kbase_gpu_vm_unlock(kctx); + if (!addr) + return 0; + + page = 
kmap(pfn_to_page(PFN_DOWN(addr))); + if (!page) + return 0; + + kbase_sync_single_for_cpu(katom->kctx->kbdev, + kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) + + offset, sizeof(data), + DMA_BIDIRECTIONAL); + + memcpy(page + offset, &data, sizeof(data)); + + kbase_sync_single_for_device(katom->kctx->kbdev, + kbase_dma_addr(pfn_to_page(PFN_DOWN(addr))) + + offset, sizeof(data), + DMA_BIDIRECTIONAL); + + kunmap(pfn_to_page(PFN_DOWN(addr))); + + /* Atom was fine - mark it as done */ + katom->event_code = BASE_JD_EVENT_DONE; + + return 0; +} + +#ifdef CONFIG_SYNC + +/* Complete an atom that has returned '1' from kbase_process_soft_job (i.e. has waited) + * + * @param katom The atom to complete + */ +static void complete_soft_job(struct kbase_jd_atom *katom) +{ + struct kbase_context *kctx = katom->kctx; + + mutex_lock(&kctx->jctx.lock); + list_del(&katom->dep_item[0]); + kbase_finish_soft_job(katom); + if (jd_done_nolock(katom)) + kbase_js_sched_all(kctx->kbdev); + mutex_unlock(&kctx->jctx.lock); +} + +static enum base_jd_event_code kbase_fence_trigger(struct kbase_jd_atom *katom, int result) +{ + struct sync_pt *pt; + struct sync_timeline *timeline; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + if (!list_is_singular(&katom->fence->pt_list_head)) { +#else + if (katom->fence->num_fences != 1) { +#endif + /* Not exactly one item in the list - so it didn't (directly) come from us */ + return BASE_JD_EVENT_JOB_CANCELLED; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + pt = list_first_entry(&katom->fence->pt_list_head, struct sync_pt, pt_list); +#else + pt = container_of(katom->fence->cbs[0].sync_pt, struct sync_pt, base); +#endif + timeline = sync_pt_parent(pt); + + if (!kbase_sync_timeline_is_ours(timeline)) { + /* Fence has a sync_pt which isn't ours! */ + return BASE_JD_EVENT_JOB_CANCELLED; + } + + kbase_sync_signal_pt(pt, result); + + sync_timeline_signal(timeline); + + return (result < 0) ? BASE_JD_EVENT_JOB_CANCELLED : BASE_JD_EVENT_DONE; +} + +static void kbase_fence_wait_worker(struct work_struct *data) +{ + struct kbase_jd_atom *katom; + struct kbase_context *kctx; + + katom = container_of(data, struct kbase_jd_atom, work); + kctx = katom->kctx; + + complete_soft_job(katom); +} + +static void kbase_fence_wait_callback(struct sync_fence *fence, struct sync_fence_waiter *waiter) +{ + struct kbase_jd_atom *katom = container_of(waiter, struct kbase_jd_atom, sync_waiter); + struct kbase_context *kctx; + + KBASE_DEBUG_ASSERT(NULL != katom); + + kctx = katom->kctx; + + KBASE_DEBUG_ASSERT(NULL != kctx); + + /* Propagate the fence status to the atom. + * If negative then cancel this atom and its dependencies. + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + if (fence->status < 0) +#else + if (atomic_read(&fence->status) < 0) +#endif + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + /* To prevent a potential deadlock we schedule the work onto the job_done_wq workqueue + * + * The issue is that we may signal the timeline while holding kctx->jctx.lock and + * the callbacks are run synchronously from sync_timeline_signal. So we simply defer the work. 
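The deferral described in this comment is the standard Linux workqueue pattern: do the bare minimum in the callback and push completion into process context, where mutexes may be taken safely. A generic stand-alone sketch of the pattern, with hypothetical names (not driver code):

#include <linux/module.h>
#include <linux/workqueue.h>

struct demo_atom {
	int event_code;
	struct work_struct work;    /* deferred completion */
};

static struct demo_atom demo;

/* Runs in process context, so it may take mutexes safely. */
static void demo_complete_worker(struct work_struct *work)
{
	struct demo_atom *atom = container_of(work, struct demo_atom, work);

	pr_info("completing atom, event=%d\n", atom->event_code);
}

/* Imagine this being called from a fence callback with locks held:
 * record the outcome and defer everything else to the worker. */
static void demo_callback(struct demo_atom *atom, int status)
{
	atom->event_code = status;
	schedule_work(&atom->work);  /* system workqueue, for the demo */
}

static int __init demo_init(void)
{
	INIT_WORK(&demo.work, demo_complete_worker);
	demo_callback(&demo, 0);
	return 0;
}

static void __exit demo_exit(void)
{
	flush_work(&demo.work);      /* wait for any pending completion */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");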
+ */ + + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, kbase_fence_wait_worker); + queue_work(kctx->jctx.job_done_wq, &katom->work); +} + +static int kbase_fence_wait(struct kbase_jd_atom *katom) +{ + int ret; + + KBASE_DEBUG_ASSERT(NULL != katom); + KBASE_DEBUG_ASSERT(NULL != katom->kctx); + + sync_fence_waiter_init(&katom->sync_waiter, kbase_fence_wait_callback); + + ret = sync_fence_wait_async(katom->fence, &katom->sync_waiter); + + if (ret == 1) { + /* Already signalled */ + return 0; + } else if (ret < 0) { + goto cancel_atom; + } + return 1; + + cancel_atom: + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + /* We should cause the dependent jobs in the bag to be failed; + * to do this we schedule the work queue to complete this job */ + KBASE_DEBUG_ASSERT(0 == object_is_on_stack(&katom->work)); + INIT_WORK(&katom->work, kbase_fence_wait_worker); + queue_work(katom->kctx->jctx.job_done_wq, &katom->work); + return 1; +} + +static void kbase_fence_cancel_wait(struct kbase_jd_atom *katom) +{ + if (sync_fence_cancel_async(katom->fence, &katom->sync_waiter) != 0) { + /* The wait wasn't cancelled - leave the cleanup for kbase_fence_wait_callback */ + return; + } + + /* Wait was cancelled - zap the atoms */ + katom->event_code = BASE_JD_EVENT_JOB_CANCELLED; + + kbase_finish_soft_job(katom); + + if (jd_done_nolock(katom)) + kbase_js_sched_all(katom->kctx->kbdev); +} +#endif /* CONFIG_SYNC */ + +int kbase_process_soft_job(struct kbase_jd_atom *katom) +{ + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + return kbase_dump_cpu_gpu_time(katom); +#ifdef CONFIG_SYNC + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + KBASE_DEBUG_ASSERT(katom->fence != NULL); + katom->event_code = kbase_fence_trigger(katom, katom->event_code == BASE_JD_EVENT_DONE ? 0 : -EFAULT); + /* Release the reference as we don't need it any more */ + sync_fence_put(katom->fence); + katom->fence = NULL; + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + return kbase_fence_wait(katom); +#endif /* CONFIG_SYNC */ + case BASE_JD_REQ_SOFT_REPLAY: + return kbase_replay_process(katom); + } + + /* Atom is complete */ + return 0; +} + +void kbase_cancel_soft_job(struct kbase_jd_atom *katom) +{ + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { +#ifdef CONFIG_SYNC + case BASE_JD_REQ_SOFT_FENCE_WAIT: + kbase_fence_cancel_wait(katom); + break; +#endif + default: + /* This soft-job doesn't support cancellation! */ + KBASE_DEBUG_ASSERT(0); + } +} + +int kbase_prepare_soft_job(struct kbase_jd_atom *katom) +{ + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + { + if (0 != (katom->jc & KBASE_CACHE_ALIGNMENT_MASK)) + return -EINVAL; + } + break; +#ifdef CONFIG_SYNC + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + { + struct base_fence fence; + int fd; + + if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) + return -EINVAL; + + fd = kbase_stream_create_fence(fence.basep.stream_fd); + if (fd < 0) + return -EINVAL; + + katom->fence = sync_fence_fdget(fd); + + if (katom->fence == NULL) { + /* The only way the fence can be NULL is if userspace closed it for us.
+ * So we don't need to clear it up */ + return -EINVAL; + } + fence.basep.fd = fd; + if (0 != copy_to_user((__user void *)(uintptr_t) katom->jc, &fence, sizeof(fence))) { + katom->fence = NULL; + sys_close(fd); + return -EINVAL; + } + } + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + { + struct base_fence fence; + + if (0 != copy_from_user(&fence, (__user void *)(uintptr_t) katom->jc, sizeof(fence))) + return -EINVAL; + + /* Get a reference to the fence object */ + katom->fence = sync_fence_fdget(fence.basep.fd); + if (katom->fence == NULL) + return -EINVAL; + } + break; +#endif /* CONFIG_SYNC */ + case BASE_JD_REQ_SOFT_REPLAY: + break; + default: + /* Unsupported soft-job */ + return -EINVAL; + } + return 0; +} + +void kbase_finish_soft_job(struct kbase_jd_atom *katom) +{ + switch (katom->core_req & BASEP_JD_REQ_ATOM_TYPE) { + case BASE_JD_REQ_SOFT_DUMP_CPU_GPU_TIME: + /* Nothing to do */ + break; +#ifdef CONFIG_SYNC + case BASE_JD_REQ_SOFT_FENCE_TRIGGER: + /* If fence has not yet been signalled, do it now */ + if (katom->fence) { + kbase_fence_trigger(katom, katom->event_code == + BASE_JD_EVENT_DONE ? 0 : -EFAULT); + sync_fence_put(katom->fence); + katom->fence = NULL; + } + break; + case BASE_JD_REQ_SOFT_FENCE_WAIT: + /* Release the reference to the fence object */ + sync_fence_put(katom->fence); + katom->fence = NULL; + break; +#endif /* CONFIG_SYNC */ + } +} + +void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev) +{ + LIST_HEAD(local_suspended_soft_jobs); + struct kbase_jd_atom *tmp_iter; + struct kbase_jd_atom *katom_iter; + struct kbasep_js_device_data *js_devdata; + bool resched = false; + + KBASE_DEBUG_ASSERT(kbdev); + + js_devdata = &kbdev->js_data; + + /* Move out the entire list */ + mutex_lock(&js_devdata->runpool_mutex); + list_splice_init(&js_devdata->suspended_soft_jobs_list, + &local_suspended_soft_jobs); + mutex_unlock(&js_devdata->runpool_mutex); + + /* + * Each atom must be detached from the list and run separately - + * it could be re-added to the old list, but this is unlikely + */ + list_for_each_entry_safe(katom_iter, tmp_iter, + &local_suspended_soft_jobs, dep_item[1]) { + struct kbase_context *kctx = katom_iter->kctx; + + mutex_lock(&kctx->jctx.lock); + + /* Remove from the global list */ + list_del(&katom_iter->dep_item[1]); + /* Remove from the context's list of waiting soft jobs */ + list_del(&katom_iter->dep_item[0]); + + if (kbase_process_soft_job(katom_iter) == 0) { + kbase_finish_soft_job(katom_iter); + resched |= jd_done_nolock(katom_iter); + } else { + /* The job has not completed */ + KBASE_DEBUG_ASSERT((katom_iter->core_req & + BASEP_JD_REQ_ATOM_TYPE) + != BASE_JD_REQ_SOFT_REPLAY); + list_add_tail(&katom_iter->dep_item[0], + &kctx->waiting_soft_jobs); + } + + mutex_unlock(&kctx->jctx.lock); + } + + if (resched) + kbase_js_sched_all(kbdev); +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.c b/drivers/gpu/arm/midgard/mali_kbase_sync.c new file mode 100644 index 00000000000..c5fb9815255 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_sync.c @@ -0,0 +1,182 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence.
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifdef CONFIG_SYNC + +#include <linux/seq_file.h> +#include "sync.h" +#include <mali_kbase.h> +#include <mali_kbase_sync.h> + +struct mali_sync_timeline { + struct sync_timeline timeline; + atomic_t counter; + atomic_t signalled; +}; + +struct mali_sync_pt { + struct sync_pt pt; + int order; + int result; +}; + +static struct mali_sync_timeline *to_mali_sync_timeline(struct sync_timeline *timeline) +{ + return container_of(timeline, struct mali_sync_timeline, timeline); +} + +static struct mali_sync_pt *to_mali_sync_pt(struct sync_pt *pt) +{ + return container_of(pt, struct mali_sync_pt, pt); +} + +static struct sync_pt *timeline_dup(struct sync_pt *pt) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_pt *new_mpt; + struct sync_pt *new_pt = sync_pt_create(sync_pt_parent(pt), sizeof(struct mali_sync_pt)); + + if (!new_pt) + return NULL; + + new_mpt = to_mali_sync_pt(new_pt); + new_mpt->order = mpt->order; + new_mpt->result = mpt->result; + + return new_pt; +} + +static int timeline_has_signaled(struct sync_pt *pt) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt)); + int result = mpt->result; + + int diff = atomic_read(&mtl->signalled) - mpt->order; + + if (diff >= 0) + return (result < 0) ? result : 1; + + return 0; +} + +static int timeline_compare(struct sync_pt *a, struct sync_pt *b) +{ + struct mali_sync_pt *ma = container_of(a, struct mali_sync_pt, pt); + struct mali_sync_pt *mb = container_of(b, struct mali_sync_pt, pt); + + int diff = ma->order - mb->order; + + if (diff == 0) + return 0; + + return (diff < 0) ? 
-1 : 1; +} + +static void timeline_value_str(struct sync_timeline *timeline, char *str, + int size) +{ + struct mali_sync_timeline *mtl = to_mali_sync_timeline(timeline); + + snprintf(str, size, "%d", atomic_read(&mtl->signalled)); +} + +static void pt_value_str(struct sync_pt *pt, char *str, int size) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + + snprintf(str, size, "%d(%d)", mpt->order, mpt->result); +} + +static struct sync_timeline_ops mali_timeline_ops = { + .driver_name = "Mali", + .dup = timeline_dup, + .has_signaled = timeline_has_signaled, + .compare = timeline_compare, + .timeline_value_str = timeline_value_str, + .pt_value_str = pt_value_str, +}; + +int kbase_sync_timeline_is_ours(struct sync_timeline *timeline) +{ + return timeline->ops == &mali_timeline_ops; +} + +struct sync_timeline *kbase_sync_timeline_alloc(const char *name) +{ + struct sync_timeline *tl; + struct mali_sync_timeline *mtl; + + tl = sync_timeline_create(&mali_timeline_ops, sizeof(struct mali_sync_timeline), name); + if (!tl) + return NULL; + + /* Set the counter in our private struct */ + mtl = to_mali_sync_timeline(tl); + atomic_set(&mtl->counter, 0); + atomic_set(&mtl->signalled, 0); + + return tl; +} + +struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent) +{ + struct sync_pt *pt = sync_pt_create(parent, sizeof(struct mali_sync_pt)); + struct mali_sync_timeline *mtl = to_mali_sync_timeline(parent); + struct mali_sync_pt *mpt; + + if (!pt) + return NULL; + + mpt = to_mali_sync_pt(pt); + mpt->order = atomic_inc_return(&mtl->counter); + mpt->result = 0; + + return pt; +} + +void kbase_sync_signal_pt(struct sync_pt *pt, int result) +{ + struct mali_sync_pt *mpt = to_mali_sync_pt(pt); + struct mali_sync_timeline *mtl = to_mali_sync_timeline(sync_pt_parent(pt)); + int signalled; + int diff; + + mpt->result = result; + + do { + signalled = atomic_read(&mtl->signalled); + + diff = signalled - mpt->order; + + if (diff > 0) { + /* The timeline is already at or ahead of this point. + * This should not happen unless userspace has been + * signalling fences out of order, so warn but don't + * violate the sync_pt API. + * The warning is only in debug builds to prevent + * a malicious user being able to spam dmesg. + */ +#ifdef CONFIG_MALI_DEBUG + pr_err("Fences were triggered in a different order to allocation!"); +#endif /* CONFIG_MALI_DEBUG */ + return; + } + } while (atomic_cmpxchg(&mtl->signalled, signalled, mpt->order) != signalled); +} + +#endif /* CONFIG_SYNC */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync.h b/drivers/gpu/arm/midgard/mali_kbase_sync.h new file mode 100644 index 00000000000..6d8e34d3c3a --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_sync.h @@ -0,0 +1,91 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_kbase_sync.h + * + */ + +#ifndef MALI_KBASE_SYNC_H +#define MALI_KBASE_SYNC_H + +#include "sync.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) +/* For backwards compatibility with kernels before 3.17.
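The order/signalled pair above forms a monotonic watermark: a sync point is signalled once the timeline's signalled counter reaches its order value, and signalling only ever moves the watermark forward. A user-space model of the same compare-and-swap loop using C11 atomics (hypothetical, for illustration only):

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int counter;    /* models mali_sync_timeline::counter   */
static atomic_int signalled;  /* models mali_sync_timeline::signalled */

static int pt_alloc(void)     /* returns the new point's order */
{
	return atomic_fetch_add(&counter, 1) + 1;
}

static int pt_has_signaled(int order)
{
	return atomic_load(&signalled) - order >= 0;
}

/* Advance the watermark to 'order' unless it has already moved past it. */
static void pt_signal(int order)
{
	int seen = atomic_load(&signalled);

	do {
		if (seen - order > 0)
			return;  /* out-of-order signal: ignore, as the driver does */
	} while (!atomic_compare_exchange_weak(&signalled, &seen, order));
}

int main(void)
{
	int a = pt_alloc(), b = pt_alloc();   /* a = 1, b = 2 */

	pt_signal(a);
	assert(pt_has_signaled(a) && !pt_has_signaled(b));
	pt_signal(b);
	assert(pt_has_signaled(b));
	printf("watermark=%d\n", atomic_load(&signalled));
	return 0;
}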
After 3.17 + * sync_pt_parent is included in the kernel. */ +static inline struct sync_timeline *sync_pt_parent(struct sync_pt *pt) +{ + return pt->parent; +} +#endif + +/* + * Create a stream object. + * Built on top of timeline object. + * Exposed as a file descriptor. + * Life-time controlled via the file descriptor: + * - dup to add a ref + * - close to remove a ref + */ +int kbase_stream_create(const char *name, int *const out_fd); + +/* + * Create a fence in a stream object + */ +int kbase_stream_create_fence(int tl_fd); + +/* + * Validate that a fd refers to a valid fence + * No reference is taken. + * + * This function is only usable to catch unintentional user errors early, + * it does not stop malicious code changing the fd after this function returns. + */ +int kbase_fence_validate(int fd); + +/* Returns true if the specified timeline is allocated by Mali */ +int kbase_sync_timeline_is_ours(struct sync_timeline *timeline); + +/* Allocates a timeline for Mali + * + * One timeline should be allocated per API context. + */ +struct sync_timeline *kbase_sync_timeline_alloc(const char *name); + +/* Allocates a sync point within the timeline. + * + * The timeline must be the one allocated by kbase_sync_timeline_alloc + * + * Sync points must be triggered in *exactly* the same order as they are allocated. + */ +struct sync_pt *kbase_sync_pt_alloc(struct sync_timeline *parent); + +/* Signals a particular sync point + * + * Sync points must be triggered in *exactly* the same order as they are allocated. + * + * If they are signalled in the wrong order then a message will be printed in debug + * builds and otherwise attempts to signal out-of-order sync_pts will be ignored. + * + * result can be negative to indicate error; any other value is interpreted as success. + */ +void kbase_sync_signal_pt(struct sync_pt *pt, int result); + +#endif diff --git a/drivers/gpu/arm/midgard/mali_kbase_sync_user.c b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c new file mode 100644 index 00000000000..ddd0847a69c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_sync_user.c @@ -0,0 +1,157 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA.
+ * + */ + + + + + +/** + * @file mali_kbase_sync_user.c + * + */ + +#ifdef CONFIG_SYNC + +#include <linux/sched.h> +#include <linux/fdtable.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/anon_inodes.h> +#include <linux/version.h> +#include <linux/uaccess.h> +#include <mali_kbase_sync.h> +#include <mali_base_kernel_sync.h> + +static int kbase_stream_close(struct inode *inode, struct file *file) +{ + struct sync_timeline *tl; + + tl = (struct sync_timeline *)file->private_data; + BUG_ON(!tl); + sync_timeline_destroy(tl); + return 0; +} + +static const struct file_operations stream_fops = { + .owner = THIS_MODULE, + .release = kbase_stream_close, +}; + +int kbase_stream_create(const char *name, int *const out_fd) +{ + struct sync_timeline *tl; + + BUG_ON(!out_fd); + + tl = kbase_sync_timeline_alloc(name); + if (!tl) + return -EINVAL; + + *out_fd = anon_inode_getfd(name, &stream_fops, tl, O_RDONLY | O_CLOEXEC); + + if (*out_fd < 0) { + sync_timeline_destroy(tl); + return -EINVAL; + } + + return 0; +} + +int kbase_stream_create_fence(int tl_fd) +{ + struct sync_timeline *tl; + struct sync_pt *pt; + struct sync_fence *fence; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 7, 0) + struct files_struct *files; + struct fdtable *fdt; +#endif + int fd; + struct file *tl_file; + + tl_file = fget(tl_fd); + if (tl_file == NULL) + return -EBADF; + + if (tl_file->f_op != &stream_fops) { + fd = -EBADF; + goto out; + } + + tl = tl_file->private_data; + + pt = kbase_sync_pt_alloc(tl); + if (!pt) { + fd = -EFAULT; + goto out; + } + + fence = sync_fence_create("mali_fence", pt); + if (!fence) { + sync_pt_free(pt); + fd = -EFAULT; + goto out; + } + + /* from here the fence owns the sync_pt */ + + /* create a fd representing the fence */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); + if (fd < 0) { + sync_fence_put(fence); + goto out; + } +#else + fd = get_unused_fd(); + if (fd < 0) { + sync_fence_put(fence); + goto out; + } + + files = current->files; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 4, 0) + __set_close_on_exec(fd, fdt); +#else + FD_SET(fd, fdt->close_on_exec); +#endif + spin_unlock(&files->file_lock); +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) */ + + /* bind fence to the new fd */ + sync_fence_install(fence, fd); + + out: + fput(tl_file); + + return fd; +} + +int kbase_fence_validate(int fd) +{ + struct sync_fence *fence; + + fence = sync_fence_fdget(fd); + if (!fence) + return -EINVAL; + + sync_fence_put(fence); + return 0; +} + +#endif /* CONFIG_SYNC */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.c b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c new file mode 100644 index 00000000000..871dc316c99 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.c @@ -0,0 +1,1695 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#include <linux/anon_inodes.h> +#include <linux/atomic.h> +#include <linux/file.h> +#include <linux/mutex.h> +#include <linux/poll.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/stringify.h> +#include <linux/timer.h> +#include <linux/wait.h> + +#include <mali_kbase.h> +#include <mali_kbase_jm.h> +#include <mali_kbase_tlstream.h> + +/*****************************************************************************/ + +/* The version of the timeline stream. */ +#define KBASEP_TLSTREAM_VERSION 1 + +/* The maximum expected length of a string in a tracepoint descriptor. */ +#define STRLEN_MAX 64 /* bytes */ + +/* The number of nanoseconds in a second. */ +#define NSECS_IN_SEC 1000000000ull /* ns */ + +/* The number of nanoseconds to wait before autoflushing the stream. */ +#define AUTOFLUSH_TIMEOUT (2ull * NSECS_IN_SEC) /* ns */ + +/* The period of autoflush checker execution in milliseconds. */ +#define AUTOFLUSH_INTERVAL 1000 /* ms */ + +/* The maximum size of a single packet used by the timeline. */ +#define PACKET_SIZE 2048 /* bytes */ + +/* The number of packets used by one timeline stream. */ +#define PACKET_COUNT 16 + +/* The number of bytes reserved for the packet header. + * This value must be defined according to MIPE documentation. */ +#define PACKET_HEADER_SIZE 8 /* bytes */ + +/* The number of bytes reserved for the packet sequence number. + * This value must be defined according to MIPE documentation. */ +#define PACKET_NUMBER_SIZE 4 /* bytes */ + +/* Packet header - first word. + * These values must be defined according to MIPE documentation. */ +#define PACKET_STREAMID_POS 0 +#define PACKET_STREAMID_LEN 8 +#define PACKET_RSVD1_POS (PACKET_STREAMID_POS + PACKET_STREAMID_LEN) +#define PACKET_RSVD1_LEN 8 +#define PACKET_TYPE_POS (PACKET_RSVD1_POS + PACKET_RSVD1_LEN) +#define PACKET_TYPE_LEN 3 +#define PACKET_CLASS_POS (PACKET_TYPE_POS + PACKET_TYPE_LEN) +#define PACKET_CLASS_LEN 7 +#define PACKET_FAMILY_POS (PACKET_CLASS_POS + PACKET_CLASS_LEN) +#define PACKET_FAMILY_LEN 6 + +/* Packet header - second word. + * These values must be defined according to MIPE documentation. */ +#define PACKET_LENGTH_POS 0 +#define PACKET_LENGTH_LEN 24 +#define PACKET_SEQBIT_POS (PACKET_LENGTH_POS + PACKET_LENGTH_LEN) +#define PACKET_SEQBIT_LEN 1 +#define PACKET_RSVD2_POS (PACKET_SEQBIT_POS + PACKET_SEQBIT_LEN) +#define PACKET_RSVD2_LEN 7 + +/* Types of streams generated by timeline. + * Order is significant! Header streams must precede respective body streams. */ +enum tl_stream_type { + TL_STREAM_TYPE_OBJ_HEADER, + TL_STREAM_TYPE_OBJ_SUMMARY, + TL_STREAM_TYPE_OBJ, + TL_STREAM_TYPE_AUX_HEADER, + TL_STREAM_TYPE_AUX, + + TL_STREAM_TYPE_COUNT +}; + +/* Timeline packet family ids. + * Values are significant! Check MIPE documentation. */ +enum tl_packet_family { + TL_PACKET_FAMILY_CTRL = 0, /* control packets */ + TL_PACKET_FAMILY_TL = 1, /* timeline packets */ + + TL_PACKET_FAMILY_COUNT +}; + +/* Packet classes used in timeline streams. + * Values are significant! Check MIPE documentation. */ +enum tl_packet_class { + TL_PACKET_CLASS_OBJ = 0, /* timeline objects packet */ + TL_PACKET_CLASS_AUX = 1, /* auxiliary events packet */ +}; + +/* Packet types used in timeline streams. + * Values are significant! Check MIPE documentation.
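Reading the POS/LEN macros together, the first 32-bit header word packs, from bit 0 upward: stream id (8 bits), reserved (8), type (3), class (7), and family (6). A stand-alone sketch that assembles one such word with a put-bits helper equivalent to the kbasep_tlstream_put_bits() defined later in this file:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Bit layout mirrored from the PACKET_* macros above. */
#define STREAMID_POS 0
#define STREAMID_LEN 8
#define RSVD1_POS    (STREAMID_POS + STREAMID_LEN)
#define RSVD1_LEN    8
#define TYPE_POS     (RSVD1_POS + RSVD1_LEN)
#define TYPE_LEN     3
#define CLASS_POS    (TYPE_POS + TYPE_LEN)
#define CLASS_LEN    7
#define FAMILY_POS   (CLASS_POS + CLASS_LEN)
#define FAMILY_LEN   6

/* Write 'value' into word bits [pos, pos+len), preserving the rest. */
static void put_bits(uint32_t *word, uint32_t value,
		     unsigned int pos, unsigned int len)
{
	const uint32_t mask = ((1u << len) - 1) << pos;

	*word = (*word & ~mask) | ((value << pos) & mask);
}

int main(void)
{
	uint32_t word0 = 0;

	put_bits(&word0, 1, FAMILY_POS, FAMILY_LEN);     /* TL_PACKET_FAMILY_TL */
	put_bits(&word0, 0, CLASS_POS, CLASS_LEN);       /* TL_PACKET_CLASS_OBJ */
	put_bits(&word0, 1, TYPE_POS, TYPE_LEN);         /* TL_PACKET_TYPE_BODY */
	put_bits(&word0, 1, STREAMID_POS, STREAMID_LEN); /* kernel stream id    */

	assert((word0 & 0xFF) == 1);  /* stream id lands in the low byte */
	printf("word0 = 0x%08x\n", word0);
	return 0;
}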
*/ +enum tl_packet_type { + TL_PACKET_TYPE_HEADER = 0, /* stream's header/directory */ + TL_PACKET_TYPE_BODY = 1, /* stream's body */ + TL_PACKET_TYPE_SUMMARY = 2, /* stream's summary */ +}; + +/* Message ids of trace events that are recorded in the timeline stream. */ +enum tl_msg_id { + /* Timeline object events. */ + KBASE_TL_NEW_CTX, + KBASE_TL_NEW_GPU, + KBASE_TL_NEW_LPU, + KBASE_TL_NEW_ATOM, + KBASE_TL_DEL_CTX, + KBASE_TL_DEL_ATOM, + KBASE_TL_LIFELINK_LPU_GPU, + KBASE_TL_RET_GPU_CTX, + KBASE_TL_RET_ATOM_CTX, + KBASE_TL_RET_ATOM_LPU, + KBASE_TL_NRET_GPU_CTX, + KBASE_TL_NRET_ATOM_CTX, + KBASE_TL_NRET_ATOM_LPU, + + /* Timeline non-object events. */ + KBASE_AUX_PM_STATE, + KBASE_AUX_JOB_SOFTSTOP, + KBASE_AUX_PAGEFAULT, + KBASE_AUX_PAGESALLOC +}; + +/*****************************************************************************/ + +/** + * struct tl_stream - timeline stream structure + * @lock: message order lock + * @buffer: array of buffers + * @wbi: write buffer index + * @rbi: read buffer index + * @numbered: if non-zero, the stream's packets are sequentially numbered + * @last_write_time: timestamp indicating last write + * + * This structure holds information needed to construct proper packets in the + * timeline stream. Each message in the sequence must bear a timestamp that is + * greater than the one in the previous message in the same stream. For this + * reason the lock is held throughout the process of message creation. Each + * stream contains a set of buffers. Each buffer will hold one MIPE packet. If + * there is not enough free space to store an incoming message, the oldest + * buffer is discarded. Each packet in a timeline body stream has a sequence + * number embedded (this value must increment monotonically and is used by the + * packet receiver to discover buffer overflows). + */ +struct tl_stream { + spinlock_t lock; + + struct { + atomic_t size; /* number of bytes in buffer */ + char data[PACKET_SIZE]; /* buffer's data */ + } buffer[PACKET_COUNT]; + + atomic_t wbi; + atomic_t rbi; + + int numbered; + u64 last_write_time; +}; + +/** + * struct tp_desc - tracepoint message descriptor structure + * @id: tracepoint ID identifying message in stream + * @id_str: human readable version of tracepoint ID + * @name: tracepoint description + * @arg_types: tracepoint's arguments types declaration + * @arg_names: comma separated list of tracepoint's arguments names + */ +struct tp_desc { + u32 id; + const char *id_str; + const char *name; + const char *arg_types; + const char *arg_names; +}; + +/*****************************************************************************/ + +/* Configuration of timeline streams generated by the kernel. + * The kernel emits only streams containing either timeline object events or + * auxiliary events. All streams have a stream id value of 1 (as opposed to + * user space streams, which have a value of 0). */ +static const struct { + enum tl_packet_family pkt_family; + enum tl_packet_class pkt_class; + enum tl_packet_type pkt_type; + unsigned int stream_id; +} tl_stream_cfg[TL_STREAM_TYPE_COUNT] = { + {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_HEADER, 1}, + {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_SUMMARY, 1}, + {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_OBJ, TL_PACKET_TYPE_BODY, 1}, + {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_HEADER, 1}, + {TL_PACKET_FAMILY_TL, TL_PACKET_CLASS_AUX, TL_PACKET_TYPE_BODY, 1} +}; + +/* The timeline streams generated by the kernel. */ +static struct tl_stream *tl_stream[TL_STREAM_TYPE_COUNT]; + +/* Autoflush timer.
*/ +static struct timer_list autoflush_timer; + +/* If non-zero autoflush timer is active. */ +static atomic_t autoflush_timer_active; + +/* Reader lock. Only one reader is allowed to have access to the timeline + * streams at any given time. */ +static DEFINE_MUTEX(tl_reader_lock); + +/* Indicator of whether the timeline stream file descriptor is already used. */ +static atomic_t tlstream_busy = {0}; + +/* Timeline stream event queue. */ +static DECLARE_WAIT_QUEUE_HEAD(tl_event_queue); + +/* The timeline stream file operations functions. */ +static ssize_t kbasep_tlstream_read( + struct file *filp, + char __user *buffer, + size_t size, + loff_t *f_pos); +static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait); +static int kbasep_tlstream_release(struct inode *inode, struct file *filp); + +/* The timeline stream file operations structure. */ +static const struct file_operations kbasep_tlstream_fops = { + .release = kbasep_tlstream_release, + .read = kbasep_tlstream_read, + .poll = kbasep_tlstream_poll, +}; + +/* Descriptors of timeline messages transmitted in object events stream. */ +static const struct tp_desc tp_desc_obj[] = { + { + KBASE_TL_NEW_CTX, + __stringify(KBASE_TL_NEW_CTX), + "object ctx is created", + "@pI", + "ctx,ctx_nr" + }, + { + KBASE_TL_NEW_GPU, + __stringify(KBASE_TL_NEW_GPU), + "object gpu is created", + "@pII", + "gpu,gpu_id,core_count" + }, + { + KBASE_TL_NEW_LPU, + __stringify(KBASE_TL_NEW_LPU), + "object lpu is created", + "@pII", + "lpu,lpu_nr,lpu_fn" + }, + { + KBASE_TL_NEW_ATOM, + __stringify(KBASE_TL_NEW_ATOM), + "object atom is created", + "@pI", + "atom,atom_nr" + }, + { + KBASE_TL_DEL_CTX, + __stringify(KBASE_TL_DEL_CTX), + "context is destroyed", + "@p", + "context" + }, + { + KBASE_TL_DEL_ATOM, + __stringify(KBASE_TL_DEL_ATOM), + "atom is destroyed", + "@p", + "atom" + }, + { + KBASE_TL_LIFELINK_LPU_GPU, + __stringify(KBASE_TL_LIFELINK_LPU_GPU), + "lpu is deleted with gpu", + "@pp", + "lpu,gpu" + }, + { + KBASE_TL_RET_GPU_CTX, + __stringify(KBASE_TL_RET_GPU_CTX), + "gpu is retained by context", + "@pp", + "gpu,ctx" + }, + { + KBASE_TL_RET_ATOM_CTX, + __stringify(KBASE_TL_RET_ATOM_CTX), + "atom is retained by context", + "@pp", + "atom,ctx" + }, + { + KBASE_TL_RET_ATOM_LPU, + __stringify(KBASE_TL_RET_ATOM_LPU), + "atom is retained by lpu", + "@pp", + "atom,lpu" + }, + { + KBASE_TL_NRET_GPU_CTX, + __stringify(KBASE_TL_NRET_GPU_CTX), + "gpu is released by context", + "@pp", + "gpu,ctx" + }, + { + KBASE_TL_NRET_ATOM_CTX, + __stringify(KBASE_TL_NRET_ATOM_CTX), + "atom is released by context", + "@pp", + "atom,context" + }, + { + KBASE_TL_NRET_ATOM_LPU, + __stringify(KBASE_TL_NRET_ATOM_LPU), + "atom is released by lpu", + "@pp", + "atom,lpu" + }, +}; + +/* Descriptors of timeline messages transmitted in auxiliary events stream. */ +static const struct tp_desc tp_desc_aux[] = { + { + KBASE_AUX_PM_STATE, + __stringify(KBASE_AUX_PM_STATE), + "PM state", + "@IL", + "core_type,core_state_bitset" + }, + { + KBASE_AUX_JOB_SOFTSTOP, + __stringify(KBASE_AUX_JOB_SOFTSTOP), + "Job soft stop", + "@I", + "tag_id" + }, + { + KBASE_AUX_PAGEFAULT, + __stringify(KBASE_AUX_PAGEFAULT), + "Page fault", + "@II", + "as_id,page_cnt" + }, + { + KBASE_AUX_PAGESALLOC, + __stringify(KBASE_AUX_PAGESALLOC), + "Total alloc pages change", + "@l", + "page_cnt_change" + } +}; + +#if MALI_UNIT_TEST +/* Number of bytes read by user. */ +static atomic_t tlstream_bytes_collected = {0}; + +/* Number of bytes generated by tracepoint messages. 
*/ +static atomic_t tlstream_bytes_generated = {0}; +#endif /* MALI_UNIT_TEST */ + +/*****************************************************************************/ + +/** + * kbasep_tlstream_get_timestamp - return timestamp + * + * Function returns a timestamp value based on the raw monotonic timer. The + * value will wrap around zero in case of overflow. + * Return: timestamp value + */ +static u64 kbasep_tlstream_get_timestamp(void) +{ + struct timespec ts; + u64 timestamp; + + getrawmonotonic(&ts); + timestamp = (u64)ts.tv_sec * NSECS_IN_SEC + ts.tv_nsec; + return timestamp; +} + +/** + * kbasep_tlstream_write_bytes - write data to message buffer + * @buffer: buffer where data will be written + * @pos: position in the buffer where to place data + * @bytes: pointer to buffer holding data + * @len: length of data to be written + * + * Return: updated position in the buffer + */ +static size_t kbasep_tlstream_write_bytes( + char *buffer, + size_t pos, + const void *bytes, + size_t len) +{ + KBASE_DEBUG_ASSERT(buffer); + KBASE_DEBUG_ASSERT(bytes); + + memcpy(&buffer[pos], bytes, len); + + return pos + len; +} + +/** + * kbasep_tlstream_write_string - write string to message buffer + * @buffer: buffer where data will be written + * @pos: position in the buffer where to place data + * @string: pointer to buffer holding the source string + * @max_write_size: number of bytes that can be stored in buffer + * + * Return: updated position in the buffer + */ +static size_t kbasep_tlstream_write_string( + char *buffer, + size_t pos, + const char *string, + size_t max_write_size) +{ + u32 string_len; + + KBASE_DEBUG_ASSERT(buffer); + KBASE_DEBUG_ASSERT(string); + /* A timeline string consists of at least the string length and a nul + * terminator. */ + KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char)); + max_write_size -= sizeof(string_len); + + string_len = strlcpy( + &buffer[pos + sizeof(string_len)], + string, + max_write_size); + string_len += sizeof(char); + + /* Make sure that the source string fits into the buffer. */ + KBASE_DEBUG_ASSERT(string_len <= max_write_size); + + /* Update string length.
*/ + memcpy(&buffer[pos], &string_len, sizeof(string_len)); + + return pos + sizeof(string_len) + string_len; +} + +/** + * kbasep_tlstream_write_timestamp - write timestamp to message buffer + * @buffer: buffer where data will be written + * @pos: position in the buffer where to place data + * + * Return: updated position in the buffer + */ +static size_t kbasep_tlstream_write_timestamp(void *buffer, size_t pos) +{ + u64 timestamp = kbasep_tlstream_get_timestamp(); + + return kbasep_tlstream_write_bytes( + buffer, pos, + &timestamp, sizeof(timestamp)); +} + +/** + * kbasep_tlstream_put_bits - put bits in a word + * @word: pointer to the word being modified + * @value: value that shall be written to given position + * @bitpos: position where value shall be written (in bits) + * @bitlen: length of value (in bits) + */ +static void kbasep_tlstream_put_bits( + u32 *word, + u32 value, + unsigned int bitpos, + unsigned int bitlen) +{ + const u32 mask = ((1 << bitlen) - 1) << bitpos; + + KBASE_DEBUG_ASSERT(word); + KBASE_DEBUG_ASSERT((0 != bitlen) && (32 >= bitlen)); + KBASE_DEBUG_ASSERT((bitpos + bitlen) <= 32); + + *word &= ~mask; + *word |= ((value << bitpos) & mask); +} + +/** + * kbasep_tlstream_packet_header_setup - setup the packet header + * @buffer: pointer to the buffer + * @pkt_family: packet's family + * @pkt_class: packet's class + * @pkt_type: packet's type + * @stream_id: stream id + * @numbered: non-zero if this stream is numbered + * + * Function sets up immutable part of packet header in the given buffer. + */ +static void kbasep_tlstream_packet_header_setup( + char *buffer, + enum tl_packet_family pkt_family, + enum tl_packet_class pkt_class, + enum tl_packet_type pkt_type, + unsigned int stream_id, + int numbered) +{ + u32 word0 = 0; + u32 word1 = 0; + + KBASE_DEBUG_ASSERT(buffer); + KBASE_DEBUG_ASSERT(pkt_family == TL_PACKET_FAMILY_TL); + KBASE_DEBUG_ASSERT( + (pkt_type == TL_PACKET_TYPE_HEADER) || + (pkt_type == TL_PACKET_TYPE_SUMMARY) || + (pkt_type == TL_PACKET_TYPE_BODY)); + KBASE_DEBUG_ASSERT( + (pkt_class == TL_PACKET_CLASS_OBJ) || + (pkt_class == TL_PACKET_CLASS_AUX)); + + kbasep_tlstream_put_bits( + &word0, pkt_family, + PACKET_FAMILY_POS, PACKET_FAMILY_LEN); + kbasep_tlstream_put_bits( + &word0, pkt_class, + PACKET_CLASS_POS, PACKET_CLASS_LEN); + kbasep_tlstream_put_bits( + &word0, pkt_type, + PACKET_TYPE_POS, PACKET_TYPE_LEN); + kbasep_tlstream_put_bits( + &word0, stream_id, + PACKET_STREAMID_POS, PACKET_STREAMID_LEN); + + if (numbered) + kbasep_tlstream_put_bits( + &word1, 1, + PACKET_SEQBIT_POS, PACKET_SEQBIT_LEN); + + memcpy(&buffer[0], &word0, sizeof(word0)); + memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); +} + +/** + * kbasep_tlstream_packet_header_update - update the packet header + * @buffer: pointer to the buffer + * @data_size: amount of data carried in this packet + * + * Function updates mutable part of packet header in the given buffer. + * Note that the value of data_size must not include the size of the header.
+ */ +static void kbasep_tlstream_packet_header_update( + char *buffer, + size_t data_size) +{ + u32 word0; + u32 word1; + + KBASE_DEBUG_ASSERT(buffer); + CSTD_UNUSED(word0); + + memcpy(&word1, &buffer[sizeof(word0)], sizeof(word1)); + + kbasep_tlstream_put_bits( + &word1, data_size, + PACKET_LENGTH_POS, PACKET_LENGTH_LEN); + + memcpy(&buffer[sizeof(word0)], &word1, sizeof(word1)); +} + +/** + * kbasep_tlstream_packet_number_update - update the packet number + * @buffer: pointer to the buffer + * @counter: value of packet counter for this packet's stream + * + * Function updates packet number embedded within the packet placed in the + * given buffer. + */ +static void kbasep_tlstream_packet_number_update(char *buffer, u32 counter) +{ + KBASE_DEBUG_ASSERT(buffer); + + memcpy(&buffer[PACKET_HEADER_SIZE], &counter, sizeof(counter)); +} + +/** + * kbasep_timeline_stream_reset - reset stream + * @stream: pointer to the stream structure + * + * Function discards all pending messages and resets packet counters. + */ +static void kbasep_timeline_stream_reset(struct tl_stream *stream) +{ + unsigned int i; + + for (i = 0; i < PACKET_COUNT; i++) { + if (stream->numbered) + atomic_set( + &stream->buffer[i].size, + PACKET_HEADER_SIZE + + PACKET_NUMBER_SIZE); + else + atomic_set(&stream->buffer[i].size, PACKET_HEADER_SIZE); + } + + atomic_set(&stream->wbi, 0); + atomic_set(&stream->rbi, 0); +} + +/** + * kbasep_timeline_stream_init - initialize timeline stream + * @stream: pointer to the stream structure + * @stream_type: stream type + */ +static void kbasep_timeline_stream_init( + struct tl_stream *stream, + enum tl_stream_type stream_type) +{ + unsigned int i; + + KBASE_DEBUG_ASSERT(stream); + KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); + + spin_lock_init(&stream->lock); + + /* All packets carrying tracepoints shall be numbered. */ + if (TL_PACKET_TYPE_BODY == tl_stream_cfg[stream_type].pkt_type) + stream->numbered = 1; + else + stream->numbered = 0; + + for (i = 0; i < PACKET_COUNT; i++) + kbasep_tlstream_packet_header_setup( + stream->buffer[i].data, + tl_stream_cfg[stream_type].pkt_family, + tl_stream_cfg[stream_type].pkt_class, + tl_stream_cfg[stream_type].pkt_type, + tl_stream_cfg[stream_type].stream_id, + stream->numbered); + + kbasep_timeline_stream_reset(stream); +} + +/** + * kbasep_timeline_stream_term - terminate timeline stream + * @stream: pointer to the stream structure + */ +static void kbasep_timeline_stream_term(struct tl_stream *stream) +{ + KBASE_DEBUG_ASSERT(stream); +} + +/** + * kbasep_tlstream_msgbuf_submit - submit packet to the user space + * @stream: pointer to the stream structure + * @wb_idx_raw: write buffer index + * @wb_size: length of data stored in current buffer + * + * Function updates the currently written buffer with a packet header. Then the + * write index is incremented and the buffer is handed over to user space. + * Parameters of the new buffer are returned using the provided arguments. + * + * Return: length of data in new buffer + * + * Warning: The user must update the stream structure with the returned value.
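The wbi/rbi pair used below implements a classic unbounded-index ring: raw indices grow without wrapping, a packet lives in slot idx % PACKET_COUNT, and the writer detects that it is about to overwrite the oldest unread packet when wbi - rbi reaches PACKET_COUNT, at which point it advances rbi on the reader's behalf. A single-threaded model of that arithmetic (hypothetical; the driver does the rbi bump with atomic_cmpxchg() and memory barriers):

#include <assert.h>
#include <stdio.h>

#define PACKET_COUNT 16

static unsigned int wbi; /* raw write index, wraps naturally */
static unsigned int rbi; /* raw read index                   */

static void submit_packet(void)
{
	unsigned int slot = wbi % PACKET_COUNT;

	/* ... packet in buffer[slot] is now complete ... */
	(void)slot;
	wbi++;

	/* Writer caught up with the reader: drop the oldest packet. */
	if (wbi - rbi == PACKET_COUNT)
		rbi++;
}

int main(void)
{
	unsigned int i;

	for (i = 0; i < 3 * PACKET_COUNT; i++)
		submit_packet();

	/* At most PACKET_COUNT packets are ever pending. */
	assert(wbi - rbi <= PACKET_COUNT);
	printf("pending=%u\n", wbi - rbi);
	return 0;
}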
+ */ +static size_t kbasep_tlstream_msgbuf_submit( + struct tl_stream *stream, + unsigned int wb_idx_raw, + unsigned int wb_size) +{ + unsigned int rb_idx_raw = atomic_read(&stream->rbi); + unsigned int wb_idx = wb_idx_raw % PACKET_COUNT; + + kbasep_tlstream_packet_header_update( + stream->buffer[wb_idx].data, + wb_size - PACKET_HEADER_SIZE); + + if (stream->numbered) + kbasep_tlstream_packet_number_update( + stream->buffer[wb_idx].data, + wb_idx_raw); + + /* Increasing the write buffer index will expose this packet to the reader. + * As stream->lock is not taken on the reader side we must make sure memory + * is updated correctly before this happens. */ + smp_wmb(); + wb_idx_raw++; + atomic_set(&stream->wbi, wb_idx_raw); + + /* Inform user that packets are ready for reading. */ + wake_up_interruptible(&tl_event_queue); + + /* Detect and mark overflow in this stream. */ + if (PACKET_COUNT == wb_idx_raw - rb_idx_raw) { + /* Reader side depends on this increment to correctly handle + * overflows. The value shall be updated only if it was not + * modified by the reader. The data holding buffer will not be + * updated before stream->lock is released; however, the size of + * the buffer will. Make sure this increment is globally visible + * before information about the selected write buffer size. */ + atomic_cmpxchg(&stream->rbi, rb_idx_raw, rb_idx_raw + 1); + } + + wb_size = PACKET_HEADER_SIZE; + if (stream->numbered) + wb_size += PACKET_NUMBER_SIZE; + + return wb_size; +} + +/** + * kbasep_tlstream_msgbuf_acquire - lock selected stream and reserve a buffer + * @stream_type: type of the stream that shall be locked + * @msg_size: message size + * @flags: pointer to store flags passed back on stream release + * + * Function will lock the stream and reserve the number of bytes requested + * in msg_size for the user. + * + * Return: pointer to the buffer where the message can be stored + * + * Warning: Stream must be released with kbasep_tlstream_msgbuf_release(). + * Only atomic operations are allowed while the stream is locked + * (i.e. do not use any operation that may sleep). + */ +static char *kbasep_tlstream_msgbuf_acquire( + enum tl_stream_type stream_type, + size_t msg_size, + unsigned long *flags) +{ + struct tl_stream *stream; + unsigned int wb_idx_raw; + unsigned int wb_idx; + size_t wb_size; + + KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); + KBASE_DEBUG_ASSERT( + PACKET_SIZE - PACKET_HEADER_SIZE - PACKET_NUMBER_SIZE >= + msg_size); + + stream = tl_stream[stream_type]; + + spin_lock_irqsave(&stream->lock, *flags); + + wb_idx_raw = atomic_read(&stream->wbi); + wb_idx = wb_idx_raw % PACKET_COUNT; + wb_size = atomic_read(&stream->buffer[wb_idx].size); + + /* Select next buffer if data will not fit into current one. */ + if (PACKET_SIZE < wb_size + msg_size) { + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + } + + /* Reserve space in selected buffer. */ + atomic_set(&stream->buffer[wb_idx].size, wb_size + msg_size); + +#if MALI_UNIT_TEST + atomic_add(msg_size, &tlstream_bytes_generated); +#endif /* MALI_UNIT_TEST */ + + return &stream->buffer[wb_idx].data[wb_size]; +} + +/** + * kbasep_tlstream_msgbuf_release - unlock selected stream + * @stream_type: type of the stream that shall be unlocked + * @flags: value obtained during stream acquire + * + * Function releases stream that has been previously locked with a call to + * kbasep_tlstream_msgbuf_acquire().
+ */ +static void kbasep_tlstream_msgbuf_release( + enum tl_stream_type stream_type, + unsigned long flags) +{ + struct tl_stream *stream; + + KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type); + + stream = tl_stream[stream_type]; + stream->last_write_time = kbasep_tlstream_get_timestamp(); + + spin_unlock_irqrestore(&stream->lock, flags); +} + +/*****************************************************************************/ + +/** + * kbasep_tlstream_flush_stream - flush stream + * @stype: type of stream to be flushed + * + * Flush pending data in the timeline stream. + */ +static void kbasep_tlstream_flush_stream(enum tl_stream_type stype) +{ + struct tl_stream *stream = tl_stream[stype]; + unsigned long flags; + unsigned int wb_idx_raw; + unsigned int wb_idx; + size_t wb_size; + size_t min_size = PACKET_HEADER_SIZE; + + if (stream->numbered) + min_size += PACKET_NUMBER_SIZE; + + spin_lock_irqsave(&stream->lock, flags); + + wb_idx_raw = atomic_read(&stream->wbi); + wb_idx = wb_idx_raw % PACKET_COUNT; + wb_size = atomic_read(&stream->buffer[wb_idx].size); + + if (wb_size > min_size) { + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + atomic_set(&stream->buffer[wb_idx].size, wb_size); + } + spin_unlock_irqrestore(&stream->lock, flags); +} + +/** + * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback + * @data: unused + * + * The timer executes periodically to check whether any of the streams + * contains a buffer ready to be submitted to user space. + */ +static void kbasep_tlstream_autoflush_timer_callback(unsigned long data) +{ + u64 timestamp = kbasep_tlstream_get_timestamp(); + enum tl_stream_type stype; + int rcode; + + CSTD_UNUSED(data); + + for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) { + struct tl_stream *stream = tl_stream[stype]; + unsigned long flags; + unsigned int wb_idx_raw; + unsigned int wb_idx; + size_t wb_size; + size_t min_size = PACKET_HEADER_SIZE; + + if (stream->numbered) + min_size += PACKET_NUMBER_SIZE; + + spin_lock_irqsave(&stream->lock, flags); + + wb_idx_raw = atomic_read(&stream->wbi); + wb_idx = wb_idx_raw % PACKET_COUNT; + wb_size = atomic_read(&stream->buffer[wb_idx].size); + + if ( + (wb_size > min_size) && + ( + timestamp - stream->last_write_time > + AUTOFLUSH_TIMEOUT)) { + + wb_size = kbasep_tlstream_msgbuf_submit( + stream, wb_idx_raw, wb_size); + wb_idx = (wb_idx_raw + 1) % PACKET_COUNT; + atomic_set(&stream->buffer[wb_idx].size, wb_size); + } + spin_unlock_irqrestore(&stream->lock, flags); + } + + if (atomic_read(&autoflush_timer_active)) + rcode = mod_timer( + &autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); +} + +/** + * kbasep_tlstream_packet_pending - check timeline streams for pending packets + * @stype: pointer to variable where stream type will be placed + * @rb_idx_raw: pointer to variable where read buffer index will be placed + * + * Function checks all streams for pending packets. It stops as soon as a + * packet ready to be submitted to user space is detected. The variables + * passed as parameters to this function will be updated to point at the + * right stream and buffer.
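The autoflush machinery above is a self-rearming kernel timer: the callback scans the streams, then re-arms itself with mod_timer() for as long as an atomic flag keeps it alive. A minimal stand-alone sketch of the pattern, using the timer API as it existed in kernels of this vintage (hypothetical module, not driver code):

#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/atomic.h>

#define FLUSH_INTERVAL_MS 1000

static struct timer_list flush_timer;
static atomic_t flush_timer_active;

static void flush_timer_callback(unsigned long data)
{
	/* ... scan streams and flush stale buffers here ... */

	/* Re-arm only while the owner wants periodic flushing. */
	if (atomic_read(&flush_timer_active))
		mod_timer(&flush_timer,
			  jiffies + msecs_to_jiffies(FLUSH_INTERVAL_MS));
}

static int __init flush_demo_init(void)
{
	atomic_set(&flush_timer_active, 1);
	setup_timer(&flush_timer, flush_timer_callback, 0);
	mod_timer(&flush_timer, jiffies + msecs_to_jiffies(FLUSH_INTERVAL_MS));
	return 0;
}

static void __exit flush_demo_exit(void)
{
	atomic_set(&flush_timer_active, 0);
	del_timer_sync(&flush_timer);  /* wait for a running callback */
}

module_init(flush_demo_init);
module_exit(flush_demo_exit);
MODULE_LICENSE("GPL");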
+
+/*****************************************************************************/
+
+/**
+ * kbasep_tlstream_flush_stream - flush stream
+ * @stype: type of stream to be flushed
+ *
+ * Flush pending data in the timeline stream.
+ */
+static void kbasep_tlstream_flush_stream(enum tl_stream_type stype)
+{
+    struct tl_stream *stream = tl_stream[stype];
+    unsigned long    flags;
+    unsigned int     wb_idx_raw;
+    unsigned int     wb_idx;
+    size_t           wb_size;
+    size_t           min_size = PACKET_HEADER_SIZE;
+
+    if (stream->numbered)
+        min_size += PACKET_NUMBER_SIZE;
+
+    spin_lock_irqsave(&stream->lock, flags);
+
+    wb_idx_raw = atomic_read(&stream->wbi);
+    wb_idx     = wb_idx_raw % PACKET_COUNT;
+    wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+    if (wb_size > min_size) {
+        wb_size = kbasep_tlstream_msgbuf_submit(
+                stream, wb_idx_raw, wb_size);
+        wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+        atomic_set(&stream->buffer[wb_idx].size, wb_size);
+    }
+    spin_unlock_irqrestore(&stream->lock, flags);
+}
+
+/**
+ * kbasep_tlstream_autoflush_timer_callback - autoflush timer callback
+ * @data: unused
+ *
+ * Timer is executed periodically to check if any of the streams contains a
+ * buffer ready to be submitted to user space.
+ */
+static void kbasep_tlstream_autoflush_timer_callback(unsigned long data)
+{
+    u64                 timestamp = kbasep_tlstream_get_timestamp();
+    enum tl_stream_type stype;
+    int                 rcode;
+
+    CSTD_UNUSED(data);
+
+    for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) {
+        struct tl_stream *stream = tl_stream[stype];
+        unsigned long    flags;
+        unsigned int     wb_idx_raw;
+        unsigned int     wb_idx;
+        size_t           wb_size;
+        size_t           min_size = PACKET_HEADER_SIZE;
+
+        if (stream->numbered)
+            min_size += PACKET_NUMBER_SIZE;
+
+        spin_lock_irqsave(&stream->lock, flags);
+
+        wb_idx_raw = atomic_read(&stream->wbi);
+        wb_idx     = wb_idx_raw % PACKET_COUNT;
+        wb_size    = atomic_read(&stream->buffer[wb_idx].size);
+
+        if ((wb_size > min_size) &&
+            (timestamp - stream->last_write_time > AUTOFLUSH_TIMEOUT)) {
+            wb_size = kbasep_tlstream_msgbuf_submit(
+                    stream, wb_idx_raw, wb_size);
+            wb_idx = (wb_idx_raw + 1) % PACKET_COUNT;
+            atomic_set(&stream->buffer[wb_idx].size, wb_size);
+        }
+        spin_unlock_irqrestore(&stream->lock, flags);
+    }
+
+    if (atomic_read(&autoflush_timer_active))
+        rcode = mod_timer(
+                &autoflush_timer,
+                jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL));
+    CSTD_UNUSED(rcode);
+}
+
+/**
+ * kbasep_tlstream_packet_pending - check timeline streams for pending packets
+ * @stype:      pointer to variable where stream type will be placed
+ * @rb_idx_raw: pointer to variable where read buffer index will be placed
+ *
+ * Function checks all streams for pending packets. It stops as soon as a
+ * packet ready to be submitted to user space is detected. On success the
+ * variables pointed to by the parameters are updated to identify that
+ * stream and buffer.
+ *
+ * Return: non-zero if any of the timeline streams has at least one packet
+ *         ready
+ */
+static int kbasep_tlstream_packet_pending(
+        enum tl_stream_type *stype,
+        unsigned int        *rb_idx_raw)
+{
+    int pending = 0;
+
+    KBASE_DEBUG_ASSERT(stype);
+    KBASE_DEBUG_ASSERT(rb_idx_raw);
+
+    for (
+            *stype = 0;
+            (*stype < TL_STREAM_TYPE_COUNT) && !pending;
+            (*stype)++) {
+        if (NULL != tl_stream[*stype]) {
+            *rb_idx_raw = atomic_read(&tl_stream[*stype]->rbi);
+            /* Read buffer index may be updated by the writer in case
+             * of overflow. Read and write buffer indexes must be
+             * loaded in the correct order. */
+            smp_rmb();
+            if (atomic_read(&tl_stream[*stype]->wbi) != *rb_idx_raw)
+                pending = 1;
+        }
+    }
+    (*stype)--;
+
+    return pending;
+}
+
+/**
+ * kbasep_tlstream_read - copy data from streams to buffer provided by user
+ * @filp:   pointer to file structure (unused)
+ * @buffer: pointer to the buffer provided by user
+ * @size:   maximum amount of data that can be stored in the buffer
+ * @f_pos:  pointer to file offset (unused)
+ *
+ * Return: number of bytes stored in the buffer
+ */
+static ssize_t kbasep_tlstream_read(
+        struct file *filp,
+        char __user *buffer,
+        size_t      size,
+        loff_t      *f_pos)
+{
+    ssize_t copy_len = 0;
+
+    KBASE_DEBUG_ASSERT(filp);
+    KBASE_DEBUG_ASSERT(buffer);
+    KBASE_DEBUG_ASSERT(f_pos);
+    CSTD_UNUSED(filp);
+
+    if ((0 > *f_pos) || (PACKET_SIZE > size))
+        return -EINVAL;
+
+    mutex_lock(&tl_reader_lock);
+
+    while (copy_len < size) {
+        enum tl_stream_type stype;
+        unsigned int        rb_idx_raw;
+        unsigned int        rb_idx;
+        size_t              rb_size;
+
+        /* If we have not read any data yet, wait for a packet to be
+         * submitted. If we have already read some packets and no
+         * packet is pending, return to the user. */
+        if (0 < copy_len) {
+            if (!kbasep_tlstream_packet_pending(
+                        &stype,
+                        &rb_idx_raw))
+                break;
+        } else {
+            if (wait_event_interruptible(
+                        tl_event_queue,
+                        kbasep_tlstream_packet_pending(
+                            &stype,
+                            &rb_idx_raw))) {
+                copy_len = -ERESTARTSYS;
+                break;
+            }
+        }
+
+        /* Check if this packet fits into the user buffer.
+         * If so, copy its contents. */
+        rb_idx = rb_idx_raw % PACKET_COUNT;
+        rb_size = atomic_read(&tl_stream[stype]->buffer[rb_idx].size);
+        if (rb_size > size - copy_len)
+            break;
+        if (copy_to_user(
+                    &buffer[copy_len],
+                    tl_stream[stype]->buffer[rb_idx].data,
+                    rb_size)) {
+            copy_len = -EFAULT;
+            break;
+        }
+
+        /* Verify that the selected stream has not overflowed. If it
+         * has, the packet we just copied (and the size we used) may
+         * have been overwritten, so do not count it. */
+        smp_rmb();
+        if (atomic_read(&tl_stream[stype]->rbi) == rb_idx_raw) {
+            copy_len += rb_size;
+            atomic_inc(&tl_stream[stype]->rbi);
+
+#if MALI_UNIT_TEST
+            atomic_add(rb_size, &tlstream_bytes_collected);
+#endif /* MALI_UNIT_TEST */
+        }
+    }
+
+    mutex_unlock(&tl_reader_lock);
+
+    return copy_len;
+}
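+
+/*
+ * For orientation, a minimal user space consumer of this interface (a
+ * sketch; it assumes `fd` was obtained through kbase_tlstream_acquire(),
+ * and parse_packets() is a hypothetical decoder):
+ *
+ *    char buf[PACKET_SIZE];    // read() rejects buffers < PACKET_SIZE
+ *    for (;;) {
+ *        ssize_t n = read(fd, buf, sizeof(buf));
+ *        if (n < 0)
+ *            break;            // e.g. interrupted by a signal
+ *        parse_packets(buf, n);
+ *    }
+ *
+ * Each successful read() returns one or more whole packets; a packet is
+ * never split across read() calls, and read() blocks until at least one
+ * packet is available.
+ */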
+
+/**
+ * kbasep_tlstream_poll - poll timeline stream for packets
+ * @filp: pointer to file structure
+ * @wait: pointer to poll table
+ *
+ * Return: POLLIN if data can be read without blocking, otherwise zero
+ */
+static unsigned int kbasep_tlstream_poll(struct file *filp, poll_table *wait)
+{
+    enum tl_stream_type stream_type;
+    unsigned int        rb_idx;
+
+    KBASE_DEBUG_ASSERT(filp);
+    KBASE_DEBUG_ASSERT(wait);
+
+    poll_wait(filp, &tl_event_queue, wait);
+    if (kbasep_tlstream_packet_pending(&stream_type, &rb_idx))
+        return POLLIN;
+    return 0;
+}
+
+/**
+ * kbasep_tlstream_release - release timeline stream descriptor
+ * @inode: pointer to inode structure
+ * @filp:  pointer to file structure
+ *
+ * Return: always returns zero
+ */
+static int kbasep_tlstream_release(struct inode *inode, struct file *filp)
+{
+    KBASE_DEBUG_ASSERT(inode);
+    KBASE_DEBUG_ASSERT(filp);
+    CSTD_UNUSED(inode);
+    CSTD_UNUSED(filp);
+    atomic_set(&tlstream_busy, 0);
+    return 0;
+}
+
+/**
+ * kbasep_tlstream_timeline_header - prepare timeline header stream packet
+ * @stream_type: type of the stream that will carry header data
+ * @tp_desc:     pointer to array with tracepoint descriptors
+ * @tp_count:    number of descriptors in the given array
+ *
+ * Function fills in information about tracepoints stored in the body stream
+ * associated with this header stream.
+ */
+static void kbasep_tlstream_timeline_header(
+        enum tl_stream_type  stream_type,
+        const struct tp_desc *tp_desc,
+        u32                  tp_count)
+{
+    const u8 tv = KBASEP_TLSTREAM_VERSION; /* tlstream version */
+    const u8 ps = sizeof(void *);          /* pointer size */
+    size_t   msg_size = sizeof(tv) + sizeof(ps) + sizeof(tp_count);
+    char     *buffer;
+    size_t   pos = 0;
+    unsigned long flags;
+    unsigned int  i;
+
+    KBASE_DEBUG_ASSERT(TL_STREAM_TYPE_COUNT > stream_type);
+    KBASE_DEBUG_ASSERT(tp_desc);
+
+    /* Calculate the size of the timeline message.
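+     * Each descriptor contributes its numeric id plus four strings;
+     * every string costs its length (capped at STRLEN_MAX), one byte
+     * for the terminating NUL and a u32 holding its size.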
*/ + for (i = 0; i < tp_count; i++) { + msg_size += sizeof(tp_desc[i].id); + msg_size += + strnlen(tp_desc[i].id_str, STRLEN_MAX) + + sizeof(char) + sizeof(u32); + msg_size += + strnlen(tp_desc[i].name, STRLEN_MAX) + + sizeof(char) + sizeof(u32); + msg_size += + strnlen(tp_desc[i].arg_types, STRLEN_MAX) + + sizeof(char) + sizeof(u32); + msg_size += + strnlen(tp_desc[i].arg_names, STRLEN_MAX) + + sizeof(char) + sizeof(u32); + } + + KBASE_DEBUG_ASSERT(PACKET_SIZE - PACKET_HEADER_SIZE >= msg_size); + + buffer = kbasep_tlstream_msgbuf_acquire(stream_type, msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &tv, sizeof(tv)); + pos = kbasep_tlstream_write_bytes(buffer, pos, &ps, sizeof(ps)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &tp_count, sizeof(tp_count)); + + for (i = 0; i < tp_count; i++) { + pos = kbasep_tlstream_write_bytes( + buffer, pos, + &tp_desc[i].id, sizeof(tp_desc[i].id)); + pos = kbasep_tlstream_write_string( + buffer, pos, + tp_desc[i].id_str, msg_size - pos); + pos = kbasep_tlstream_write_string( + buffer, pos, + tp_desc[i].name, msg_size - pos); + pos = kbasep_tlstream_write_string( + buffer, pos, + tp_desc[i].arg_types, msg_size - pos); + pos = kbasep_tlstream_write_string( + buffer, pos, + tp_desc[i].arg_names, msg_size - pos); + } + + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(stream_type, flags); + + /* We don't expect any more data to be read in this stream. + * As header stream must be read before its associated body stream, + * make this packet visible to the user straightaway. */ + kbasep_tlstream_flush_stream(stream_type); +} + +/*****************************************************************************/ + +int kbase_tlstream_init(void) +{ + enum tl_stream_type i; + int rcode; + + /* Prepare stream structures. */ + for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { + tl_stream[i] = kmalloc(sizeof(**tl_stream), GFP_KERNEL); + if (!tl_stream[i]) + break; + kbasep_timeline_stream_init(tl_stream[i], i); + } + if (TL_STREAM_TYPE_COUNT > i) { + for (; i > 0; i--) { + kbasep_timeline_stream_term(tl_stream[i - 1]); + kfree(tl_stream[i - 1]); + } + return -ENOMEM; + } + + /* Initialize autoflush timer. */ + atomic_set(&autoflush_timer_active, 1); + setup_timer(&autoflush_timer, + kbasep_tlstream_autoflush_timer_callback, + 0); + rcode = mod_timer( + &autoflush_timer, + jiffies + msecs_to_jiffies(AUTOFLUSH_INTERVAL)); + CSTD_UNUSED(rcode); + + return 0; +} + +void kbase_tlstream_term(void) +{ + enum tl_stream_type i; + + atomic_set(&autoflush_timer_active, 0); + del_timer_sync(&autoflush_timer); + + for (i = 0; i < TL_STREAM_TYPE_COUNT; i++) { + kbasep_timeline_stream_term(tl_stream[i]); + kfree(tl_stream[i]); + } +} + +int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd) +{ + if (0 == atomic_cmpxchg(&tlstream_busy, 0, 1)) { + *fd = anon_inode_getfd( + "[mali_tlstream]", + &kbasep_tlstream_fops, + kctx, + O_RDONLY | O_CLOEXEC); + if (0 > *fd) { + atomic_set(&tlstream_busy, 0); + return *fd; + } + + /* Reset and initialize header streams. 
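+         * The header packets carry the tracepoint descriptions and
+         * must reach user space before the body streams they describe;
+         * kbasep_tlstream_timeline_header() flushes them straightaway
+         * for that reason.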
*/ + kbasep_timeline_stream_reset( + tl_stream[TL_STREAM_TYPE_OBJ_HEADER]); + kbasep_timeline_stream_reset( + tl_stream[TL_STREAM_TYPE_OBJ_SUMMARY]); + kbasep_timeline_stream_reset( + tl_stream[TL_STREAM_TYPE_AUX_HEADER]); + kbasep_tlstream_timeline_header( + TL_STREAM_TYPE_OBJ_HEADER, + tp_desc_obj, + ARRAY_SIZE(tp_desc_obj)); + kbasep_tlstream_timeline_header( + TL_STREAM_TYPE_AUX_HEADER, + tp_desc_aux, + ARRAY_SIZE(tp_desc_aux)); + } else { + *fd = -EBUSY; + } + + return 0; +} + +void kbase_tlstream_flush_streams(void) +{ + enum tl_stream_type stype; + + for (stype = 0; stype < TL_STREAM_TYPE_COUNT; stype++) + kbasep_tlstream_flush_stream(stype); +} + +void kbase_tlstream_reset_body_streams(void) +{ + kbasep_timeline_stream_reset( + tl_stream[TL_STREAM_TYPE_OBJ]); + kbasep_timeline_stream_reset( + tl_stream[TL_STREAM_TYPE_AUX]); +} + +#if MALI_UNIT_TEST +void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated) +{ + KBASE_DEBUG_ASSERT(bytes_collected); + KBASE_DEBUG_ASSERT(bytes_generated); + *bytes_collected = atomic_read(&tlstream_bytes_collected); + *bytes_generated = atomic_read(&tlstream_bytes_generated); +} +#endif /* MALI_UNIT_TEST */ + +/*****************************************************************************/ + +void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr) +{ + const u32 msg_id = KBASE_TL_NEW_CTX; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ_SUMMARY, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &context, sizeof(context)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &nr, sizeof(nr)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} + +void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count) +{ + const u32 msg_id = KBASE_TL_NEW_GPU; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(id) + + sizeof(core_count); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ_SUMMARY, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &gpu, sizeof(gpu)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &id, sizeof(id)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &core_count, sizeof(core_count)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} + +void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn) +{ + const u32 msg_id = KBASE_TL_NEW_LPU; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(nr) + + sizeof(fn); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ_SUMMARY, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &lpu, sizeof(lpu)); + pos = kbasep_tlstream_write_bytes( + 
buffer, pos, &nr, sizeof(nr)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &fn, sizeof(fn)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} + +void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu) +{ + const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(lpu) + sizeof(gpu); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ_SUMMARY, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &lpu, sizeof(lpu)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &gpu, sizeof(gpu)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ_SUMMARY, flags); +} + +/*****************************************************************************/ + +void kbase_tlstream_tl_new_ctx(void *context, u32 nr) +{ + const u32 msg_id = KBASE_TL_NEW_CTX; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(context) + sizeof(nr); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &context, sizeof(context)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &nr, sizeof(nr)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_new_atom(void *atom, u32 nr) +{ + const u32 msg_id = KBASE_TL_NEW_ATOM; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(nr); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &nr, sizeof(nr)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_del_ctx(void *context) +{ + const u32 msg_id = KBASE_TL_DEL_CTX; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(context); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &context, sizeof(context)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_del_atom(void *atom) +{ + const u32 msg_id = KBASE_TL_DEL_ATOM; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + 
KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context) +{ + const u32 msg_id = KBASE_TL_RET_GPU_CTX; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &gpu, sizeof(gpu)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &context, sizeof(context)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context) +{ + const u32 msg_id = KBASE_TL_RET_ATOM_CTX; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(context); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &context, sizeof(context)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu) +{ + const u32 msg_id = KBASE_TL_RET_ATOM_LPU; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &lpu, sizeof(lpu)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context) +{ + const u32 msg_id = KBASE_TL_NRET_GPU_CTX; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(gpu) + sizeof(context); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &gpu, sizeof(gpu)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &context, sizeof(context)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context) +{ + const u32 msg_id = KBASE_TL_NRET_ATOM_CTX; + const size_t msg_size = + sizeof(msg_id) + 
sizeof(u64) + sizeof(atom) + sizeof(context); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &context, sizeof(context)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu) +{ + const u32 msg_id = KBASE_TL_NRET_ATOM_LPU; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(atom) + sizeof(lpu); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_OBJ, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &atom, sizeof(atom)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &lpu, sizeof(lpu)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_OBJ, flags); +} + +/*****************************************************************************/ + +void kbase_tlstream_aux_pm_state(u32 core_type, u64 state) +{ + const u32 msg_id = KBASE_AUX_PM_STATE; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(core_type) + + sizeof(state); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &core_type, sizeof(core_type)); + pos = kbasep_tlstream_write_bytes(buffer, pos, &state, sizeof(state)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + +void kbase_tlstream_aux_job_softstop(u32 js_id) +{ + const u32 msg_id = KBASE_AUX_JOB_SOFTSTOP; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(js_id); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, + msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes(buffer, pos, &js_id, sizeof(js_id)); + KBASE_DEBUG_ASSERT(msg_size == pos); + + kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags); +} + +void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count) +{ + const u32 msg_id = KBASE_AUX_PAGEFAULT; + const size_t msg_size = + sizeof(msg_id) + sizeof(u64) + sizeof(mmu_as) + + sizeof(page_count); + unsigned long flags; + char *buffer; + size_t pos = 0; + + buffer = kbasep_tlstream_msgbuf_acquire( + TL_STREAM_TYPE_AUX, msg_size, &flags); + KBASE_DEBUG_ASSERT(buffer); + + pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id)); + pos = kbasep_tlstream_write_timestamp(buffer, pos); + pos = kbasep_tlstream_write_bytes(buffer, pos, &mmu_as, sizeof(mmu_as)); + pos = kbasep_tlstream_write_bytes( + buffer, pos, &page_count, sizeof(page_count)); + 
KBASE_DEBUG_ASSERT(msg_size == pos);
+
+    kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
+void kbase_tlstream_aux_pagesalloc(s64 page_count_change)
+{
+    const u32 msg_id = KBASE_AUX_PAGESALLOC;
+    const size_t msg_size =
+        sizeof(msg_id) + sizeof(u64) + sizeof(page_count_change);
+    unsigned long flags;
+    char *buffer;
+    size_t pos = 0;
+
+    buffer = kbasep_tlstream_msgbuf_acquire(
+            TL_STREAM_TYPE_AUX, msg_size, &flags);
+    KBASE_DEBUG_ASSERT(buffer);
+
+    pos = kbasep_tlstream_write_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+    pos = kbasep_tlstream_write_timestamp(buffer, pos);
+    pos = kbasep_tlstream_write_bytes(
+            buffer, pos,
+            &page_count_change, sizeof(page_count_change));
+    KBASE_DEBUG_ASSERT(msg_size == pos);
+
+    kbasep_tlstream_msgbuf_release(TL_STREAM_TYPE_AUX, flags);
+}
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_tlstream.h b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
new file mode 100644
index 00000000000..494f5c784f6
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_tlstream.h
@@ -0,0 +1,284 @@
+/*
+ *
+ * (C) COPYRIGHT 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#if !defined(_KBASE_TLSTREAM_H)
+#define _KBASE_TLSTREAM_H
+
+#include <mali_kbase.h>
+
+/*****************************************************************************/
+
+/**
+ * kbase_tlstream_init - initialize timeline infrastructure in kernel
+ *
+ * Return: zero on success, negative number on error
+ */
+int kbase_tlstream_init(void);
+
+/**
+ * kbase_tlstream_term - terminate timeline infrastructure in kernel
+ *
+ * The timeline infrastructure must have been previously initialized with
+ * kbase_tlstream_init().
+ */
+void kbase_tlstream_term(void);
+
+/**
+ * kbase_tlstream_acquire - acquire timeline stream file descriptor
+ * @kctx: kernel common context
+ * @fd:   timeline stream file descriptor
+ *
+ * This descriptor is meant to be used by userspace timeline to gain access
+ * to the kernel timeline stream. This stream is later broadcast by user
+ * space to the timeline client.
+ * Only one entity can own the descriptor at any given time. The descriptor
+ * shall be closed if unused. If the descriptor cannot be obtained (i.e. it
+ * is already in use), the fd argument will contain a negative value.
+ *
+ * Return: zero on success (this does not necessarily mean that a stream
+ *         descriptor could be returned), negative number on error
+ */
+int kbase_tlstream_acquire(struct kbase_context *kctx, int *fd);
+
+/**
+ * kbase_tlstream_flush_streams - flush timeline streams
+ *
+ * Function will flush pending data in all timeline streams.
+ */
+void kbase_tlstream_flush_streams(void);
+
+/**
+ * kbase_tlstream_reset_body_streams - reset timeline body streams
+ *
+ * Function will discard pending data in all timeline body streams.
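+ * Header and summary streams are left untouched; they are reset separately
+ * in kbase_tlstream_acquire().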
+ */ +void kbase_tlstream_reset_body_streams(void); + +#if MALI_UNIT_TEST +/** + * kbase_tlstream_test - start timeline stream data generator + * @tpw_count: number of trace point writers in each context + * @msg_delay: time delay in milliseconds between trace points written by one + * writer + * @msg_count: number of trace points written by one writer + * @aux_msg: if non-zero aux messages will be included + * + * This test starts a requested number of asynchronous writers in both IRQ and + * thread context. Each writer will generate required number of test + * tracepoints (tracepoints with embedded information about writer that + * should be verified by user space reader). Tracepoints will be emitted in + * all timeline body streams. If aux_msg is non-zero writer will also + * generate not testable tracepoints (tracepoints without information about + * writer). These tracepoints are used to check correctness of remaining + * timeline message generating functions. Writer will wait requested time + * between generating another set of messages. This call blocks until all + * writers finish. + */ +void kbase_tlstream_test( + unsigned int tpw_count, + unsigned int msg_delay, + unsigned int msg_count, + int aux_msg); + +/** + * kbase_tlstream_stats - read timeline stream statistics + * @bytes_collected: will hold number of bytes read by the user + * @bytes_generated: will hold number of bytes generated by trace points + */ +void kbase_tlstream_stats(u32 *bytes_collected, u32 *bytes_generated); +#endif /* MALI_UNIT_TEST */ + +/*****************************************************************************/ + +/** + * kbase_tlstream_tl_summary_new_ctx - create context object in timeline + * summary + * @context: name of the context object + * @nr: context number + * + * Function emits a timeline message informing about context creation. Context + * is created with context number (its attribute), that can be used to link + * kbase context with userspace context. + * This message is directed to timeline summary stream. + */ +void kbase_tlstream_tl_summary_new_ctx(void *context, u32 nr); + +/** + * kbase_tlstream_tl_summary_new_gpu - create GPU object in timeline summary + * @gpu: name of the GPU object + * @id: id value of this GPU + * @core_count: number of cores this GPU hosts + * + * Function emits a timeline message informing about GPU creation. GPU is + * created with two attributes: id and core count. + * This message is directed to timeline summary stream. + */ +void kbase_tlstream_tl_summary_new_gpu(void *gpu, u32 id, u32 core_count); + +/** + * kbase_tlstream_tl_summary_new_lpu - create LPU object in timeline summary + * @lpu: name of the Logical Processing Unit object + * @nr: sequential number assigned to this LPU + * @fn: property describing this LPU's functional abilities + * + * Function emits a timeline message informing about LPU creation. LPU is + * created with two attributes: number linking this LPU with GPU's job slot + * and function bearing information about this LPU abilities. + * This message is directed to timeline summary stream. + */ +void kbase_tlstream_tl_summary_new_lpu(void *lpu, u32 nr, u32 fn); + +/** + * kbase_tlstream_tl_summary_lifelink_lpu_gpu - lifelink LPU object to GPU + * @lpu: name of the Logical Processing Unit object + * @gpu: name of the GPU object + * + * Function emits a timeline message informing that LPU object shall be deleted + * along with GPU object. + * This message is directed to timeline summary stream. 
+ */ +void kbase_tlstream_tl_summary_lifelink_lpu_gpu(void *lpu, void *gpu); + +/** + * kbase_tlstream_tl_new_ctx - create context object in timeline + * @context: name of the context object + * @nr: context number + * + * Function emits a timeline message informing about context creation. Context + * is created with context number (its attribute), that can be used to link + * kbase context with userspace context. + */ +void kbase_tlstream_tl_new_ctx(void *context, u32 nr); + +/** + * kbase_tlstream_tl_new_atom - create atom object in timeline + * @atom: name of the atom object + * @nr: sequential number assigned to this atom + * + * Function emits a timeline message informing about atom creation. Atom is + * created with atom number (its attribute) that links it with actual work + * bucket id understood by hardware. + */ +void kbase_tlstream_tl_new_atom(void *atom, u32 nr); + +/** + * kbase_tlstream_tl_del_ctx - destroy context object in timeline + * @context: name of the context object + * + * Function emits a timeline message informing that context object ceased to + * exist. + */ +void kbase_tlstream_tl_del_ctx(void *context); + +/** + * kbase_tlstream_tl_del_atom - destroy atom object in timeline + * @atom: name of the atom object + * + * Function emits a timeline message informing that atom object ceased to + * exist. + */ +void kbase_tlstream_tl_del_atom(void *atom); + +/** + * kbase_tlstream_tl_ret_gpu_ctx - retain GPU by context + * @gpu: name of the GPU object + * @context: name of the context object + * + * Function emits a timeline message informing that GPU object is being held + * by context and must not be deleted unless it is released. + */ +void kbase_tlstream_tl_ret_gpu_ctx(void *gpu, void *context); + +/** + * kbase_tlstream_tl_ret_atom_ctx - retain atom by context + * @atom: name of the atom object + * @context: name of the context object + * + * Function emits a timeline message informing that atom object is being held + * by context and must not be deleted unless it is released. + */ +void kbase_tlstream_tl_ret_atom_ctx(void *atom, void *context); + +/** + * kbase_tlstream_tl_ret_atom_lpu - retain atom by LPU + * @atom: name of the atom object + * @lpu: name of the Logical Processing Unit object + * + * Function emits a timeline message informing that atom object is being held + * by LPU and must not be deleted unless it is released. + */ +void kbase_tlstream_tl_ret_atom_lpu(void *atom, void *lpu); + +/** + * kbase_tlstream_tl_nret_gpu_ctx - release GPU by context + * @gpu: name of the GPU object + * @context: name of the context object + * + * Function emits a timeline message informing that GPU object is being released + * by context. + */ +void kbase_tlstream_tl_nret_gpu_ctx(void *gpu, void *context); + +/** + * kbase_tlstream_tl_nret_atom_ctx - release atom by context + * @atom: name of the atom object + * @context: name of the context object + * + * Function emits a timeline message informing that atom object is being + * released by context. + */ +void kbase_tlstream_tl_nret_atom_ctx(void *atom, void *context); + +/** + * kbase_tlstream_tl_nret_atom_lpu - release atom by LPU + * @atom: name of the atom object + * @lpu: name of the Logical Processing Unit object + * + * Function emits a timeline message informing that atom object is being + * released by LPU. 
+ */
+void kbase_tlstream_tl_nret_atom_lpu(void *atom, void *lpu);
+
+/**
+ * kbase_tlstream_aux_pm_state - timeline message: power management state
+ * @core_type: core type (shader, tiler, l2 cache, l3 cache)
+ * @state:     64-bit bitmask reporting power state of the cores
+ *             (1-ON, 0-OFF)
+ */
+void kbase_tlstream_aux_pm_state(u32 core_type, u64 state);
+
+/**
+ * kbase_tlstream_aux_job_softstop - soft job stop occurred
+ * @js_id: job slot id
+ */
+void kbase_tlstream_aux_job_softstop(u32 js_id);
+
+/**
+ * kbase_tlstream_aux_pagefault - timeline message: MMU page fault event
+ *                                resulting in new pages being mapped
+ * @mmu_as:     MMU address space number
+ * @page_count: number of currently used pages
+ */
+void kbase_tlstream_aux_pagefault(u32 mmu_as, u32 page_count);
+
+/**
+ * kbase_tlstream_aux_pagesalloc - timeline message: total number of
+ *                                 allocated pages has changed
+ * @page_count_change: number of pages to be added or subtracted (according
+ *                     to the sign)
+ */
+void kbase_tlstream_aux_pagesalloc(s64 page_count_change);
+
+#endif /* _KBASE_TLSTREAM_H */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
new file mode 100644
index 00000000000..e2e0544208c
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_defs.h
@@ -0,0 +1,264 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE         *****
+ * *****            DO NOT INCLUDE DIRECTLY                  *****
+ * *****            THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * The purpose of this header file is just to contain a list of trace code
+ * identifiers
+ *
+ * Each identifier is wrapped in a macro, so that its string form and enum
+ * form can be created
+ *
+ * Each macro is separated with a comma, to allow insertion into an array
+ * initializer or enum definition block.
+ *
+ * This allows automatic creation of an enum and a corresponding array of
+ * strings
+ *
+ * Before #including, the includer MUST #define KBASE_TRACE_CODE_MAKE_CODE.
+ * After #including, the includer MUST #undef KBASE_TRACE_CODE_MAKE_CODE.
+ * + * e.g.: + * #define KBASE_TRACE_CODE( X ) KBASE_TRACE_CODE_ ## X + * typedef enum + * { + * #define KBASE_TRACE_CODE_MAKE_CODE( X ) KBASE_TRACE_CODE( X ) + * #include "mali_kbase_trace_defs.h" + * #undef KBASE_TRACE_CODE_MAKE_CODE + * } kbase_trace_code; + * + * IMPORTANT: THIS FILE MUST NOT BE USED FOR ANY OTHER PURPOSE OTHER THAN THE ABOVE + * + * + * The use of the macro here is: + * - KBASE_TRACE_CODE_MAKE_CODE( X ) + * + * Which produces: + * - For an enum, KBASE_TRACE_CODE_X + * - For a string, "X" + * + * + * For example: + * - KBASE_TRACE_CODE_MAKE_CODE( JM_JOB_COMPLETE ) expands to: + * - KBASE_TRACE_CODE_JM_JOB_COMPLETE for the enum + * - "JM_JOB_COMPLETE" for the string + * - To use it to trace an event, do: + * - KBASE_TRACE_ADD( kbdev, JM_JOB_COMPLETE, subcode, kctx, uatom, val ); + */ + +#if 0 /* Dummy section to avoid breaking formatting */ +int dummy_array[] = { +#endif + +/* + * Core events + */ + /* no info_val, no gpu_addr, no atom */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_DESTROY), + /* no info_val, no gpu_addr, no atom */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_CTX_HWINSTR_TERM), + /* info_val == GPU_IRQ_STATUS register */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ), + /* info_val == bits cleared */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_CLEAR), + /* info_val == GPU_IRQ_STATUS register */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_IRQ_DONE), + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_SOFT_RESET), + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_HARD_RESET), + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_CLEAR), + /* GPU addr==dump address */ + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_PRFCNT_SAMPLE), + KBASE_TRACE_CODE_MAKE_CODE(CORE_GPU_CLEAN_INV_CACHES), +/* + * Job Slot management events + */ + /* info_val==irq rawstat at start */ + KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ), + /* info_val==jobs processed */ + KBASE_TRACE_CODE_MAKE_CODE(JM_IRQ_END), +/* In the following: + * + * - ctx is set if a corresponding job found (NULL otherwise, e.g. 
some soft-stop cases) + * - uatom==kernel-side mapped uatom address (for correlation with user-side) + */ + /* info_val==exit code; gpu_addr==chain gpuaddr */ + KBASE_TRACE_CODE_MAKE_CODE(JM_JOB_DONE), + /* gpu_addr==JS_HEAD_NEXT written, info_val==lower 32 bits of affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT), + /* gpu_addr is as follows: + * - If JS_STATUS active after soft-stop, val==gpu addr written to + * JS_HEAD on submit + * - otherwise gpu_addr==0 */ + KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP), + KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_0), + KBASE_TRACE_CODE_MAKE_CODE(JM_SOFTSTOP_1), + /* gpu_addr==JS_HEAD read */ + KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP), + /* gpu_addr==JS_HEAD read */ + KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_0), + /* gpu_addr==JS_HEAD read */ + KBASE_TRACE_CODE_MAKE_CODE(JM_HARDSTOP_1), + /* gpu_addr==JS_TAIL read */ + KBASE_TRACE_CODE_MAKE_CODE(JM_UPDATE_HEAD), +/* gpu_addr is as follows: + * - If JS_STATUS active before soft-stop, val==JS_HEAD + * - otherwise gpu_addr==0 + */ + /* gpu_addr==JS_HEAD read */ + KBASE_TRACE_CODE_MAKE_CODE(JM_CHECK_HEAD), + KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS), + KBASE_TRACE_CODE_MAKE_CODE(JM_FLUSH_WORKQS_DONE), + /* info_val == is_scheduled */ + KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_NON_SCHEDULED), + /* info_val == is_scheduled */ + KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_SCHEDULED), + KBASE_TRACE_CODE_MAKE_CODE(JM_ZAP_DONE), + /* info_val == nr jobs submitted */ + KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_SOFT_OR_HARD_STOP), + /* gpu_addr==JS_HEAD_NEXT last written */ + KBASE_TRACE_CODE_MAKE_CODE(JM_SLOT_EVICT), + KBASE_TRACE_CODE_MAKE_CODE(JM_SUBMIT_AFTER_RESET), + KBASE_TRACE_CODE_MAKE_CODE(JM_BEGIN_RESET_WORKER), + KBASE_TRACE_CODE_MAKE_CODE(JM_END_RESET_WORKER), +/* + * Job dispatch events + */ + /* gpu_addr==value to write into JS_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_DONE), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_WORKER_END), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_DONE_TRY_RUN_NEXT_JOB), + /* gpu_addr==0, info_val==0, uatom==0 */ + KBASE_TRACE_CODE_MAKE_CODE(JD_ZAP_CONTEXT), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JD_CANCEL_WORKER), +/* + * Scheduler Core events + */ + KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX_NOLOCK), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JS_ADD_JOB), + /* gpu_addr==last value written/would be written to JS_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JS_REMOVE_JOB), + KBASE_TRACE_CODE_MAKE_CODE(JS_RETAIN_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_RELEASE_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_TRY_SCHEDULE_HEAD_CTX), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_TRY_RUN_NEXT_JOB), + /* gpu_addr==value to write into JS_HEAD */ + KBASE_TRACE_CODE_MAKE_CODE(JS_JOB_DONE_RETRY_NEEDED), + /* kctx is the one being evicted, info_val == kctx to put in */ + KBASE_TRACE_CODE_MAKE_CODE(JS_FAST_START_EVICTS_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_SUBMIT_TO_BLOCKED), + /* info_val == lower 32 bits of affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JS_AFFINITY_CURRENT), + /* info_val == lower 32 bits of affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_CORES_FAILED), + /* info_val == lower 32 bits of affinity */ + 
KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_INUSE_FAILED), + /* info_val == lower 32 bits of rechecked affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED), + /* info_val == lower 32 bits of rechecked affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED), + /* info_val == lower 32 bits of affinity */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CORE_REF_AFFINITY_WOULD_VIOLATE), + /* info_val == the ctx attribute now on ctx */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_CTX), + /* info_val == the ctx attribute now on runpool */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_ON_RUNPOOL), + /* info_val == the ctx attribute now off ctx */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_CTX), + /* info_val == the ctx attribute now off runpool */ + KBASE_TRACE_CODE_MAKE_CODE(JS_CTX_ATTR_NOW_OFF_RUNPOOL), +/* + * Scheduler Policy events + */ + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_INIT_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TERM_CTX), + /* info_val == whether it was evicted */ + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TRY_EVICT_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_FOREACH_CTX_JOBS), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_HEAD_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_ADD_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_RUNPOOL_REMOVE_CTX), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_DEQUEUE_JOB_IRQ), + /* gpu_addr==JS_HEAD to write if the job were run */ + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_ENQUEUE_JOB), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_START), + KBASE_TRACE_CODE_MAKE_CODE(JS_POLICY_TIMER_END), +/* + * Power Management Events + */ + KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERING_UP), + KBASE_TRACE_CODE_MAKE_CODE(PM_JOB_SUBMIT_AFTER_POWERED_UP), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWRON_L2), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_PWROFF_L2), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_POWERED_L2), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE), + KBASE_TRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER), + /* PM_DESIRED_REACHED: gpu_addr == pm.gpu_in_desired_state */ + KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED), + KBASE_TRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED_TILER), + KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_INUSE), + KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_INUSE), + KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_SHADER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_REGISTER_CHANGE_TILER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_SHADER_INUSE), + KBASE_TRACE_CODE_MAKE_CODE(PM_RELEASE_CHANGE_TILER_INUSE), + KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_SHADER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_UNREQUEST_CHANGE_TILER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_SHADER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_REQUEST_CHANGE_TILER_NEEDED), + KBASE_TRACE_CODE_MAKE_CODE(PM_WAKE_WAITERS), + KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_ACTIVE), + 
KBASE_TRACE_CODE_MAKE_CODE(PM_CONTEXT_IDLE), + KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_ON), + KBASE_TRACE_CODE_MAKE_CODE(PM_GPU_OFF), + /* info_val == policy number, or -1 for "Already changing" */ + KBASE_TRACE_CODE_MAKE_CODE(PM_SET_POLICY), + KBASE_TRACE_CODE_MAKE_CODE(PM_CA_SET_POLICY), + /* info_val == policy number */ + KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_INIT), + /* info_val == policy number */ + KBASE_TRACE_CODE_MAKE_CODE(PM_CURRENT_POLICY_TERM), +/* Unused code just to make it easier to not have a comma at the end. + * All other codes MUST come before this */ + KBASE_TRACE_CODE_MAKE_CODE(DUMMY) + +#if 0 /* Dummy section to avoid breaking formatting */ +}; +#endif + +/* ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c new file mode 100644 index 00000000000..aac9858875a --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.c @@ -0,0 +1,232 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include <mali_kbase.h> +#include <mali_kbase_jm.h> +#include <mali_kbase_hwaccess_jm.h> + +#define CREATE_TRACE_POINTS + +#ifdef CONFIG_MALI_TRACE_TIMELINE +#include "mali_timeline.h" + +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atoms_in_flight); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_atom); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_active); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_slot_action); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_gpu_power_active); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_l2_power_active); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_event); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_slot_atom); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_pm_checktrans); +EXPORT_TRACEPOINT_SYMBOL_GPL(mali_timeline_context_active); + +struct kbase_trace_timeline_desc { + char *enum_str; + char *desc; + char *format; + char *format_desc; +}; + +static struct kbase_trace_timeline_desc kbase_trace_timeline_desc_table[] = { + #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) { #enum_val, desc, format, format_desc } + #include "mali_kbase_trace_timeline_defs.h" + #undef KBASE_TIMELINE_TRACE_CODE +}; + +#define KBASE_NR_TRACE_CODES ARRAY_SIZE(kbase_trace_timeline_desc_table) + +static void *kbasep_trace_timeline_seq_start(struct seq_file *s, loff_t *pos) +{ + if (*pos >= KBASE_NR_TRACE_CODES) + return NULL; + + return &kbase_trace_timeline_desc_table[*pos]; +} + +static void kbasep_trace_timeline_seq_stop(struct seq_file *s, void *data) +{ +} + +static void *kbasep_trace_timeline_seq_next(struct seq_file *s, void *data, loff_t *pos) +{ + (*pos)++; + + if (*pos == KBASE_NR_TRACE_CODES) + return NULL; + + return &kbase_trace_timeline_desc_table[*pos]; +} + +static int kbasep_trace_timeline_seq_show(struct seq_file *s, void *data) +{ + struct kbase_trace_timeline_desc *trace_desc = data; + + seq_printf(s, "%s#%s#%s#%s\n", trace_desc->enum_str, trace_desc->desc, 
trace_desc->format, trace_desc->format_desc); + return 0; +} + + +static const struct seq_operations kbasep_trace_timeline_seq_ops = { + .start = kbasep_trace_timeline_seq_start, + .next = kbasep_trace_timeline_seq_next, + .stop = kbasep_trace_timeline_seq_stop, + .show = kbasep_trace_timeline_seq_show, +}; + +static int kbasep_trace_timeline_debugfs_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &kbasep_trace_timeline_seq_ops); +} + +static const struct file_operations kbasep_trace_timeline_debugfs_fops = { + .open = kbasep_trace_timeline_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev) +{ + debugfs_create_file("mali_timeline_defs", + S_IRUGO, kbdev->mali_debugfs_directory, NULL, + &kbasep_trace_timeline_debugfs_fops); +} + +void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_jd_atom *katom, int js) +{ + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + if (kbdev->timeline.slot_atoms_submitted[js] > 0) { + KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 1); + } else { + base_atom_id atom_number = kbase_jd_atom_id(kctx, katom); + + KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 1); + KBASE_TIMELINE_JOB_START(kctx, js, atom_number); + } + ++kbdev->timeline.slot_atoms_submitted[js]; + + KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]); +} + +void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx, + struct kbase_jd_atom *katom, int js, + kbasep_js_atom_done_code done_code) +{ + lockdep_assert_held(&kbdev->js_data.runpool_irq.lock); + + if (done_code & KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT) { + KBASE_TIMELINE_JOB_START_NEXT(kctx, js, 0); + } else { + /* Job finished in JS_HEAD */ + base_atom_id atom_number = kbase_jd_atom_id(kctx, katom); + + KBASE_TIMELINE_JOB_START_HEAD(kctx, js, 0); + KBASE_TIMELINE_JOB_STOP(kctx, js, atom_number); + + /* see if we need to trace the job in JS_NEXT moving to JS_HEAD */ + if (kbase_backend_nr_atoms_submitted(kbdev, js)) { + struct kbase_jd_atom *next_katom; + struct kbase_context *next_kctx; + + /* Peek the next atom - note that the atom in JS_HEAD will already + * have been dequeued */ + next_katom = kbase_backend_inspect_head(kbdev, js); + WARN_ON(!next_katom); + next_kctx = next_katom->kctx; + KBASE_TIMELINE_JOB_START_NEXT(next_kctx, js, 0); + KBASE_TIMELINE_JOB_START_HEAD(next_kctx, js, 1); + KBASE_TIMELINE_JOB_START(next_kctx, js, kbase_jd_atom_id(next_kctx, next_katom)); + } + } + + --kbdev->timeline.slot_atoms_submitted[js]; + + KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, kbdev->timeline.slot_atoms_submitted[js]); +} + +void kbase_timeline_pm_send_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event_sent) +{ + int uid = 0; + int old_uid; + + /* If a producer already exists for the event, try to use their UID (multiple-producers) */ + uid = atomic_read(&kbdev->timeline.pm_event_uid[event_sent]); + old_uid = uid; + + /* Get a new non-zero UID if we don't have one yet */ + while (!uid) + uid = atomic_inc_return(&kbdev->timeline.pm_event_uid_counter); + + /* Try to use this UID */ + if (old_uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event_sent], old_uid, uid)) + /* If it changed, raced with another producer: we've lost this UID */ + uid = 0; + + KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_sent, uid); +} + +void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum 
kbase_timeline_pm_event event) +{ + int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]); + + if (uid != 0) { + if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0)) + /* If it changed, raced with another consumer: we've lost this UID */ + uid = 0; + + KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid); + } +} + +void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event) +{ + int uid = atomic_read(&kbdev->timeline.pm_event_uid[event]); + + if (uid != atomic_cmpxchg(&kbdev->timeline.pm_event_uid[event], uid, 0)) + /* If it changed, raced with another consumer: we've lost this UID */ + uid = 0; + + KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event, uid); +} + +void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.power_change_lock); + /* Simply log the start of the transition */ + kbdev->timeline.l2_transitioning = true; + KBASE_TIMELINE_POWERING_L2(kbdev); +} + +void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->pm.power_change_lock); + /* Simply log the end of the transition */ + if (kbdev->timeline.l2_transitioning) { + kbdev->timeline.l2_transitioning = false; + KBASE_TIMELINE_POWERED_L2(kbdev); + } +} + +#endif /* CONFIG_MALI_TRACE_TIMELINE */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h new file mode 100644 index 00000000000..6a3cd407e4a --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline.h @@ -0,0 +1,356 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#if !defined(_KBASE_TRACE_TIMELINE_H) +#define _KBASE_TRACE_TIMELINE_H + +#ifdef CONFIG_MALI_TRACE_TIMELINE + +enum kbase_trace_timeline_code { + #define KBASE_TIMELINE_TRACE_CODE(enum_val, desc, format, format_desc) enum_val + #include "mali_kbase_trace_timeline_defs.h" + #undef KBASE_TIMELINE_TRACE_CODE +}; + +/** Initialize Timeline DebugFS entries */ +void kbasep_trace_timeline_debugfs_init(struct kbase_device *kbdev); + +/* mali_timeline.h defines kernel tracepoints used by the KBASE_TIMELINE + * functions. 
+ * Output is timestamped by either sched_clock() (default), local_clock(), or + * cpu_clock(), depending on /sys/kernel/debug/tracing/trace_clock */ +#include "mali_timeline.h" + +/* Trace number of atoms in flight for kctx (atoms either not completed, or in + process of being returned to user */ +#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_atoms_in_flight(ts.tv_sec, ts.tv_nsec, \ + (int)kctx->timeline.owner_tgid, \ + count); \ + } while (0) + +/* Trace atom_id being Ready to Run */ +#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_atom(ts.tv_sec, ts.tv_nsec, \ + CTX_FLOW_ATOM_READY, \ + (int)kctx->timeline.owner_tgid, \ + atom_id); \ + } while (0) + +/* Trace number of atoms submitted to job slot js + * + * NOTE: This uses a different tracepoint to the head/next/soft-stop actions, + * so that those actions can be filtered out separately from this + * + * This is because this is more useful, as we can use it to calculate general + * utilization easily and accurately */ +#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_gpu_slot_active(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_SLOT_ACTIVE, \ + (int)kctx->timeline.owner_tgid, \ + js, count); \ + } while (0) + + +/* Trace atoms present in JS_NEXT */ +#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_SLOT_NEXT, \ + (int)kctx->timeline.owner_tgid, \ + js, count); \ + } while (0) + +/* Trace atoms present in JS_HEAD */ +#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_SLOT_HEAD, \ + (int)kctx->timeline.owner_tgid, \ + js, count); \ + } while (0) + +/* Trace that a soft stop/evict from next is being attempted on a slot */ +#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_gpu_slot_action(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_SLOT_STOPPING, \ + (kctx) ? 
(int)kctx->timeline.owner_tgid : 0, \ + js, count); \ + } while (0) + + + +/* Trace state of overall GPU power */ +#define KBASE_TIMELINE_GPU_POWER(kbdev, active) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_POWER_ACTIVE, active); \ + } while (0) + +/* Trace state of tiler power */ +#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_POWER_TILER_ACTIVE, \ + hweight64(bitmap)); \ + } while (0) + +/* Trace number of shaders currently powered */ +#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_POWER_SHADER_ACTIVE, \ + hweight64(bitmap)); \ + } while (0) + +/* Trace state of L2 power */ +#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_gpu_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_SET_GPU_POWER_L2_ACTIVE, \ + hweight64(bitmap)); \ + } while (0) + +/* Trace state of L2 cache*/ +#define KBASE_TIMELINE_POWERING_L2(kbdev) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_FLOW_GPU_POWER_L2_POWERING, \ + 1); \ + } while (0) + +#define KBASE_TIMELINE_POWERED_L2(kbdev) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_l2_power_active(ts.tv_sec, ts.tv_nsec, \ + SW_FLOW_GPU_POWER_L2_ACTIVE, \ + 1); \ + } while (0) + +/* Trace kbase_pm_send_event message send */ +#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \ + SW_FLOW_PM_SEND_EVENT, \ + event_type, pm_event_id); \ + } while (0) + +/* Trace kbase_pm_worker message receive */ +#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_pm_event(ts.tv_sec, ts.tv_nsec, \ + SW_FLOW_PM_HANDLE_EVENT, \ + event_type, pm_event_id); \ + } while (0) + + +/* Trace atom_id starting in JS_HEAD */ +#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \ + HW_START_GPU_JOB_CHAIN_SW_APPROX, \ + (int)kctx->timeline.owner_tgid, \ + js, _consumerof_atom_number); \ + } while (0) + +/* Trace atom_id stopping on JS_HEAD */ +#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_slot_atom(ts.tv_sec, ts.tv_nsec, \ + HW_STOP_GPU_JOB_CHAIN_SW_APPROX, \ + (int)kctx->timeline.owner_tgid, \ + js, _producerof_atom_number_completed); \ + } while (0) + +/** Trace beginning/end of a call to kbase_pm_check_transitions_nolock from a + * certin caller */ +#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_pm_checktrans(ts.tv_sec, ts.tv_nsec, \ + trace_code, 1); \ + } while (0) + +/* Trace number of contexts active */ +#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) \ + do { \ + struct timespec ts; \ + getrawmonotonic(&ts); \ + trace_mali_timeline_context_active(ts.tv_sec, ts.tv_nsec, \ + 
+
+
+
+/* NOTE: kbase_timeline_pm_cores_func() is in mali_kbase_pm_policy.c */
+
+/**
+ * Trace that an atom is starting on a job slot
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js);
+
+/**
+ * Trace that an atom has completed on a job slot
+ *
+ * 'Done' in this sense can occur either because:
+ * - the atom in JS_HEAD finished
+ * - the atom in JS_NEXT was evicted
+ *
+ * Whether the atom finished or was evicted is passed in @a done_code
+ *
+ * It is assumed that the atom has already been removed from the submit slot,
+ * with either:
+ * - kbasep_jm_dequeue_submit_slot()
+ * - kbasep_jm_dequeue_tail_submit_slot()
+ *
+ * The caller must be holding kbasep_js_device_data::runpool_irq::lock
+ */
+void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code);
+
+
+/** Trace a pm event starting */
+void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+ enum kbase_timeline_pm_event event_sent);
+
+/** Trace a pm event finishing */
+void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Check whether a pm event was present, and if so trace finishing it */
+void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event);
+
+/** Trace L2 power-up start */
+void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev);
+
+/** Trace L2 power-up done */
+void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev);
+
+#else
+
+#define KBASE_TIMELINE_ATOMS_IN_FLIGHT(kctx, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOM_READY(kctx, atom_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_ATOMS_SUBMITTED(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_NEXT(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START_HEAD(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_TRY_SOFT_STOP(kctx, js, count) CSTD_NOP()
+
+#define KBASE_TIMELINE_GPU_POWER(kbdev, active) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_TILER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_SHADER(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWER_L2(kbdev, bitmap) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERING_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_POWERED_L2(kbdev) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_SEND_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_HANDLE_EVENT(kbdev, event_type, pm_event_id) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_START(kctx, js, _consumerof_atom_number) CSTD_NOP()
+
+#define KBASE_TIMELINE_JOB_STOP(kctx, js, _producerof_atom_number_completed) CSTD_NOP()
+
+#define KBASE_TIMELINE_PM_CHECKTRANS(kbdev, trace_code) CSTD_NOP()
+
+#define KBASE_TIMELINE_CONTEXT_ACTIVE(kbdev, count) CSTD_NOP()
+
+static inline void kbase_timeline_job_slot_submit(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_job_slot_done(struct kbase_device *kbdev, struct kbase_context *kctx,
+ struct kbase_jd_atom *katom, int js,
+ kbasep_js_atom_done_code done_code)
+{
+ lockdep_assert_held(&kbdev->js_data.runpool_irq.lock);
+}
+
+static inline void kbase_timeline_pm_send_event(struct kbase_device *kbdev,
+ enum kbase_timeline_pm_event event_sent)
+{
+}
+
+static inline void kbase_timeline_pm_check_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_handle_event(struct kbase_device *kbdev, enum kbase_timeline_pm_event event)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_start(struct kbase_device *kbdev)
+{
+}
+
+static inline void kbase_timeline_pm_l2_transition_done(struct kbase_device *kbdev)
+{
+}
+#endif /* CONFIG_MALI_TRACE_TIMELINE */
+
+#endif /* _KBASE_TRACE_TIMELINE_H */
+
diff --git a/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
new file mode 100644
index 00000000000..a7dc3faf4c7
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_kbase_trace_timeline_defs.h
@@ -0,0 +1,134 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/* ***** IMPORTANT: THIS IS NOT A NORMAL HEADER FILE *****
+ * ***** DO NOT INCLUDE DIRECTLY *****
+ * ***** THE LACK OF HEADER GUARDS IS INTENTIONAL ***** */
+
+/*
+ * Conventions on Event Names:
+ *
+ * - The prefix determines something about how the timeline should be
+ * displayed, and is split up into various parts, separated by underscores:
+ * - 'SW' and 'HW' as the first part will be used to determine whether a
+ * timeline is to do with Software or Hardware - effectively, separate
+ * 'channels' for Software and Hardware
+ * - 'START', 'STOP', 'ENTER', 'LEAVE' can be used in the second part, and
+ * signify related pairs of events - these are optional.
+ * - 'FLOW' indicates a generic event, which can use dependencies
+ * - This gives events such as:
+ * - 'SW_ENTER_FOO'
+ * - 'SW_LEAVE_FOO'
+ * - 'SW_FLOW_BAR_1'
+ * - 'SW_FLOW_BAR_2'
+ * - 'HW_START_BAZ'
+ * - 'HW_STOP_BAZ'
+ * - And an unadorned HW event:
+ * - 'HW_BAZ_FROZBOZ'
+ */
+
+/*
+ * Conventions on parameter names:
+ * - anything with 'instance' in the name will have a separate timeline based
+ * on that instance.
+ * - underscore-prefixed parameters will be hidden by default on timelines
+ *
+ * Hence:
+ * - Different job slots have their own 'instance', based on the instance value
+ * - Per-context info (e.g. atoms on a context) have their own 'instance'
+ * (i.e. each context should be on a different timeline)
+ *
+ * Note that globally-shared resources can be tagged with a tgid, but we don't
+ * want an instance per context:
+ * - There's no point having separate Job Slot timelines for each context, that
+ * would be confusing - there are really only 3 job slots!
+ * - There's no point having separate Shader-powered timelines for each
+ * context, that would be confusing - all shader cores (whether it be 4, 8,
+ * etc) are shared in the system.
+ */
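
Because this file intentionally has no header guards, each consumer defines KBASE_TIMELINE_TRACE_CODE to taste and then includes the table - the usual X-macro pattern. A minimal sketch of one such consumer (the enum name here is an assumption for illustration, not taken from this patch):

	/* First expansion: turn each table entry into an enumerator; the
	 * trailing commas come from the table itself. */
	#define KBASE_TIMELINE_TRACE_CODE(enum_id, desc, format, ids) enum_id
	enum kbase_trace_timeline_code {
	#include "mali_kbase_trace_timeline_defs.h"
	};
	#undef KBASE_TIMELINE_TRACE_CODE

A second expansion with a different definition can then build a parallel array of the desc/format strings for the trace writer.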
+
+ /*
+ * CTX events
+ */
+ /* Separate timelines for each context 'instance' */
+ KBASE_TIMELINE_TRACE_CODE(CTX_SET_NR_ATOMS_IN_FLIGHT, "CTX: Atoms in flight", "%d,%d", "_instance_tgid,_value_number_of_atoms"),
+ KBASE_TIMELINE_TRACE_CODE(CTX_FLOW_ATOM_READY, "CTX: Atoms Ready to Run", "%d,%d,%d", "_instance_tgid,_consumerof_atom_number,_producerof_atom_number_ready"),
+
+ /*
+ * SW Events
+ */
+ /* Separate timelines for each slot 'instance' */
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_ACTIVE, "SW: GPU slot active", "%d,%d,%d", "_tgid,_instance_slot,_value_number_of_atoms"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_NEXT, "SW: GPU atom in NEXT", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_next"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_HEAD, "SW: GPU atom in HEAD", "%d,%d,%d", "_tgid,_instance_slot,_value_is_an_atom_in_head"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_SLOT_STOPPING, "SW: Try Soft-Stop on GPU slot", "%d,%d,%d", "_tgid,_instance_slot,_value_is_slot_stopping"),
+ /* Shader and overall power is shared - can't have separate instances of
+ * it, just tagging with the context */
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_ACTIVE, "SW: GPU power active", "%d,%d", "_tgid,_value_is_power_active"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_TILER_ACTIVE, "SW: GPU tiler powered", "%d,%d", "_tgid,_value_number_of_tilers"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_SHADER_ACTIVE, "SW: GPU shaders powered", "%d,%d", "_tgid,_value_number_of_shaders"),
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powered", "%d,%d", "_tgid,_value_number_of_l2"),
+
+ /* SW Power event messaging. _event_type is one from the kbase_pm_event enum */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_SEND_EVENT, "SW: PM Send Event", "%d,%d,%d", "_tgid,_event_type,_writerof_pm_event_id"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_HANDLE_EVENT, "SW: PM Handle Event", "%d,%d,%d", "_tgid,_event_type,_finalconsumerof_pm_event_id"),
+ /* SW L2 power events */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_POWERING, "SW: GPU L2 powering", "%d,%d", "_tgid,_writerof_l2_transitioning"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_GPU_POWER_L2_ACTIVE, "SW: GPU L2 powering done", "%d,%d", "_tgid,_finalconsumerof_l2_transitioning"),
+
+ KBASE_TIMELINE_TRACE_CODE(SW_SET_CONTEXT_ACTIVE, "SW: Context Active", "%d,%d", "_tgid,_value_active"),
+
+ /*
+ * BEGIN: Significant SW Functions that call kbase_pm_check_transitions_nolock()
+ */
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_START, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweroff"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWEROFF_END, "SW: PM CheckTrans from kbase_pm_do_poweroff", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweroff"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_START, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_poweron"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_DO_POWERON_END, "SW: PM CheckTrans from kbase_pm_do_poweron", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_poweron"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_START, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_writerof_pm_checktrans_gpu_interrupt"),
+ KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_GPU_INTERRUPT_END, "SW: PM CheckTrans from kbase_gpu_interrupt", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_gpu_interrupt"),
+
+ /*
+ * 
Significant Indirect callers of kbase_pm_check_transitions_nolock() + */ + /* kbase_pm_request_cores */ + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_request_cores_shader_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_REQUEST_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_request_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_request_cores_shader_tiler"), + /* kbase_pm_release_cores */ + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_START, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_release_cores_shader_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_SHADER_TILER_END, "SW: PM CheckTrans from kbase_pm_release_cores(shader+tiler)", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_release_cores_shader_tiler"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_START, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_writerof_pm_checktrans_pm_do_shader_poweroff_callback"), + KBASE_TIMELINE_TRACE_CODE(SW_FLOW_PM_CHECKTRANS_PM_RELEASE_CORES_DEFERRED_END, "SW: PM CheckTrans from kbasep_pm_do_shader_poweroff_callback", "%d,%d", "_tgid,_finalconsumerof_pm_checktrans_pm_do_shader_poweroff_callback"), + /* + * END: SW Functions that call kbase_pm_check_transitions_nolock() + */ + + /* + * HW Events + */ + KBASE_TIMELINE_TRACE_CODE(HW_START_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain start (SW approximated)", "%d,%d,%d", "_tgid,job_slot,_consumerof_atom_number_ready"), + KBASE_TIMELINE_TRACE_CODE(HW_STOP_GPU_JOB_CHAIN_SW_APPROX, "HW: Job Chain stop (SW approximated)", "%d,%d,%d", 
"_tgid,job_slot,_producerof_atom_number_completed") diff --git a/drivers/gpu/arm/midgard/mali_kbase_uku.h b/drivers/gpu/arm/midgard/mali_kbase_uku.h new file mode 100644 index 00000000000..5139014ec2c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_uku.h @@ -0,0 +1,561 @@ +/* + * + * (C) COPYRIGHT 2008-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _KBASE_UKU_H_ +#define _KBASE_UKU_H_ + +#include "mali_uk.h" +#include "mali_base_kernel.h" + +/* This file needs to support being included from kernel and userside (which use different defines) */ +#if defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON +#define SUPPORT_MALI_ERROR_INJECT +#endif /* defined(CONFIG_MALI_ERROR_INJECT) || MALI_ERROR_INJECT_ON */ +#if defined(CONFIG_MALI_NO_MALI) +#define SUPPORT_MALI_NO_MALI +#elif defined(MALI_NO_MALI) +#if MALI_NO_MALI +#define SUPPORT_MALI_NO_MALI +#endif +#endif + +#if defined(SUPPORT_MALI_NO_MALI) || defined(SUPPORT_MALI_ERROR_INJECT) +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif + +#include "mali_kbase_gpuprops_types.h" + +#define BASE_UK_VERSION_MAJOR 9 +#define BASE_UK_VERSION_MINOR 0 + +struct kbase_uk_mem_alloc { + union uk_header header; + /* IN */ + u64 va_pages; + u64 commit_pages; + u64 extent; + /* IN/OUT */ + u64 flags; + /* OUT */ + u64 gpu_va; + u16 va_alignment; + u8 padding[6]; +}; + +struct kbase_uk_mem_free { + union uk_header header; + /* IN */ + u64 gpu_addr; + /* OUT */ +}; + +struct kbase_uk_mem_alias { + union uk_header header; + /* IN/OUT */ + u64 flags; + /* IN */ + u64 stride; + u64 nents; + union kbase_pointer ai; + /* OUT */ + u64 gpu_va; + u64 va_pages; +}; + +struct kbase_uk_mem_import { + union uk_header header; + /* IN */ + union kbase_pointer phandle; + u32 type; + u32 padding; + /* IN/OUT */ + u64 flags; + /* OUT */ + u64 gpu_va; + u64 va_pages; +}; + +struct kbase_uk_mem_flags_change { + union uk_header header; + /* IN */ + u64 gpu_va; + u64 flags; + u64 mask; +}; + +struct kbase_uk_job_submit { + union uk_header header; + /* IN */ + union kbase_pointer addr; + u32 nr_atoms; + u32 stride; /* bytes between atoms, i.e. 
sizeof(base_jd_atom_v2) */
+ /* OUT */
+};
+
+struct kbase_uk_post_term {
+ union uk_header header;
+};
+
+struct kbase_uk_dump_fault_term {
+ union uk_header header;
+};
+
+struct kbase_uk_sync_now {
+ union uk_header header;
+
+ /* IN */
+ struct base_syncset sset;
+
+ /* OUT */
+};
+
+struct kbase_uk_hwcnt_setup {
+ union uk_header header;
+
+ /* IN */
+ u64 dump_buffer;
+ u32 jm_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 unused_1; /* keep for backwards compatibility */
+ u32 mmu_l2_bm;
+ u32 padding;
+ /* OUT */
+};
+
+struct kbase_uk_hwcnt_dump {
+ union uk_header header;
+};
+
+struct kbase_uk_hwcnt_clear {
+ union uk_header header;
+};
+
+struct kbase_uk_fence_validate {
+ union uk_header header;
+ /* IN */
+ s32 fd;
+ u32 padding;
+ /* OUT */
+};
+
+struct kbase_uk_stream_create {
+ union uk_header header;
+ /* IN */
+ char name[32];
+ /* OUT */
+ s32 fd;
+ u32 padding;
+};
+
+#ifdef BASE_LEGACY_UK7_SUPPORT
+/**
+ * This structure is kept for backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+#define BASE_CPU_PROPERTY_FLAG_LITTLE_ENDIAN ((u32)0x00000001)
+struct base_cpu_id_props {
+ /**
+ * CPU ID
+ */
+ u32 id;
+
+ /**
+ * CPU Part number
+ */
+ u16 part;
+ /**
+ * ASCII code of implementer trademark
+ */
+ u8 implementer;
+
+ /**
+ * CPU Variant
+ */
+ u8 variant;
+ /**
+ * CPU Architecture
+ */
+ u8 arch;
+
+ /**
+ * CPU revision
+ */
+ u8 rev;
+
+ /**
+ * Validity of CPU id where 0-invalid and
+ * 1-valid only if ALL the cpu_id props are valid
+ */
+ u8 valid;
+
+ u8 padding[1];
+};
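
These fields mirror the layout of the ARM Main ID Register (MIDR). A hedged sketch of how such a structure could be populated from a raw MIDR value; the helper is hypothetical, with field positions as documented in the ARM architecture manuals:

	/* MIDR layout: implementer [31:24], variant [23:20], architecture
	 * [19:16], part number [15:4], revision [3:0]. */
	static void example_fill_cpu_id(struct base_cpu_id_props *props, u32 midr)
	{
		props->id = midr;
		props->implementer = (midr >> 24) & 0xff;
		props->variant = (midr >> 20) & 0xf;
		props->arch = (midr >> 16) & 0xf;
		props->part = (midr >> 4) & 0xfff;
		props->rev = midr & 0xf;
		props->valid = 1;
	}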
+
+/**
+ * This structure is kept for backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+struct base_cpu_props {
+ u32 nr_cores; /**< Number of CPU cores */
+
+ /**
+ * CPU page size as a Logarithm to Base 2. The compile-time
+ * equivalent is @ref OSU_CONFIG_CPU_PAGE_SIZE_LOG2
+ */
+ u32 cpu_page_size_log2;
+
+ /**
+ * CPU L1 Data cache line size as a Logarithm to Base 2. The compile-time
+ * equivalent is @ref OSU_CONFIG_CPU_L1_DCACHE_LINE_SIZE_LOG2.
+ */
+ u32 cpu_l1_dcache_line_size_log2;
+
+ /**
+ * CPU L1 Data cache size, in bytes. The compile-time equivalent is
+ * @ref OSU_CONFIG_CPU_L1_DCACHE_SIZE.
+ *
+ * This CPU Property is mainly provided to implement OpenCL's
+ * clGetDeviceInfo(), which allows the CL_DEVICE_GLOBAL_MEM_CACHE_SIZE
+ * hint to be queried.
+ */
+ u32 cpu_l1_dcache_size;
+
+ /**
+ * CPU Property Flags bitpattern.
+ *
+ * This is a combination of bits as specified by the macros prefixed with
+ * 'BASE_CPU_PROPERTY_FLAG_'.
+ */
+ u32 cpu_flags;
+
+ /**
+ * Maximum clock speed in MHz.
+ * @usecase 'Maximum' CPU Clock Speed information is required by OpenCL's
+ * clGetDeviceInfo() function for the CL_DEVICE_MAX_CLOCK_FREQUENCY hint.
+ */
+ u32 max_cpu_clock_speed_mhz;
+
+ /**
+ * @brief Total memory, in bytes.
+ *
+ * This is the theoretical maximum memory available to the CPU. It is
+ * unlikely that a client will be able to allocate all of this memory for
+ * their own purposes, but this at least provides an upper bound on the
+ * memory available to the CPU.
+ *
+ * This is required for OpenCL's clGetDeviceInfo() call when
+ * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL CPU devices.
+ */
+ u64 available_memory_size;
+
+ /**
+ * CPU ID detailed info
+ */
+ struct base_cpu_id_props cpu_id;
+
+ u32 padding;
+};
+
+/**
+ * This structure is kept for backward compatibility reasons.
+ * It shall be removed as soon as KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE
+ * (previously KBASE_FUNC_CPU_PROPS_REG_DUMP) ioctl call
+ * is removed. Removal of KBASE_FUNC_CPU_PROPS_REG_DUMP is part of having
+ * the function for reading cpu properties moved from base to osu.
+ */
+struct kbase_uk_cpuprops {
+ union uk_header header;
+
+ /* IN */
+ struct base_cpu_props props;
+ /* OUT */
+};
+#endif /* BASE_LEGACY_UK7_SUPPORT */
+
+struct kbase_uk_gpuprops {
+ union uk_header header;
+
+ /* IN */
+ struct mali_base_gpu_props props;
+ /* OUT */
+};
+
+struct kbase_uk_mem_query {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+#define KBASE_MEM_QUERY_COMMIT_SIZE 1
+#define KBASE_MEM_QUERY_VA_SIZE 2
+#define KBASE_MEM_QUERY_FLAGS 3
+ u64 query;
+ /* OUT */
+ u64 value;
+};
+
+struct kbase_uk_mem_commit {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+ u64 pages;
+ /* OUT */
+ u32 result_subcode;
+ u32 padding;
+};
+
+struct kbase_uk_find_cpu_offset {
+ union uk_header header;
+ /* IN */
+ u64 gpu_addr;
+ u64 cpu_addr;
+ u64 size;
+ /* OUT */
+ u64 offset;
+};
+
+#define KBASE_GET_VERSION_BUFFER_SIZE 64
+struct kbase_uk_get_ddk_version {
+ union uk_header header;
+ /* OUT */
+ char version_buffer[KBASE_GET_VERSION_BUFFER_SIZE];
+ u32 version_string_size;
+ u32 padding;
+};
+
+struct kbase_uk_disjoint_query {
+ union uk_header header;
+ /* OUT */
+ u32 counter;
+ u32 padding;
+};
+
+struct kbase_uk_set_flags {
+ union uk_header header;
+ /* IN */
+ u32 create_flags;
+ u32 padding;
+};
+
+#if MALI_UNIT_TEST
+#define TEST_ADDR_COUNT 4
+#define KBASE_TEST_BUFFER_SIZE 128
+struct kbase_exported_test_data {
+ u64 test_addr[TEST_ADDR_COUNT]; /**< memory address */
+ u32 test_addr_pages[TEST_ADDR_COUNT]; /**< memory size in pages */
+ union kbase_pointer kctx; /**< base context created by process */
+ union kbase_pointer mm; /**< pointer to process address space */
+ u8 buffer1[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */
+ u8 buffer2[KBASE_TEST_BUFFER_SIZE]; /**< unit test defined parameter */
+};
+
+struct kbase_uk_set_test_data {
+ union uk_header header;
+ /* IN */
+ struct kbase_exported_test_data test_data;
+};
+
+#endif /* MALI_UNIT_TEST */
+
+#ifdef SUPPORT_MALI_ERROR_INJECT
+struct kbase_uk_error_params {
+ union uk_header header;
+ /* IN */
+ struct kbase_error_params params;
+};
+#endif /* SUPPORT_MALI_ERROR_INJECT */
+
+#ifdef SUPPORT_MALI_NO_MALI
+struct kbase_uk_model_control_params {
+ union uk_header header;
+ /* IN */
+ struct kbase_model_control_params params;
+};
+#endif /* SUPPORT_MALI_NO_MALI */
+
+#define KBASE_MAXIMUM_EXT_RESOURCES 255
+
+struct kbase_uk_ext_buff_kds_data {
+ union uk_header header;
+ union kbase_pointer external_resource;
+ union kbase_pointer file_descriptor;
+ u32 num_res; /* limited to KBASE_MAXIMUM_EXT_RESOURCES */
+ u32 padding;
+};
+
+struct kbase_uk_profiling_controls {
+ union uk_header header;
+ u32 profiling_controls[FBDUMP_CONTROL_MAX];
+};
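
All of these structures follow one calling convention: userspace fills the IN fields, the UK layer dispatches on a function id, and the kernel fills the OUT fields plus a return code carried in the header. A hedged user-side sketch using the disjoint counter query; the uk_call() wrapper and device_fd are stand-ins for the real UK ioctl plumbing, which is outside this excerpt:

	struct kbase_uk_disjoint_query q;

	memset(&q, 0, sizeof(q));
	if (uk_call(device_fd, KBASE_FUNC_DISJOINT_QUERY, &q, sizeof(q)) == 0)
		printf("disjoint events so far: %u\n", q.counter);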
+
+struct kbase_uk_debugfs_mem_profile_add {
+ union uk_header header;
+ u32 len;
+ union kbase_pointer buf;
+};
+
+struct kbase_uk_context_id {
+ union uk_header header;
+ /* OUT */
+ int id;
+};
+
+#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \
+ defined(CONFIG_MALI_MIPE_ENABLED)
+/**
+ * struct kbase_uk_tlstream_acquire - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @fd: timeline stream file descriptor
+ *
+ * This structure is used when performing a call to acquire kernel side
+ * timeline stream file descriptor.
+ */
+struct kbase_uk_tlstream_acquire {
+ union uk_header header;
+ /* IN */
+ /* OUT */
+ s32 fd;
+};
+
+/**
+ * struct kbase_uk_tlstream_flush - User/Kernel space data exchange structure
+ * @header: UK structure header
+ *
+ * This structure is used when performing a call to flush kernel side
+ * timeline streams.
+ */
+struct kbase_uk_tlstream_flush {
+ union uk_header header;
+ /* IN */
+ /* OUT */
+};
+
+#if MALI_UNIT_TEST
+/**
+ * struct kbase_uk_tlstream_test - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @tpw_count: number of trace point writers in each context
+ * @msg_delay: time delay between tracepoints from one writer in milliseconds
+ * @msg_count: number of trace points written by one writer
+ * @aux_msg: if non-zero aux messages will be included
+ *
+ * This structure is used when performing a call to start timeline stream test
+ * embedded in kernel.
+ */
+struct kbase_uk_tlstream_test {
+ union uk_header header;
+ /* IN */
+ u32 tpw_count;
+ u32 msg_delay;
+ u32 msg_count;
+ u32 aux_msg;
+ /* OUT */
+};
+
+/**
+ * struct kbase_uk_tlstream_stats - User/Kernel space data exchange structure
+ * @header: UK structure header
+ * @bytes_collected: number of bytes read by user
+ * @bytes_generated: number of bytes generated by tracepoints
+ *
+ * This structure is used when performing a call to obtain timeline stream
+ * statistics.
+ */
+struct kbase_uk_tlstream_stats {
+ union uk_header header; /**< UK structure header. 
*/ + /* IN */ + /* OUT */ + u32 bytes_collected; + u32 bytes_generated; +}; +#endif /* MALI_UNIT_TEST */ +#endif /* MALI_KTLSTREAM_ENABLED */ + +enum kbase_uk_function_id { + KBASE_FUNC_MEM_ALLOC = (UK_FUNC_ID + 0), + KBASE_FUNC_MEM_IMPORT = (UK_FUNC_ID + 1), + KBASE_FUNC_MEM_COMMIT = (UK_FUNC_ID + 2), + KBASE_FUNC_MEM_QUERY = (UK_FUNC_ID + 3), + KBASE_FUNC_MEM_FREE = (UK_FUNC_ID + 4), + KBASE_FUNC_MEM_FLAGS_CHANGE = (UK_FUNC_ID + 5), + KBASE_FUNC_MEM_ALIAS = (UK_FUNC_ID + 6), + +#ifdef BASE_LEGACY_UK6_SUPPORT + KBASE_FUNC_JOB_SUBMIT_UK6 = (UK_FUNC_ID + 7), +#endif /* BASE_LEGACY_UK6_SUPPORT */ + + KBASE_FUNC_SYNC = (UK_FUNC_ID + 8), + + KBASE_FUNC_POST_TERM = (UK_FUNC_ID + 9), + + KBASE_FUNC_HWCNT_SETUP = (UK_FUNC_ID + 10), + KBASE_FUNC_HWCNT_DUMP = (UK_FUNC_ID + 11), + KBASE_FUNC_HWCNT_CLEAR = (UK_FUNC_ID + 12), + +#ifdef BASE_LEGACY_UK7_SUPPORT + KBASE_FUNC_CPU_PROPS_REG_DUMP_OBSOLETE = (UK_FUNC_ID + 13), +#endif /* BASE_LEGACY_UK7_SUPPORT */ + KBASE_FUNC_GPU_PROPS_REG_DUMP = (UK_FUNC_ID + 14), + + KBASE_FUNC_FIND_CPU_OFFSET = (UK_FUNC_ID + 15), + + KBASE_FUNC_GET_VERSION = (UK_FUNC_ID + 16), + KBASE_FUNC_EXT_BUFFER_LOCK = (UK_FUNC_ID + 17), + KBASE_FUNC_SET_FLAGS = (UK_FUNC_ID + 18), + + KBASE_FUNC_SET_TEST_DATA = (UK_FUNC_ID + 19), + KBASE_FUNC_INJECT_ERROR = (UK_FUNC_ID + 20), + KBASE_FUNC_MODEL_CONTROL = (UK_FUNC_ID + 21), + + KBASE_FUNC_FENCE_VALIDATE = (UK_FUNC_ID + 23), + KBASE_FUNC_STREAM_CREATE = (UK_FUNC_ID + 24), + KBASE_FUNC_GET_PROFILING_CONTROLS = (UK_FUNC_ID + 25), + KBASE_FUNC_SET_PROFILING_CONTROLS = (UK_FUNC_ID + 26), + /* to be used only for testing + * purposes, otherwise these controls + * are set through gator API */ + + KBASE_FUNC_DEBUGFS_MEM_PROFILE_ADD = (UK_FUNC_ID + 27), + KBASE_FUNC_JOB_SUBMIT = (UK_FUNC_ID + 28), + KBASE_FUNC_DISJOINT_QUERY = (UK_FUNC_ID + 29), + + KBASE_FUNC_DUMP_FAULT_TERM = (UK_FUNC_ID + 30), + + KBASE_FUNC_GET_CONTEXT_ID = (UK_FUNC_ID + 31), + +#if (defined(MALI_KTLSTREAM_ENABLED) && MALI_KTLSTREAM_ENABLED) || \ + defined(CONFIG_MALI_MIPE_ENABLED) + KBASE_FUNC_TLSTREAM_ACQUIRE = (UK_FUNC_ID + 32), +#if MALI_UNIT_TEST + KBASE_FUNC_TLSTREAM_TEST = (UK_FUNC_ID + 33), + KBASE_FUNC_TLSTREAM_STATS = (UK_FUNC_ID + 34), +#endif /* MALI_UNIT_TEST */ + KBASE_FUNC_TLSTREAM_FLUSH = (UK_FUNC_ID + 35), +#endif /* MALI_KTLSTREAM_ENABLED */ + + KBASE_FUNC_MAX +}; + +#endif /* _KBASE_UKU_H_ */ + diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.c b/drivers/gpu/arm/midgard/mali_kbase_utility.c new file mode 100644 index 00000000000..be474ff8740 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_utility.c @@ -0,0 +1,33 @@ +/* + * + * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#include <mali_kbase.h> + +bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry) +{ + struct list_head *pos = base->next; + + while (pos != base) { + if (pos == entry) + return true; + + pos = pos->next; + } + return false; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_utility.h b/drivers/gpu/arm/midgard/mali_kbase_utility.h new file mode 100644 index 00000000000..fd7252dab0d --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_utility.h @@ -0,0 +1,37 @@ +/* + * + * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _KBASE_UTILITY_H +#define _KBASE_UTILITY_H + +#ifndef _KBASE_H_ +#error "Don't include this file directly, use mali_kbase.h instead" +#endif + +/** Test whether the given list entry is a member of the given list. + * + * @param base The head of the list to be tested + * @param entry The list entry to be tested + * + * @return true if entry is a member of base + * false otherwise + */ +bool kbasep_list_member_of(const struct list_head *base, struct list_head *entry); + +#endif /* _KBASE_UTILITY_H */ diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.c b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c new file mode 100644 index 00000000000..d1c68702e9c --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.c @@ -0,0 +1,485 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+#include <mali_kbase.h>
+#include <mali_kbase_mem_linux.h>
+
+#define NR_CNT_BLOCKS_PER_GROUP 8
+#define NR_CNT_PER_BLOCK 64
+#define NR_BYTES_PER_CNT 4
+#define NR_BYTES_PER_HDR 16
+#define PRFCNT_EN_MASK_OFFSET 0x8
+
+struct kbase_vinstr_context {
+ struct kbase_device *kbdev;
+ struct kbase_context *kctx;
+ struct kbase_vmap_struct vmap;
+ struct mutex lock;
+ u64 gpu_va;
+ void *cpu_va;
+ size_t dump_size;
+ u32 nclients;
+ struct list_head clients;
+};
+
+struct kbase_vinstr_client {
+ bool kernel;
+ void *dump_buffer;
+ u32 bitmap[4];
+ void *accum_buffer;
+ size_t dump_size;
+ struct list_head list;
+};
+
+static int map_kernel_dump_buffer(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_va_region *reg;
+ struct kbase_context *kctx = ctx->kctx;
+ u64 flags, nr_pages;
+ u16 va_align = 0;
+
+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR;
+ ctx->dump_size = kbase_vinstr_dump_size(ctx);
+ nr_pages = PFN_UP(ctx->dump_size);
+
+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+ &ctx->gpu_va, &va_align);
+ if (!reg)
+ return -ENOMEM;
+
+ ctx->cpu_va = kbase_vmap(kctx, ctx->gpu_va, ctx->dump_size, &ctx->vmap);
+ if (!ctx->cpu_va) {
+ kbase_mem_free(kctx, ctx->gpu_va);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void unmap_kernel_dump_buffer(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_context *kctx = ctx->kctx;
+
+ kbase_vunmap(kctx, &ctx->vmap);
+ kbase_mem_free(kctx, ctx->gpu_va);
+}
+
+static int map_client_accum_buffer(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ cli->dump_size = kbase_vinstr_dump_size(ctx);
+ cli->accum_buffer = kzalloc(cli->dump_size, GFP_KERNEL);
+ return !cli->accum_buffer ? -ENOMEM : 0;
+}
+
+static void unmap_client_accum_buffer(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ kfree(cli->accum_buffer);
+}
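
To make the geometry above concrete: each counter block occupies NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT = 256 bytes, of which the first NR_BYTES_PER_HDR = 16 bytes are a header carrying the enable mask at offset PRFCNT_EN_MASK_OFFSET. For example, a v4 GPU with a single core group would need 1 * 8 * 64 * 4 = 2048 bytes, while a v5 GPU with one L2 slice and four shader cores would need (2 + 1 + 4) * 64 * 4 = 1792 bytes (JM + tiler + L2 + shaders); the exact rule is kbase_vinstr_dump_size(), defined later in this file.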
+
+static int create_vinstr_kctx(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_uk_hwcnt_setup setup;
+ int err;
+
+ ctx->kctx = kbase_create_context(ctx->kbdev, true);
+ if (!ctx->kctx)
+ return -ENOMEM;
+
+ /* Map the master kernel dump buffer. The HW dumps the counters
+ * into this memory region. */
+ err = map_kernel_dump_buffer(ctx);
+ if (err) {
+ kbase_destroy_context(ctx->kctx);
+ return err;
+ }
+
+ setup.dump_buffer = ctx->gpu_va;
+ /* Reprogramming the prfcnt collection block requires disabling it
+ * first, which introduces jitter and disrupts existing clients. To
+ * avoid ever needing to reprogram it, enable all counter blocks up
+ * front. */
+ setup.jm_bm = 0xffffffff;
+ setup.tiler_bm = 0xffffffff;
+ setup.shader_bm = 0xffffffff;
+ setup.mmu_l2_bm = 0xffffffff;
+
+ err = kbase_instr_hwcnt_enable(ctx->kctx, &setup);
+ if (err) {
+ unmap_kernel_dump_buffer(ctx);
+ kbase_destroy_context(ctx->kctx);
+ return err;
+ }
+
+ return 0;
+}
+
+static void destroy_vinstr_kctx(struct kbase_vinstr_context *ctx)
+{
+ kbase_instr_hwcnt_disable(ctx->kctx);
+ unmap_kernel_dump_buffer(ctx);
+ kbase_destroy_context(ctx->kctx);
+ ctx->kctx = NULL;
+}
+
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev)
+{
+ struct kbase_vinstr_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return NULL;
+ INIT_LIST_HEAD(&ctx->clients);
+ mutex_init(&ctx->lock);
+ ctx->kbdev = kbdev;
+ return ctx;
+}
+
+void kbase_vinstr_term(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_vinstr_client *cli;
+
+ while (!list_empty(&ctx->clients)) {
+ cli = list_first_entry(&ctx->clients,
+ struct kbase_vinstr_client, list);
+ list_del(&cli->list);
+ unmap_client_accum_buffer(ctx, cli);
+ kfree(cli);
+ ctx->nclients--;
+ }
+ if (ctx->kctx)
+ destroy_vinstr_kctx(ctx);
+ kfree(ctx);
+}
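
A hedged sketch of how a device-level caller might wire these two entry points into probe and teardown; the vinstr_ctx field name on kbase_device is an assumption for illustration:

	/* During device initialisation (error unwinding trimmed): */
	kbdev->vinstr_ctx = kbase_vinstr_init(kbdev);
	if (!kbdev->vinstr_ctx)
		return -ENOMEM;

	/* During device teardown: */
	kbase_vinstr_term(kbdev->vinstr_ctx);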
+
+struct kbase_vinstr_client *kbase_vinstr_attach_client(struct kbase_vinstr_context *ctx,
+ bool kernel, u64 dump_buffer, u32 bitmap[4])
+{
+ struct kbase_vinstr_client *cli;
+
+ cli = kmalloc(sizeof(*cli), GFP_KERNEL);
+ if (!cli)
+ return NULL;
+
+ cli->kernel = kernel;
+ cli->dump_buffer = (void *)(uintptr_t)dump_buffer;
+ cli->bitmap[SHADER_HWCNT_BM] = bitmap[SHADER_HWCNT_BM];
+ cli->bitmap[TILER_HWCNT_BM] = bitmap[TILER_HWCNT_BM];
+ cli->bitmap[MMU_L2_HWCNT_BM] = bitmap[MMU_L2_HWCNT_BM];
+ cli->bitmap[JM_HWCNT_BM] = bitmap[JM_HWCNT_BM];
+
+ mutex_lock(&ctx->lock);
+ /* If this is the first client, create the vinstr kbase
+ * context. This context remains resident until the
+ * last client detaches. */
+ if (!ctx->nclients) {
+ if (create_vinstr_kctx(ctx) < 0) {
+ kfree(cli);
+ mutex_unlock(&ctx->lock);
+ return NULL;
+ }
+ }
+
+ /* The GPU resets the counter block every time there is a request
+ * to dump it. We need a per client kernel buffer for accumulating
+ * the counters. */
+ if (map_client_accum_buffer(ctx, cli) < 0) {
+ kfree(cli);
+ if (!ctx->nclients)
+ destroy_vinstr_kctx(ctx);
+ mutex_unlock(&ctx->lock);
+ return NULL;
+ }
+
+ ctx->nclients++;
+ list_add(&cli->list, &ctx->clients);
+ mutex_unlock(&ctx->lock);
+
+ return cli;
+}
+
+void kbase_vinstr_detach_client(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli)
+{
+ struct kbase_vinstr_client *iter, *tmp;
+
+ mutex_lock(&ctx->lock);
+ list_for_each_entry_safe(iter, tmp, &ctx->clients, list) {
+ if (iter == cli) {
+ list_del(&iter->list);
+ unmap_client_accum_buffer(ctx, cli);
+ kfree(iter);
+ ctx->nclients--;
+ if (!ctx->nclients)
+ destroy_vinstr_kctx(ctx);
+ break;
+ }
+ }
+ mutex_unlock(&ctx->lock);
+}
+
+size_t kbase_vinstr_dump_size(struct kbase_vinstr_context *ctx)
+{
+ struct kbase_device *kbdev = ctx->kctx->kbdev;
+ size_t dump_size;
+
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_V4)) {
+ u32 nr_cg;
+
+ nr_cg = kbdev->gpu_props.num_core_groups;
+ dump_size = nr_cg * NR_CNT_BLOCKS_PER_GROUP *
+ NR_CNT_PER_BLOCK *
+ NR_BYTES_PER_CNT;
+ } else {
+ /* assume v5 for now */
+ u32 nr_l2, nr_sc;
+
+ nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices;
+ nr_sc = kbdev->gpu_props.props.coherency_info.group[0].num_cores;
+ /* JM and tiler counter blocks are always present */
+ dump_size = (2 + nr_l2 + nr_sc) *
+ NR_CNT_PER_BLOCK *
+ NR_BYTES_PER_CNT;
+ }
+ return dump_size;
+}
+
+/* Accumulate counters in the dump buffer */
+static void accum_dump_buffer(void *dst, void *src, size_t dump_size)
+{
+ size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT;
+ u32 *d = dst;
+ u32 *s = src;
+ size_t i, j;
+
+ for (i = 0; i < dump_size; i += block_size) {
+ /* skip over the header block */
+ d += NR_BYTES_PER_HDR / sizeof(u32);
+ s += NR_BYTES_PER_HDR / sizeof(u32);
+ for (j = 0; j < (block_size - NR_BYTES_PER_HDR) / sizeof(u32); j++) {
+ /* saturate result if addition would result in wraparound */
+ if (U32_MAX - *d < *s)
+ *d = U32_MAX;
+ else
+ *d += *s;
+ d++;
+ s++;
+ }
+ }
+}
+
+/* This is the Midgard v4 patch function. It copies the headers for each
+ * of the defined blocks from the master kernel buffer and then patches up
+ * the performance counter enable mask for each of the blocks to exclude
+ * counters that were not requested by the client. 
*/ +static void patch_dump_buffer_hdr_v4(struct kbase_vinstr_context *ctx, + struct kbase_vinstr_client *cli) +{ + u32 *mask; + u8 *dst = cli->accum_buffer; + u8 *src = ctx->cpu_va; + u32 nr_cg = ctx->kctx->kbdev->gpu_props.num_core_groups; + size_t i, group_size, group; + enum { + SC0_BASE = 0 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, + SC1_BASE = 1 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, + SC2_BASE = 2 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, + SC3_BASE = 3 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, + TILER_BASE = 4 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, + MMU_L2_BASE = 5 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT, + JM_BASE = 7 * NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT + }; + + group_size = NR_CNT_BLOCKS_PER_GROUP * + NR_CNT_PER_BLOCK * + NR_BYTES_PER_CNT; + for (i = 0; i < nr_cg; i++) { + group = i * group_size; + /* copy shader core headers */ + memcpy(&dst[group + SC0_BASE], &src[group + SC0_BASE], + NR_BYTES_PER_HDR); + memcpy(&dst[group + SC1_BASE], &src[group + SC1_BASE], + NR_BYTES_PER_HDR); + memcpy(&dst[group + SC2_BASE], &src[group + SC2_BASE], + NR_BYTES_PER_HDR); + memcpy(&dst[group + SC3_BASE], &src[group + SC3_BASE], + NR_BYTES_PER_HDR); + + /* copy tiler header */ + memcpy(&dst[group + TILER_BASE], &src[group + TILER_BASE], + NR_BYTES_PER_HDR); + + /* copy mmu header */ + memcpy(&dst[group + MMU_L2_BASE], &src[group + MMU_L2_BASE], + NR_BYTES_PER_HDR); + + /* copy job manager header */ + memcpy(&dst[group + JM_BASE], &src[group + JM_BASE], + NR_BYTES_PER_HDR); + + /* patch the shader core enable mask */ + mask = (u32 *)&dst[group + SC0_BASE + PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[SHADER_HWCNT_BM]; + mask = (u32 *)&dst[group + SC1_BASE + PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[SHADER_HWCNT_BM]; + mask = (u32 *)&dst[group + SC2_BASE + PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[SHADER_HWCNT_BM]; + mask = (u32 *)&dst[group + SC3_BASE + PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[SHADER_HWCNT_BM]; + + /* patch the tiler core enable mask */ + mask = (u32 *)&dst[group + TILER_BASE + PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[TILER_HWCNT_BM]; + + /* patch the mmu core enable mask */ + mask = (u32 *)&dst[group + MMU_L2_BASE + PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[MMU_L2_HWCNT_BM]; + + /* patch the job manager enable mask */ + mask = (u32 *)&dst[group + JM_BASE + PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[JM_HWCNT_BM]; + } +} + +/* This is the Midgard v5 patch function. It copies the headers for each + * of the defined blocks from the master kernel buffer and then patches up + * the performance counter enable mask for each of the blocks to exclude + * counters that were not requested by the client. 
*/ +static void patch_dump_buffer_hdr_v5(struct kbase_vinstr_context *ctx, + struct kbase_vinstr_client *cli) +{ + struct kbase_device *kbdev = ctx->kctx->kbdev; + u32 i, nr_l2, nr_sc; + u32 *mask; + u8 *dst = cli->accum_buffer; + u8 *src = ctx->cpu_va; + size_t block_size = NR_CNT_PER_BLOCK * NR_BYTES_PER_CNT; + + /* copy and patch job manager header */ + memcpy(dst, src, NR_BYTES_PER_HDR); + mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[JM_HWCNT_BM]; + dst += block_size; + src += block_size; + + /* copy and patch tiler header */ + memcpy(dst, src, NR_BYTES_PER_HDR); + mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[TILER_HWCNT_BM]; + dst += block_size; + src += block_size; + + /* copy and patch MMU/L2C headers */ + nr_l2 = kbdev->gpu_props.props.l2_props.num_l2_slices; + for (i = 0; i < nr_l2; i++) { + memcpy(dst, src, NR_BYTES_PER_HDR); + mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[MMU_L2_HWCNT_BM]; + dst += block_size; + src += block_size; + } + + /* copy and patch shader core headers */ + nr_sc = kbdev->gpu_props.props.coherency_info.group[0].num_cores; + for (i = 0; i < nr_sc; i++) { + memcpy(dst, src, NR_BYTES_PER_HDR); + mask = (u32 *)&dst[PRFCNT_EN_MASK_OFFSET]; + *mask &= cli->bitmap[SHADER_HWCNT_BM]; + dst += block_size; + src += block_size; + } +} + +static void accum_clients(struct kbase_vinstr_context *ctx) +{ + struct kbase_vinstr_client *iter; + int v4; + + v4 = kbase_hw_has_feature(ctx->kbdev, BASE_HW_FEATURE_V4); + list_for_each_entry(iter, &ctx->clients, list) { + if (v4) + patch_dump_buffer_hdr_v4(ctx, iter); + else + patch_dump_buffer_hdr_v5(ctx, iter); + accum_dump_buffer(iter->accum_buffer, ctx->cpu_va, + iter->dump_size); + } +} + +int kbase_vinstr_dump(struct kbase_vinstr_context *ctx, + struct kbase_vinstr_client *cli) +{ + int err = 0; + + if (!cli) + return -EINVAL; + + mutex_lock(&ctx->lock); + err = kbase_instr_hwcnt_request_dump(ctx->kctx); + if (err) + goto out; + + err = kbase_instr_hwcnt_wait_for_dump(ctx->kctx); + if (err) + goto out; + + accum_clients(ctx); + + if (!cli->kernel) { + if (copy_to_user((void __user *)cli->dump_buffer, + cli->accum_buffer, cli->dump_size)) { + err = -EFAULT; + goto out; + } + } else { + memcpy(cli->dump_buffer, cli->accum_buffer, cli->dump_size); + } + + memset(cli->accum_buffer, 0, cli->dump_size); +out: + mutex_unlock(&ctx->lock); + return err; +} + +int kbase_vinstr_clear(struct kbase_vinstr_context *ctx, + struct kbase_vinstr_client *cli) +{ + int err = 0; + + if (!cli) + return -EINVAL; + + mutex_lock(&ctx->lock); + err = kbase_instr_hwcnt_request_dump(ctx->kctx); + if (err) + goto out; + + err = kbase_instr_hwcnt_wait_for_dump(ctx->kctx); + if (err) + goto out; + + err = kbase_instr_hwcnt_clear(ctx->kctx); + if (err) + goto out; + + accum_clients(ctx); + + memset(cli->accum_buffer, 0, cli->dump_size); +out: + mutex_unlock(&ctx->lock); + return err; +} diff --git a/drivers/gpu/arm/midgard/mali_kbase_vinstr.h b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h new file mode 100644 index 00000000000..2e846e81a80 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_kbase_vinstr.h @@ -0,0 +1,102 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+#ifndef _KBASE_VINSTR_H_
+#define _KBASE_VINSTR_H_
+
+enum {
+ SHADER_HWCNT_BM,
+ TILER_HWCNT_BM,
+ MMU_L2_HWCNT_BM,
+ JM_HWCNT_BM
+};
+
+struct kbase_vinstr_context;
+struct kbase_vinstr_client;
+
+/**
+ * kbase_vinstr_init() - Initialize the vinstr core
+ * @kbdev: Kbase device
+ *
+ * Return: A pointer to the vinstr context on success or NULL on failure
+ */
+struct kbase_vinstr_context *kbase_vinstr_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_vinstr_term() - Terminate the vinstr core
+ * @ctx: Vinstr context
+ */
+void kbase_vinstr_term(struct kbase_vinstr_context *ctx);
+
+/**
+ * kbase_vinstr_attach_client - Attach a client to the vinstr core
+ * @ctx: Vinstr context
+ * @kernel: True if this client is a kernel-side client, false
+ * otherwise
+ * @dump_buffer: Client's dump buffer
+ * @bitmap: Bitmaps describing which counters should be enabled
+ *
+ * Return: A vinstr opaque client handle or NULL on failure
+ */
+struct kbase_vinstr_client *kbase_vinstr_attach_client(struct kbase_vinstr_context *ctx,
+ bool kernel,
+ u64 dump_buffer,
+ u32 bitmap[4]);
+
+/**
+ * kbase_vinstr_detach_client - Detach a client from the vinstr core
+ * @ctx: Vinstr context
+ * @cli: Pointer to vinstr client
+ */
+void kbase_vinstr_detach_client(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_dump_size - Get the size of the dump buffer
+ * @ctx: Vinstr context
+ *
+ * This is only useful for kernel-side clients to know how much
+ * memory they need to allocate to receive the performance counter
+ * memory block.
+ *
+ * Return: The size of the client side buffer
+ */
+size_t kbase_vinstr_dump_size(struct kbase_vinstr_context *ctx);
+
+/**
+ * kbase_vinstr_dump - Performs a synchronous hardware counter dump for a given
+ * kbase context
+ * @ctx: Vinstr context
+ * @cli: Pointer to vinstr client
+ *
+ * Return: 0 on success
+ */
+int kbase_vinstr_dump(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli);
+
+/**
+ * kbase_vinstr_clear - Performs a reset of the hardware counters for a given
+ * kbase context
+ * @ctx: Vinstr context
+ * @cli: Pointer to vinstr client
+ *
+ * Return: 0 on success
+ */
+int kbase_vinstr_clear(struct kbase_vinstr_context *ctx,
+ struct kbase_vinstr_client *cli);
+
+#endif /* _KBASE_VINSTR_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
new file mode 100644
index 00000000000..5d6b4021d62
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_linux_kbase_trace.h
@@ -0,0 +1,201 @@
+/*
+ *
+ * (C) COPYRIGHT 2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA. 
+ * + */ + + + + +#if !defined(_TRACE_MALI_KBASE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MALI_KBASE_H + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mali + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(mali_slot_template, + TP_PROTO(int jobslot, unsigned int info_val), + TP_ARGS(jobslot, info_val), + TP_STRUCT__entry( + __field(unsigned int, jobslot) + __field(unsigned int, info_val) + ), + TP_fast_assign( + __entry->jobslot = jobslot; + __entry->info_val = info_val; + ), + TP_printk("jobslot=%u info=%u", __entry->jobslot, __entry->info_val) +); + +#define DEFINE_MALI_SLOT_EVENT(name) \ +DEFINE_EVENT(mali_slot_template, mali_##name, \ + TP_PROTO(int jobslot, unsigned int info_val), \ + TP_ARGS(jobslot, info_val)) +DEFINE_MALI_SLOT_EVENT(JM_SUBMIT); +DEFINE_MALI_SLOT_EVENT(JM_JOB_DONE); +DEFINE_MALI_SLOT_EVENT(JM_UPDATE_HEAD); +DEFINE_MALI_SLOT_EVENT(JM_CHECK_HEAD); +DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP); +DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_0); +DEFINE_MALI_SLOT_EVENT(JM_SOFTSTOP_1); +DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP); +DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_0); +DEFINE_MALI_SLOT_EVENT(JM_HARDSTOP_1); +DEFINE_MALI_SLOT_EVENT(JM_SLOT_SOFT_OR_HARD_STOP); +DEFINE_MALI_SLOT_EVENT(JM_SLOT_EVICT); +DEFINE_MALI_SLOT_EVENT(JM_BEGIN_RESET_WORKER); +DEFINE_MALI_SLOT_EVENT(JM_END_RESET_WORKER); +DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_ON_RECHECK_FAILED); +DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_SUBMIT_TO_BLOCKED); +DEFINE_MALI_SLOT_EVENT(JS_AFFINITY_CURRENT); +DEFINE_MALI_SLOT_EVENT(JD_DONE_TRY_RUN_NEXT_JOB); +DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_CORES_FAILED); +DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REGISTER_INUSE_FAILED); +DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_REQUEST_ON_RECHECK_FAILED); +DEFINE_MALI_SLOT_EVENT(JS_CORE_REF_AFFINITY_WOULD_VIOLATE); +DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_TRY_RUN_NEXT_JOB); +DEFINE_MALI_SLOT_EVENT(JS_JOB_DONE_RETRY_NEEDED); +DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB); +DEFINE_MALI_SLOT_EVENT(JS_POLICY_DEQUEUE_JOB_IRQ); +#undef DEFINE_MALI_SLOT_EVENT + +DECLARE_EVENT_CLASS(mali_refcount_template, + TP_PROTO(int refcount, unsigned int info_val), + TP_ARGS(refcount, info_val), + TP_STRUCT__entry( + __field(unsigned int, refcount) + __field(unsigned int, info_val) + ), + TP_fast_assign( + __entry->refcount = refcount; + __entry->info_val = info_val; + ), + TP_printk("refcount=%u info=%u", __entry->refcount, __entry->info_val) +); + +#define DEFINE_MALI_REFCOUNT_EVENT(name) \ +DEFINE_EVENT(mali_refcount_template, mali_##name, \ + TP_PROTO(int refcount, unsigned int info_val), \ + TP_ARGS(refcount, info_val)) +DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX_NOLOCK); +DEFINE_MALI_REFCOUNT_EVENT(JS_ADD_JOB); +DEFINE_MALI_REFCOUNT_EVENT(JS_REMOVE_JOB); +DEFINE_MALI_REFCOUNT_EVENT(JS_RETAIN_CTX); +DEFINE_MALI_REFCOUNT_EVENT(JS_RELEASE_CTX); +DEFINE_MALI_REFCOUNT_EVENT(JS_TRY_SCHEDULE_HEAD_CTX); +DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_INIT_CTX); +DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TERM_CTX); +DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_ENQUEUE_CTX); +DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_DEQUEUE_HEAD_CTX); +DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_TRY_EVICT_CTX); +DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_ADD_CTX); +DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_RUNPOOL_REMOVE_CTX); +DEFINE_MALI_REFCOUNT_EVENT(JS_POLICY_FOREACH_CTX_JOBS); +DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_ACTIVE); +DEFINE_MALI_REFCOUNT_EVENT(PM_CONTEXT_IDLE); +#undef DEFINE_MALI_REFCOUNT_EVENT + +DECLARE_EVENT_CLASS(mali_add_template, + TP_PROTO(int gpu_addr, unsigned int info_val), + TP_ARGS(gpu_addr, info_val), + 
TP_STRUCT__entry( + __field(unsigned int, gpu_addr) + __field(unsigned int, info_val) + ), + TP_fast_assign( + __entry->gpu_addr = gpu_addr; + __entry->info_val = info_val; + ), + TP_printk("gpu_addr=%u info=%u", __entry->gpu_addr, __entry->info_val) +); + +#define DEFINE_MALI_ADD_EVENT(name) \ +DEFINE_EVENT(mali_add_template, mali_##name, \ + TP_PROTO(int gpu_addr, unsigned int info_val), \ + TP_ARGS(gpu_addr, info_val)) +DEFINE_MALI_ADD_EVENT(CORE_CTX_DESTROY); +DEFINE_MALI_ADD_EVENT(CORE_CTX_HWINSTR_TERM); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_CLEAR); +DEFINE_MALI_ADD_EVENT(CORE_GPU_IRQ_DONE); +DEFINE_MALI_ADD_EVENT(CORE_GPU_SOFT_RESET); +DEFINE_MALI_ADD_EVENT(CORE_GPU_HARD_RESET); +DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_SAMPLE); +DEFINE_MALI_ADD_EVENT(CORE_GPU_PRFCNT_CLEAR); +DEFINE_MALI_ADD_EVENT(CORE_GPU_CLEAN_INV_CACHES); +DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER); +DEFINE_MALI_ADD_EVENT(JD_DONE_WORKER_END); +DEFINE_MALI_ADD_EVENT(JD_CANCEL_WORKER); +DEFINE_MALI_ADD_EVENT(JD_DONE); +DEFINE_MALI_ADD_EVENT(JD_CANCEL); +DEFINE_MALI_ADD_EVENT(JD_ZAP_CONTEXT); +DEFINE_MALI_ADD_EVENT(JM_IRQ); +DEFINE_MALI_ADD_EVENT(JM_IRQ_END); +DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS); +DEFINE_MALI_ADD_EVENT(JM_FLUSH_WORKQS_DONE); +DEFINE_MALI_ADD_EVENT(JM_ZAP_NON_SCHEDULED); +DEFINE_MALI_ADD_EVENT(JM_ZAP_SCHEDULED); +DEFINE_MALI_ADD_EVENT(JM_ZAP_DONE); +DEFINE_MALI_ADD_EVENT(JM_SUBMIT_AFTER_RESET); +DEFINE_MALI_ADD_EVENT(JM_JOB_COMPLETE); +DEFINE_MALI_ADD_EVENT(JS_FAST_START_EVICTS_CTX); +DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_RUNPOOL); +DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_RUNPOOL); +DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_ON_CTX); +DEFINE_MALI_ADD_EVENT(JS_CTX_ATTR_NOW_OFF_CTX); +DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_END); +DEFINE_MALI_ADD_EVENT(JS_POLICY_TIMER_START); +DEFINE_MALI_ADD_EVENT(JS_POLICY_ENQUEUE_JOB); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_DESIRED); +DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERING_UP); +DEFINE_MALI_ADD_EVENT(PM_JOB_SUBMIT_AFTER_POWERED_UP); +DEFINE_MALI_ADD_EVENT(PM_PWRON); +DEFINE_MALI_ADD_EVENT(PM_PWRON_TILER); +DEFINE_MALI_ADD_EVENT(PM_PWRON_L2); +DEFINE_MALI_ADD_EVENT(PM_PWROFF); +DEFINE_MALI_ADD_EVENT(PM_PWROFF_TILER); +DEFINE_MALI_ADD_EVENT(PM_PWROFF_L2); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_TILER); +DEFINE_MALI_ADD_EVENT(PM_CORES_POWERED_L2); +DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED); +DEFINE_MALI_ADD_EVENT(PM_DESIRED_REACHED_TILER); +DEFINE_MALI_ADD_EVENT(PM_UNREQUEST_CHANGE_SHADER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_REQUEST_CHANGE_SHADER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_NEEDED); +DEFINE_MALI_ADD_EVENT(PM_REGISTER_CHANGE_SHADER_INUSE); +DEFINE_MALI_ADD_EVENT(PM_RELEASE_CHANGE_SHADER_INUSE); +DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE); +DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE); +DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER); +DEFINE_MALI_ADD_EVENT(PM_GPU_ON); +DEFINE_MALI_ADD_EVENT(PM_GPU_OFF); +DEFINE_MALI_ADD_EVENT(PM_SET_POLICY); +DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_INIT); +DEFINE_MALI_ADD_EVENT(PM_CURRENT_POLICY_TERM); +DEFINE_MALI_ADD_EVENT(PM_CA_SET_POLICY); +DEFINE_MALI_ADD_EVENT(PM_WAKE_WAITERS); +#undef DEFINE_MALI_ADD_EVENT + +#endif /* _TRACE_MALI_KBASE_H */ + +#undef TRACE_INCLUDE_PATH +#undef linux +#define TRACE_INCLUDE_PATH . 
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_linux_kbase_trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/arm/midgard/mali_linux_trace.h b/drivers/gpu/arm/midgard/mali_linux_trace.h
new file mode 100644
index 00000000000..fc3cf32ba4d
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_linux_trace.h
@@ -0,0 +1,211 @@
+/*
+ *
+ * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#if !defined(_TRACE_MALI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_H
+
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mali
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+#define TRACE_INCLUDE_FILE mali_linux_trace
+
+#define MALI_JOB_SLOTS_EVENT_CHANGED
+
+/**
+ * mali_job_slots_event - called from mali_kbase_core_linux.c
+ * @event_id: ORed together bitfields representing a type of event, made with the GATOR_MAKE_EVENT() macro.
+ */
+TRACE_EVENT(mali_job_slots_event,
+ TP_PROTO(unsigned int event_id, unsigned int tgid, unsigned int pid,
+ unsigned char job_id),
+ TP_ARGS(event_id, tgid, pid, job_id),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned int, tgid)
+ __field(unsigned int, pid)
+ __field(unsigned char, job_id)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->tgid = tgid;
+ __entry->pid = pid;
+ __entry->job_id = job_id;
+ ),
+ TP_printk("event=%u tgid=%u pid=%u job_id=%u",
+ __entry->event_id, __entry->tgid, __entry->pid, __entry->job_id)
+);
+
+/**
+ * mali_pm_status - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64-bit bitmask reporting the power status of the cores (1-ON, 0-OFF)
+ */
+TRACE_EVENT(mali_pm_status,
+ TP_PROTO(unsigned int event_id, unsigned long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_on - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64-bit bitmask reporting the cores to power up
+ */
+TRACE_EVENT(mali_pm_power_on,
+ TP_PROTO(unsigned int event_id, unsigned long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_pm_power_off - Called by mali_kbase_pm_driver.c
+ * @event_id: core type (shader, tiler, l2 cache)
+ * @value: 64-bit bitmask reporting the cores to power down
+ */
+TRACE_EVENT(mali_pm_power_off,
+ TP_PROTO(unsigned int event_id, unsigned long long value),
+ TP_ARGS(event_id, value),
+ TP_STRUCT__entry(
+ __field(unsigned int, event_id)
+ __field(unsigned long long, value)
+ ),
+ TP_fast_assign(
+ __entry->event_id = event_id;
+ __entry->value = value;
+ ),
+ TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
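
For context, a hedged sketch of how driver code might emit one of these events; the core-type constant and the source of the bitmask are illustrative assumptions, not taken from this patch:

	/* Report the current shader-core power status as a 64-bit bitmask. */
	trace_mali_pm_status(KBASE_PM_CORE_SHADER,
			     (unsigned long long)shader_ready_bitmap);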
__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %u = %llu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_page_fault_insert_pages - Called by page_fault_worker();
+ * it reports an MMU page fault resulting in new pages being mapped.
+ * @event_id: MMU address space number.
+ * @value: number of newly allocated pages
+ */
+TRACE_EVENT(mali_page_fault_insert_pages,
+	TP_PROTO(int event_id, unsigned long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(unsigned long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lu", __entry->event_id, __entry->value)
+);
+
+/**
+ * mali_mmu_as_in_use - Called by assign_and_activate_kctx_addr_space();
+ * it reports that a certain MMU address space is now in use.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_in_use,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_mmu_as_released - Called by kbasep_js_runpool_release_ctx_internal();
+ * it reports that a certain MMU address space has been released.
+ * @event_id: MMU address space number.
+ */
+TRACE_EVENT(mali_mmu_as_released,
+	TP_PROTO(int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%d", __entry->event_id)
+);
+
+/**
+ * mali_total_alloc_pages_change - Called by kbase_atomic_add_pages()
+ * and by kbase_atomic_sub_pages();
+ * it reports that the total number of allocated pages has changed.
+ * @event_id: number of pages to be added or subtracted (according to the sign).
+ */
+TRACE_EVENT(mali_total_alloc_pages_change,
+	TP_PROTO(long long int event_id),
+	TP_ARGS(event_id),
+	TP_STRUCT__entry(
+		__field(long long int, event_id)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+	),
+	TP_printk("event=%lld", __entry->event_id)
+);
+
+/**
+ * mali_sw_counter - not currently used
+ * @event_id: counter id
+ */
+TRACE_EVENT(mali_sw_counter,
+	TP_PROTO(unsigned int event_id, signed long long value),
+	TP_ARGS(event_id, value),
+	TP_STRUCT__entry(
+		__field(int, event_id)
+		__field(long long, value)
+	),
+	TP_fast_assign(
+		__entry->event_id = event_id;
+		__entry->value = value;
+	),
+	TP_printk("event %d = %lld", __entry->event_id, __entry->value)
+);
+
+#endif /* _TRACE_MALI_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef linux
+#define TRACE_INCLUDE_PATH .
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/arm/midgard/mali_malisw.h b/drivers/gpu/arm/midgard/mali_malisw.h
new file mode 100644
index 00000000000..99452933eab
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_malisw.h
@@ -0,0 +1,131 @@
+/*
+ *
+ * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * Kernel-wide include for common macros and types.
+ */
+
+#ifndef _MALISW_H_
+#define _MALISW_H_
+
+#include <linux/version.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
+#define U8_MAX ((u8)~0U)
+#define S8_MAX ((s8)(U8_MAX>>1))
+#define S8_MIN ((s8)(-S8_MAX - 1))
+#define U16_MAX ((u16)~0U)
+#define S16_MAX ((s16)(U16_MAX>>1))
+#define S16_MIN ((s16)(-S16_MAX - 1))
+#define U32_MAX ((u32)~0U)
+#define S32_MAX ((s32)(U32_MAX>>1))
+#define S32_MIN ((s32)(-S32_MAX - 1))
+#define U64_MAX ((u64)~0ULL)
+#define S64_MAX ((s64)(U64_MAX>>1))
+#define S64_MIN ((s64)(-S64_MAX - 1))
+#endif /* LINUX_VERSION_CODE */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 5, 0)
+#define SIZE_MAX (~(size_t)0)
+#endif /* LINUX_VERSION_CODE */
+
+/**
+ * MIN - Return the lesser of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * Refer to the MAX macro for more details.
+ */
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+/**
+ * MAX - Return the greater of two values.
+ *
+ * As a macro it may evaluate its arguments more than once.
+ * If called on the same two arguments as MIN it is guaranteed to return
+ * the one that MIN didn't return. This is significant for types where not
+ * all values are comparable, e.g. NaNs in floating-point types. But if you
+ * want to retrieve the min and max of two values, consider using a
+ * conditional swap instead.
+ */
+#define MAX(x, y) ((x) < (y) ? (y) : (x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for suppressing unused variable warnings. Where possible
+ * such variables should be removed; this macro is present for cases where we
+ * must support API backwards compatibility.
+ */
+#define CSTD_UNUSED(x) ((void)(x))
+
+/**
+ * @hideinitializer
+ * Function-like macro for use where "no behavior" is desired. This is useful
+ * when compile time macros turn a function-like macro into a no-op, but
+ * where having no statement is otherwise invalid.
+ */
+#define CSTD_NOP(...) ((void)#__VA_ARGS__)
+
+/**
+ * Function-like macro for converting a pointer into a u64 for storing into
+ * an external data structure. This is commonly used when pairing a 32-bit
+ * CPU with a 64-bit peripheral, such as a Midgard GPU. C's type promotion
+ * is complex and a straight cast does not work reliably as pointers are
+ * often considered as signed.
+ */
+#define PTR_TO_U64(x) ((uint64_t)((uintptr_t)(x)))
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a single level macro.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR1( MY_MACRO )
+ * > "MY_MACRO"
+ * @endcode
+ */
+#define CSTD_STR1(x) #x
+
+/**
+ * @hideinitializer
+ * Function-like macro for stringizing a macro's value. This should not be
+ * used if the macro is defined in a way which may have no value; the
+ * alternative @c CSTD_STR2N macro should be used instead.
+ * @code
+ * #define MY_MACRO 32
+ * CSTD_STR2( MY_MACRO )
+ * > "32"
+ * @endcode
+ */
+#define CSTD_STR2(x) CSTD_STR1(x)
+
+/**
+ * Specify an assertion value which is evaluated at compile time. Recommended
+ * usage is specification of a @c static @c INLINE function containing all of
+ * the assertions thus:
+ *
+ * @code
+ * static INLINE void [module]_compile_time_assertions(void)
+ * {
+ *	CSTD_COMPILE_TIME_ASSERT( sizeof(uintptr_t) == sizeof(intptr_t) );
+ * }
+ * @endcode
+ *
+ * @note Use @c static not @c STATIC. We never want to turn off this @c static
+ * specification for testing purposes.
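+ *
+ * @note How the macro works: if @c expr evaluates to zero at compile time,
+ * the switch statement below contains two identical case labels (0 and
+ * (expr)), which is a compile error; if @c expr is non-zero the labels are
+ * distinct and the statement compiles to nothing.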
+ */
+#define CSTD_COMPILE_TIME_ASSERT(expr) \
+	do { switch (0) { case 0: case (expr):; } } while (false)
+
+#endif /* _MALISW_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_midg_regmap.h b/drivers/gpu/arm/midgard/mali_midg_regmap.h
new file mode 100644
index 00000000000..180fea1dfd1
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_midg_regmap.h
@@ -0,0 +1,542 @@
+/*
+ *
+ * (C) COPYRIGHT 2010-2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+#ifndef _MIDGARD_REGMAP_H_
+#define _MIDGARD_REGMAP_H_
+
+/*
+ * Begin Register Offsets
+ */
+
+#define GPU_CONTROL_BASE 0x0000
+#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
+#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
+#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
+#define SUSPEND_SIZE 0x008 /* (RO) Fixed-function suspend buffer size */
+#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
+#define MEM_FEATURES 0x010 /* (RO) Memory system features */
+#define MMU_FEATURES 0x014 /* (RO) MMU features */
+#define AS_PRESENT 0x018 /* (RO) Address space slots present */
+#define JS_PRESENT 0x01C /* (RO) Job slots present */
+#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
+#define GPU_IRQ_CLEAR 0x024 /* (WO) */
+#define GPU_IRQ_MASK 0x028 /* (RW) */
+#define GPU_IRQ_STATUS 0x02C /* (RO) */
+
+/* IRQ flags */
+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. Intended for use with SOFT_RESET
+ commands, which may take time. */
+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down
+ and the power manager is idle. */
+
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed.
*/
+
+#define GPU_IRQ_REG_ALL (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+		| POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#define GPU_COMMAND 0x030 /* (WO) */
+#define GPU_STATUS 0x034 /* (RO) */
+
+
+#define GROUPS_L2_COHERENT (1 << 0) /* Core groups are L2 coherent */
+
+#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */
+
+#define PWR_KEY 0x050 /* (WO) Power manager key register */
+#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */
+#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */
+
+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory region base address, low word */
+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory region base address, high word */
+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable flags for Job Manager */
+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable flags for shader cores */
+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable flags for tiler */
+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable flags for MMU/L2 cache */
+
+#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES 0x0AC /* (RO) Thread features */
+
+#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */
+#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */
+#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */
+#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */
+#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */
+#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */
+#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */
+#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */
+#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */
+#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */
+#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */
+#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */
+#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */
+#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */
+#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */
+#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present
bitmap, high word */ + +#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */ +#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */ + +#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ +#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ + + +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + +#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */ +#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */ + +#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */ +#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */ + +#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */ +#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */ + +#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */ +#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */ + +#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */ +#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */ + +#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */ +#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */ + +#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */ +#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */ + +#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */ +#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */ + +#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */ +#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */ + + +#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration settings (Implementation specific register) */ +#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration settings (Implementation specific register) */ +#define L2_MMU_CONFIG 0xF0C /* (RW) Configuration of the L2 cache and MMU (Implementation specific register) */ + +#define JOB_CONTROL_BASE 0x1000 + +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + +#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ +#define JOB_IRQ_JS_STATE 0x010 /* status==active and 
_next == busy snapshot from last JOB_IRQ_CLEAR */ +#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */ + +#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ +#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ +#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ +#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ +#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */ +#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */ +#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */ +#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */ +#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */ +#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */ +#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */ +#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */ +#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */ +#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */ +#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ +#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ + +#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) + +#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ +#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ +#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ +#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ +#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ +#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ +#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ + +#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ +#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ + +#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ +#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ + +#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ +#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ +#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ + +#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ + + +#define MEMORY_MANAGEMENT_BASE 0x2000 +#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) + +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ +#define MMU_AS1 0x440 /* Configuration registers for address space 1 */ +#define MMU_AS2 0x480 /* Configuration registers for address space 2 */ +#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ +#define MMU_AS4 0x500 /* Configuration registers for address space 4 */ +#define MMU_AS5 0x540 /* Configuration registers for address space 5 */ +#define MMU_AS6 0x580 /* Configuration registers for 
address space 6 */
+#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */
+#define MMU_AS8 0x600 /* Configuration registers for address space 8 */
+#define MMU_AS9 0x640 /* Configuration registers for address space 9 */
+#define MMU_AS10 0x680 /* Configuration registers for address space 10 */
+#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */
+#define MMU_AS12 0x700 /* Configuration registers for address space 12 */
+#define MMU_AS13 0x740 /* Configuration registers for address space 13 */
+#define MMU_AS14 0x780 /* Configuration registers for address space 14 */
+#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
+
+#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
+#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
+
+
+
+/* End Register Offsets */
+
+/*
+ * MMU_IRQ_RAWSTAT register values. The values are also valid for the
+ * MMU_IRQ_CLEAR, MMU_IRQ_MASK and MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS 16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers */
+#define MMU_PAGE_FAULT(n) (1UL << (n))
+#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
+
+/*
+ * Begin LPAE MMU TRANSTAB register values
+ */
+#define AS_TRANSTAB_LPAE_ADDR_SPACE_MASK 0xfffff000
+#define AS_TRANSTAB_LPAE_ADRMODE_UNMAPPED (0u << 0)
+#define AS_TRANSTAB_LPAE_ADRMODE_IDENTITY (1u << 1)
+#define AS_TRANSTAB_LPAE_ADRMODE_TABLE (3u << 0)
+#define AS_TRANSTAB_LPAE_READ_INNER (1u << 2)
+#define AS_TRANSTAB_LPAE_SHARE_OUTER (1u << 4)
+
+#define AS_TRANSTAB_LPAE_ADRMODE_MASK 0x00000003
+
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3)
+
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2<<8)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3<<8)
+
+
+/*
+ * Begin Command Values
+ */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START 0x01 /* Start processing a job chain */
+#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP 0x00 /* NOP Operation */
+#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and ASn_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK 0x03 /* Issue an unlock region command to all MMUs */
+#define AS_COMMAND_FLUSH 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs
+ (deprecated - only for use with T60x) */
+#define AS_COMMAND_FLUSH_PT 0x04 /* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_MEM 0x05 /* Wait for memory accesses to complete, flush all the L1 caches,
+ then flush all L2 caches, then issue a flush region command to all MMUs */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12)
+#define JS_CONFIG_THREAD_PRI(n) ((n) << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are separated to avoid dependency of userspace and kernel code.
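+ * The bases below mirror the hardware grouping of the codes: values below
+ * 0x40 are completion statuses rather than faults, 0x40-0x4F are general
+ * faults, 0x50-0x5F are instruction or data faults, and 0x60 upwards are
+ * memory faults.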
+ */
+
+/* Group of values representing the job status instead of a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE 0x00
+#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE 0x40
+#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50
+#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE 0x60
+#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
+
+/* End Command Values */
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active.
*/ + +/* PRFCNT_CONFIG register values */ +#define PRFCNT_CONFIG_AS_SHIFT 4 /* address space bitmap starts from bit 4 of the register */ +#define PRFCNT_CONFIG_MODE_OFF 0 /* The performance counters are disabled. */ +#define PRFCNT_CONFIG_MODE_MANUAL 1 /* The performance counters are enabled, but are only written out when a PRFCNT_SAMPLE command is issued using the GPU_COMMAND register. */ +#define PRFCNT_CONFIG_MODE_TILE 2 /* The performance counters are enabled, and are written out each time a tile finishes rendering. */ + +/* AS<n>_MEMATTR values: */ + +/* Use GPU implementation-defined caching policy. */ +#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull +/* The attribute set to force all resources to be cached. */ +#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full +/* Inner write-alloc cache setup, no outer caching */ +#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull +/* Set to implementation defined, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull +/* Set to write back memory, outer caching */ +#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull + +/* Symbol for default MEMATTR to use */ +#define AS_MEMATTR_INDEX_DEFAULT 0 + +/* HW implementation defined caching */ +#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0 +/* Force cache on */ +#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1 +/* Write-alloc */ +#define AS_MEMATTR_INDEX_WRITE_ALLOC 2 +/* Outer coherent, inner implementation defined policy */ +#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3 +/* Outer coherent, write alloc inner */ +#define AS_MEMATTR_INDEX_OUTER_WA 4 + +/* GPU_ID register */ +#define GPU_ID_VERSION_STATUS_SHIFT 0 +#define GPU_ID_VERSION_MINOR_SHIFT 4 +#define GPU_ID_VERSION_MAJOR_SHIFT 12 +#define GPU_ID_VERSION_PRODUCT_ID_SHIFT 16 +#define GPU_ID_VERSION_STATUS (0xF << GPU_ID_VERSION_STATUS_SHIFT) +#define GPU_ID_VERSION_MINOR (0xFF << GPU_ID_VERSION_MINOR_SHIFT) +#define GPU_ID_VERSION_MAJOR (0xF << GPU_ID_VERSION_MAJOR_SHIFT) +#define GPU_ID_VERSION_PRODUCT_ID (0xFFFF << GPU_ID_VERSION_PRODUCT_ID_SHIFT) + +/* Values for GPU_ID_VERSION_PRODUCT_ID bitfield */ +#define GPU_ID_PI_T60X 0x6956 +#define GPU_ID_PI_T62X 0x0620 +#define GPU_ID_PI_T76X 0x0750 +#define GPU_ID_PI_T72X 0x0720 +#define GPU_ID_PI_TFRX 0x0880 +#define GPU_ID_PI_T86X 0x0860 +#define GPU_ID_PI_T82X 0x0820 +#define GPU_ID_PI_T83X 0x0830 + +/* Values for GPU_ID_VERSION_STATUS field for PRODUCT_ID GPU_ID_PI_T60X */ +#define GPU_ID_S_15DEV0 0x1 +#define GPU_ID_S_EAC 0x2 + +/* Helper macro to create a GPU_ID assuming valid values for id, major, minor, status */ +#define GPU_ID_MAKE(id, major, minor, status) \ + (((id) << GPU_ID_VERSION_PRODUCT_ID_SHIFT) | \ + ((major) << GPU_ID_VERSION_MAJOR_SHIFT) | \ + ((minor) << GPU_ID_VERSION_MINOR_SHIFT) | \ + ((status) << GPU_ID_VERSION_STATUS_SHIFT)) + +/* End GPU_ID register */ + +/* JS<n>_FEATURES register */ + +#define JS_FEATURE_NULL_JOB (1u << 1) +#define JS_FEATURE_SET_VALUE_JOB (1u << 2) +#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3) +#define JS_FEATURE_COMPUTE_JOB (1u << 4) +#define JS_FEATURE_VERTEX_JOB (1u << 5) +#define JS_FEATURE_GEOMETRY_JOB (1u << 6) +#define JS_FEATURE_TILER_JOB (1u << 7) +#define JS_FEATURE_FUSED_JOB (1u << 8) +#define JS_FEATURE_FRAGMENT_JOB (1u << 9) + +/* End JS<n>_FEATURES register */ + +/* L2_MMU_CONFIG register */ +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT (24) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT) +#define 
L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_READS_SHIFT)
+
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT (26)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_OCTANT (0x1 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_QUARTER (0x2 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+#define L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_HALF (0x3 << L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES_SHIFT)
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED 0
+#define IMPLEMENTATION_SILICON 1
+#define IMPLEMENTATION_FPGA 2
+#define IMPLEMENTATION_MODEL 3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT 256
+#define THREAD_MWS_DEFAULT 256
+#define THREAD_MBS_DEFAULT 256
+#define THREAD_MR_DEFAULT 1024
+#define THREAD_MTQ_DEFAULT 4
+#define THREAD_MTGS_DEFAULT 10
+
+/* End THREAD_* registers */
+
+/* COHERENCY_* values */
+#define COHERENCY_ACE_LITE 0
+#define COHERENCY_ACE 1
+#define COHERENCY_NONE 0xFFFF
+#define COHERENCY_FEATURE_BIT(x) (1 << (x))
+/* End COHERENCY_* values */
+
+/* SHADER_CONFIG register */
+
+#define SC_ALT_COUNTERS (1ul << 3)
+#define SC_OVERRIDE_FWD_PIXEL_KILL (1ul << 4)
+#define SC_SDC_DISABLE_OQ_DISCARD (1ul << 6)
+#define SC_LS_PAUSEBUFFER_DISABLE (1ul << 16)
+#define SC_ENABLE_TEXGRD_FLAGS (1ul << 25)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+
+#define TC_CLOCK_GATE_OVERRIDE (1ul << 0)
+
+/* End TILER_CONFIG register */
+
+
+#endif /* _MIDGARD_REGMAP_H_ */
diff --git a/drivers/gpu/arm/midgard/mali_timeline.h b/drivers/gpu/arm/midgard/mali_timeline.h
new file mode 100644
index 00000000000..d8286a36009
--- /dev/null
+++ b/drivers/gpu/arm/midgard/mali_timeline.h
@@ -0,0 +1,396 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ * + */ + + + + + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mali_timeline + +#if !defined(_MALI_TIMELINE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _MALI_TIMELINE_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(mali_timeline_atoms_in_flight, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int tgid, + int count), + + TP_ARGS(ts_sec, + ts_nsec, + tgid, + count), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, tgid) + __field(int, count) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->tgid = tgid; + __entry->count = count; + ), + + TP_printk("%i,%i.%.9i,%i,%i", CTX_SET_NR_ATOMS_IN_FLIGHT, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->tgid, + __entry->count) +); + + +TRACE_EVENT(mali_timeline_atom, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int tgid, + int atom_id), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + tgid, + atom_id), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, tgid) + __field(int, atom_id) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->tgid = tgid; + __entry->atom_id = atom_id; + ), + + TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->tgid, + __entry->atom_id, + __entry->atom_id) +); + +TRACE_EVENT(mali_timeline_gpu_slot_active, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int tgid, + int js, + int count), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + tgid, + js, + count), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, tgid) + __field(int, js) + __field(int, count) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->tgid = tgid; + __entry->js = js; + __entry->count = count; + ), + + TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->tgid, + __entry->js, + __entry->count) +); + +TRACE_EVENT(mali_timeline_gpu_slot_action, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int tgid, + int js, + int count), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + tgid, + js, + count), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, tgid) + __field(int, js) + __field(int, count) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->tgid = tgid; + __entry->js = js; + __entry->count = count; + ), + + TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->tgid, + __entry->js, + __entry->count) +); + +TRACE_EVENT(mali_timeline_gpu_power_active, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int active), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + active), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, active) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->active = active; + ), + + TP_printk("%i,%i.%.9i,0,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->active) + +); + +TRACE_EVENT(mali_timeline_l2_power_active, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int 
event_type, + int state), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + state), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, state) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->state = state; + ), + + TP_printk("%i,%i.%.9i,0,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->state) + +); +TRACE_EVENT(mali_timeline_pm_event, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int pm_event_type, + unsigned int pm_event_id), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + pm_event_type, + pm_event_id), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, pm_event_type) + __field(unsigned int, pm_event_id) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->pm_event_type = pm_event_type; + __entry->pm_event_id = pm_event_id; + ), + + TP_printk("%i,%i.%.9i,0,%i,%u", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->pm_event_type, __entry->pm_event_id) + +); + +TRACE_EVENT(mali_timeline_slot_atom, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int event_type, + int tgid, + int js, + int atom_id), + + TP_ARGS(ts_sec, + ts_nsec, + event_type, + tgid, + js, + atom_id), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, event_type) + __field(int, tgid) + __field(int, js) + __field(int, atom_id) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->event_type = event_type; + __entry->tgid = tgid; + __entry->js = js; + __entry->atom_id = atom_id; + ), + + TP_printk("%i,%i.%.9i,%i,%i,%i", __entry->event_type, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->tgid, + __entry->js, + __entry->atom_id) +); + +TRACE_EVENT(mali_timeline_pm_checktrans, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int trans_code, + int trans_id), + + TP_ARGS(ts_sec, + ts_nsec, + trans_code, + trans_id), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, trans_code) + __field(int, trans_id) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->trans_code = trans_code; + __entry->trans_id = trans_id; + ), + + TP_printk("%i,%i.%.9i,0,%i", __entry->trans_code, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->trans_id) + +); + +TRACE_EVENT(mali_timeline_context_active, + + TP_PROTO(u64 ts_sec, + u32 ts_nsec, + int count), + + TP_ARGS(ts_sec, + ts_nsec, + count), + + TP_STRUCT__entry( + __field(u64, ts_sec) + __field(u32, ts_nsec) + __field(int, count) + ), + + TP_fast_assign( + __entry->ts_sec = ts_sec; + __entry->ts_nsec = ts_nsec; + __entry->count = count; + ), + + TP_printk("%i,%i.%.9i,0,%i", SW_SET_CONTEXT_ACTIVE, + (int)__entry->ts_sec, + (int)__entry->ts_nsec, + __entry->count) +); + +#endif /* _MALI_TIMELINE_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . + +/* This part must be outside protection */ +#include <trace/define_trace.h> + diff --git a/drivers/gpu/arm/midgard/mali_uk.h b/drivers/gpu/arm/midgard/mali_uk.h new file mode 100644 index 00000000000..841d03fb587 --- /dev/null +++ b/drivers/gpu/arm/midgard/mali_uk.h @@ -0,0 +1,141 @@ +/* + * + * (C) COPYRIGHT 2010, 2012-2015 ARM Limited. All rights reserved. 
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+
+
+/**
+ * @file mali_uk.h
+ * Types and definitions that are common across OSs for both the user
+ * and kernel side of the User-Kernel interface.
+ */
+
+#ifndef _UK_H_
+#define _UK_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @addtogroup base_api
+ * @{
+ */
+
+/**
+ * @defgroup uk_api User-Kernel Interface API
+ *
+ * The User-Kernel Interface abstracts the communication mechanism between the user and kernel-side code of device
+ * drivers developed as part of the Midgard DDK. Currently that includes the Base driver and the UMP driver.
+ *
+ * It exposes an OS independent API to user-side code (UKU) which routes function calls to an OS-independent
+ * kernel-side API (UKK) via an OS-specific communication mechanism.
+ *
+ * This API is internal to the Midgard DDK and is not exposed to any applications.
+ *
+ * @{
+ */
+
+/**
+ * These are identifiers for kernel-side drivers implementing a UK interface, aka UKK clients. The
+ * UK module maps this to an OS specific device name, e.g. "gpu_base" -> "GPU0:". Pass this
+ * identifier to the uku_open() function to select a UKK client.
+ *
+ * When a new UKK client driver is created a new identifier needs to be added to the uk_client_id
+ * enumeration and the uku_open() implementation for the various OS ports needs to be updated to
+ * provide a mapping of the identifier to the OS specific device name.
+ *
+ */
+enum uk_client_id {
+	/**
+	 * Value used to identify the Base driver UK client.
+	 */
+	UK_CLIENT_MALI_T600_BASE,
+
+	/** The number of uk clients supported. This must be the last member of the enum */
+	UK_CLIENT_COUNT
+};
+
+/**
+ * Each function callable through the UK interface has a unique number.
+ * Functions provided by UK clients start from number UK_FUNC_ID.
+ * Numbers below UK_FUNC_ID are used for internal UK functions.
+ */
+enum uk_func {
+	UKP_FUNC_ID_CHECK_VERSION,   /**< UKK Core internal function */
+	/**
+	 * Each UK client numbers the functions they provide starting from
+	 * number UK_FUNC_ID. This number is then eventually assigned to the
+	 * id field of the union uk_header structure when preparing to make a
+	 * UK call. See your UK client for a list of their function numbers.
+	 */
+	UK_FUNC_ID = 512
+};
+
+/**
+ * Arguments for a UK call are stored in a structure. This structure consists
+ * of a fixed size header and a payload. The header carries a 32-bit number
+ * identifying the UK function to be called (see uk_func). When the UKK client
+ * receives this header and has executed the requested UK function, it will use
+ * the same header to store the result of the function in the form of an
+ * int return code. The size of this structure is such that the
+ * first member of the payload following the header can be accessed efficiently
+ * on a 32 and 64-bit kernel and the structure has the same size regardless
+ * of a 32 or 64-bit kernel. The uk_kernel_size_type type should be defined
+ * accordingly in the OS specific mali_uk_os.h header file.
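+ *
+ * For illustration only (uku_example_args is a hypothetical name, not part
+ * of this interface): a UK call argument structure embeds the header as its
+ * first member and sets the function number before making the call:
+ *
+ * @code
+ * struct uku_example_args {
+ *	union uk_header header;
+ *	u64 payload;
+ * };
+ *
+ * args.header.id = UK_FUNC_ID + 0;
+ * @endcode
+ *
+ * After the call returns, args.header.ret holds the int return code
+ * (0 on success).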
+ */
+union uk_header {
+	/**
+	 * 32-bit number identifying the UK function to be called.
+	 * Also see uk_func.
+	 */
+	u32 id;
+	/**
+	 * The int return code returned by the called UK function.
+	 * See the specification of the particular UK function you are
+	 * calling for the meaning of the error codes returned. All
+	 * UK functions return 0 on success.
+	 */
+	u32 ret;
+	/*
+	 * Used to ensure 64-bit alignment of this union. Do not remove.
+	 * This field is used for padding and does not need to be initialized.
+	 */
+	u64 sizer;
+};
+
+/**
+ * This structure carries a 16-bit major and minor number and is sent along with an internal UK call
+ * used during uku_open to identify the versions of the UK module in use by the user-side and kernel-side.
+ */
+struct uku_version_check_args {
+	union uk_header header;
+	/**< UK call header */
+	u16 major;
+	/**< This field carries the user-side major version on input and the kernel-side major version on output */
+	u16 minor;
+	/**< This field carries the user-side minor version on input and the kernel-side minor version on output. */
+	u8 padding[4];
+};
+
+/** @} end group uk_api */
+
+/** @} *//* end group base_api */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* _UK_H_ */
diff --git a/drivers/gpu/arm/midgard/platform/Kbuild b/drivers/gpu/arm/midgard/platform/Kbuild
new file mode 100644
index 00000000000..558657bbced
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/Kbuild
@@ -0,0 +1,21 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+ifeq ($(CONFIG_MALI_PLATFORM_THIRDPARTY),y)
+# remove begin and end quotes from the Kconfig string type
+	platform_name := $(shell echo $(CONFIG_MALI_PLATFORM_THIRDPARTY_NAME))
+	obj-y += $(platform_name)/
+endif
diff --git a/drivers/gpu/arm/midgard/platform/Kconfig b/drivers/gpu/arm/midgard/platform/Kconfig
new file mode 100644
index 00000000000..8fb4e917c4f
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/Kconfig
@@ -0,0 +1,24 @@
+#
+# (C) COPYRIGHT 2012 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# A copy of the licence is included with the program, and can also be obtained
+# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+#
+
+
+
+
+# Add your platform specific Kconfig file here
+#
+# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+#
+# Where xxx is the platform name set in MALI_PLATFORM_THIRDPARTY_NAME
+#
diff --git a/drivers/gpu/arm/midgard/platform/devicetree/Kbuild b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
new file mode 100644
index 00000000000..679945bec52
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/devicetree/Kbuild
@@ -0,0 +1,16 @@
+#
+# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c new file mode 100644 index 00000000000..046302e8e41 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_devicetree.c @@ -0,0 +1,23 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +int kbase_platform_early_init(void) +{ + /* Nothing needed at this stage */ + return 0; +} + diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h new file mode 100644 index 00000000000..f92961c53a0 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_config_platform.h @@ -0,0 +1,86 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * Maximum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MAX (5000) +/** + * Minimum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MIN (5000) + +/** + * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock + * + * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_cpu_clk_speed_func. + * Default Value: NA + */ +#define CPU_SPEED_FUNC (NULL) + +/** + * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock + * + * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_gpu_clk_speed_func. 
+ * Default Value: NA + */ +#define GPU_SPEED_FUNC (NULL) + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +/** Power model for IPA + * + * Attached value: pointer to @ref mali_pa_model_ops + */ +#define POWER_MODEL_CALLBACKS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; + +/** + * Secure mode switch + * + * Attached value: pointer to @ref kbase_secure_ops + */ +#define SECURE_CALLBACKS (NULL) diff --git a/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c new file mode 100644 index 00000000000..6f8a72555e3 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/devicetree/mali_kbase_runtime_pm.c @@ -0,0 +1,107 @@ +/* + * + * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <mali_kbase.h> +#include <mali_kbase_defs.h> +#include <linux/pm_runtime.h> +#include <linux/suspend.h> + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + int ret; + + dev_dbg(kbdev->dev, "pm_callback_power_on %p\n", + (void *)kbdev->dev->pm_domain); + + ret = pm_runtime_get_sync(kbdev->dev); + + dev_dbg(kbdev->dev, "pm_runtime_get returned %d\n", ret); + + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "pm_callback_power_off\n"); + + pm_runtime_put_autosuspend(kbdev->dev); +} + +int kbase_device_runtime_init(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "kbase_device_runtime_init\n"); + pm_runtime_enable(kbdev->dev); +#ifdef CONFIG_MALI_MIDGARD_DEBUG_SYS + { + int err = kbase_platform_create_sysfs_file(kbdev->dev); + + if (err) + return err; + } +#endif /* CONFIG_MALI_MIDGARD_DEBUG_SYS */ + return 0; +} + +void kbase_device_runtime_disable(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n"); + pm_runtime_disable(kbdev->dev); +} + +static int pm_callback_runtime_on(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "pm_callback_runtime_on\n"); + + return 0; +} + +static void pm_callback_runtime_off(struct kbase_device *kbdev) +{ + dev_dbg(kbdev->dev, "pm_callback_runtime_off\n"); +} + +static void pm_callback_resume(struct kbase_device *kbdev) +{ + int ret = pm_callback_runtime_on(kbdev); + + WARN_ON(ret); +} + +static void pm_callback_suspend(struct kbase_device *kbdev) +{ + pm_callback_runtime_off(kbdev); +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = pm_callback_suspend, + .power_resume_callback = pm_callback_resume, +#ifdef CONFIG_PM_RUNTIME + .power_runtime_init_callback = kbase_device_runtime_init, + .power_runtime_term_callback = 
kbase_device_runtime_disable, + .power_runtime_on_callback = pm_callback_runtime_on, + .power_runtime_off_callback = pm_callback_runtime_off, +#else /* CONFIG_PM_RUNTIME */ + .power_runtime_init_callback = NULL, + .power_runtime_term_callback = NULL, + .power_runtime_on_callback = NULL, + .power_runtime_off_callback = NULL, +#endif /* CONFIG_PM_RUNTIME */ +}; + + diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild b/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild new file mode 100644 index 00000000000..5b6ef37bb71 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/juno_soc/Kbuild @@ -0,0 +1,19 @@ +# +# (C) COPYRIGHT 2013-2014 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + +obj-y += mali_kbase_config_juno_soc.o + + +obj-m += juno_mali_opp.o diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c b/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c new file mode 100644 index 00000000000..ccfd8ccb012 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/juno_soc/juno_mali_opp.c @@ -0,0 +1,84 @@ +/* + * + * (C) COPYRIGHT 2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/scpi_protocol.h> +#include <linux/version.h> +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) +#include <linux/pm_opp.h> +#else /* Linux >= 3.13 */ +/* In 3.13 the OPP include header file, types, and functions were all + * renamed. Use the old filename for the include, and define the new names to + * the old, when an old kernel is detected. + */ +#include <linux/opp.h> +#define dev_pm_opp_add opp_add +#endif /* Linux >= 3.13 */ + + +static int init_juno_opps_from_scpi(struct device *dev) +{ + struct scpi_opp *sopp; + int i; + + /* Hard coded for Juno. 
2 is GPU domain */ + sopp = scpi_dvfs_get_opps(2); + if (IS_ERR_OR_NULL(sopp)) + return PTR_ERR(sopp); + + for (i = 0; i < sopp->count; i++) { + struct scpi_opp_entry *e = &sopp->opp[i]; + + dev_info(dev, "Mali OPP from SCPI: %u Hz @ %u mV\n", + e->freq_hz, e->volt_mv); + + dev_pm_opp_add(dev, e->freq_hz, e->volt_mv * 1000); + } + + return 0; +} + +static int juno_setup_opps(void) +{ + struct device_node *np; + struct platform_device *pdev; + int err; + + np = of_find_node_by_name(NULL, "gpu"); + if (!np) { + pr_err("Failed to find DT entry for Mali\n"); + return -EFAULT; + } + + pdev = of_find_device_by_node(np); + if (!pdev) { + pr_err("Failed to find device for Mali\n"); + of_node_put(np); + return -EFAULT; + } + + err = init_juno_opps_from_scpi(&pdev->dev); + + of_node_put(np); + + return err; +} + +module_init(juno_setup_opps); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c new file mode 100644 index 00000000000..86f2bb50caf --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_juno_soc.c @@ -0,0 +1,172 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <linux/ioport.h> +#ifdef CONFIG_DEVFREQ_THERMAL +#include <linux/devfreq_cooling.h> +#endif +#include <linux/thermal.h> +#include <mali_kbase.h> +#include <mali_kbase_defs.h> +#include <mali_kbase_config.h> + +/* Versatile Express (VE) Juno Development Platform */ + +#define HARD_RESET_AT_POWER_OFF 0 + +#ifndef CONFIG_OF +static struct kbase_io_resources io_resources = { + .job_irq_number = 65, + .mmu_irq_number = 66, + .gpu_irq_number = 64, + .io_memory_region = { + .start = 0x2D000000, + .end = 0x2D000000 + (4096 * 4) - 1} +}; +#endif + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ +#if HARD_RESET_AT_POWER_OFF + /* Cause a GPU hard reset to test whether we have actually idled the GPU + * and that we properly reconfigure the GPU on power up. + * Usually this would be dangerous, but if the GPU is working correctly it should + * be completely safe as the GPU should not be active at this point. + * However this is disabled normally because it will most likely interfere with + * bus logging etc. 
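+	 * To experiment with it, set HARD_RESET_AT_POWER_OFF to 1 at the top
+	 * of this file.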
+ */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); +#endif +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +#ifdef CONFIG_DEVFREQ_THERMAL + +#define FALLBACK_STATIC_TEMPERATURE 55000 + +static unsigned long juno_model_static_power(unsigned long voltage) +{ + struct thermal_zone_device *tz; + unsigned long temperature, temp; + unsigned long temp_squared, temp_cubed, temp_scaling_factor; + const unsigned long coefficient = (410UL << 20) / (729000000UL >> 10); + const unsigned long voltage_cubed = (voltage * voltage * voltage) >> 10; + + tz = thermal_zone_get_zone_by_name("gpu"); + if (IS_ERR(tz)) { + pr_warn_ratelimited("Error getting gpu thermal zone (%ld), not yet ready?\n", + PTR_ERR(tz)); + temperature = FALLBACK_STATIC_TEMPERATURE; + } else { + int ret; + + ret = tz->ops->get_temp(tz, &temperature); + if (ret) { + pr_warn_ratelimited("Error reading temperature for gpu thermal zone: %d\n", + ret); + temperature = FALLBACK_STATIC_TEMPERATURE; + } + } + + /* Calculate the temperature scaling factor. To be applied to the + * voltage scaled power. + */ + temp = temperature / 1000; + temp_squared = temp * temp; + temp_cubed = temp_squared * temp; + temp_scaling_factor = + (2 * temp_cubed) + - (80 * temp_squared) + + (4700 * temp) + + 32000; + + return (((coefficient * voltage_cubed) >> 20) + * temp_scaling_factor) + / 1000000; +} + +static unsigned long juno_model_dynamic_power(unsigned long freq, + unsigned long voltage) +{ + /* The inputs: freq (f) is in Hz, and voltage (v) in mV. + * The coefficient (c) is in mW/(MHz mV mV). 
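+	 * As a purely illustrative check of the formula below: with the
+	 * coefficient of 3600 used here, 900 mV at 600 MHz works out to
+	 * 3600 * (900 * 900 / 1000) * 600 / 10^6, i.e. roughly 1750 mW.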
+ * + * This function calculates the dynamic power after this formula: + * Pdyn (mW) = c (mW/(MHz*mV*mV)) * v (mV) * v (mV) * f (MHz) + */ + const unsigned long v2 = (voltage * voltage) / 1000; /* m*(V*V) */ + const unsigned long f_mhz = freq / 1000000; /* MHz */ + const unsigned long coefficient = 3600; /* mW/(MHz*mV*mV) */ + + return (coefficient * v2 * f_mhz) / 1000000; /* mW */ +} + +struct devfreq_cooling_ops juno_model_ops = { + .get_static_power = juno_model_static_power, + .get_dynamic_power = juno_model_dynamic_power, +}; + +#endif /* CONFIG_DEVFREQ_THERMAL */ + +static int juno_secure_mode_enable(struct kbase_device *kbdev) +{ + /* TODO: enable secure mode */ + /*dev_err(kbdev->dev, "SWITCHING TO SECURE\n");*/ + return 0; /* all ok */ +} + +static int juno_secure_mode_disable(struct kbase_device *kbdev) +{ + /* TODO: Turn off secure mode and reset GPU */ + /*dev_err(kbdev->dev, "SWITCHING TO NON-SECURE\n");*/ + return 0; /* all ok */ +} + +struct kbase_secure_ops juno_secure_ops = { + .secure_mode_enable = juno_secure_mode_enable, + .secure_mode_disable = juno_secure_mode_disable, +}; + +static struct kbase_platform_config versatile_platform_config = { +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} + +int kbase_platform_early_init(void) +{ + /* Nothing needed at this stage */ + return 0; +} diff --git a/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h new file mode 100644 index 00000000000..fa5e9e9a5b1 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/juno_soc/mali_kbase_config_platform.h @@ -0,0 +1,94 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * Maximum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MAX 600000 +/** + * Minimum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MIN 600000 + +/** + * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock + * + * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_cpu_clk_speed_func. + * Default Value: NA + */ +#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed) + +/** + * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock + * + * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_gpu_clk_speed_func. 
+ * Default Value: NA + */ +#define GPU_SPEED_FUNC (NULL) + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +/** Power model for IPA + * + * Attached value: pointer to @ref mali_pa_model_ops + */ +#ifdef CONFIG_DEVFREQ_THERMAL +#define POWER_MODEL_CALLBACKS (&juno_model_ops) +#else +#define POWER_MODEL_CALLBACKS (NULL) +#endif + +/** + * Secure mode switch + * + * Attached value: pointer to @ref kbase_secure_ops + */ +#define SECURE_CALLBACKS (&juno_secure_ops) + +extern struct kbase_pm_callback_conf pm_callbacks; +#ifdef CONFIG_DEVFREQ_THERMAL +extern struct devfreq_cooling_ops juno_model_ops; +#endif +extern struct kbase_secure_ops juno_secure_ops; diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h new file mode 100644 index 00000000000..7cb3be7f78c --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_common.h @@ -0,0 +1,26 @@ +/* + * + * (C) COPYRIGHT 2010-2013 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * @brief Entry point to transfer control to a platform for early initialization + * + * This function is called early on in the initialization during execution of + * @ref kbase_driver_init. + * + * @return Zero to indicate success non-zero for failure. + */ +int kbase_platform_early_init(void); diff --git a/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h new file mode 100644 index 00000000000..01f9dfce93c --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/mali_kbase_platform_fake.h @@ -0,0 +1,38 @@ +/* + * + * (C) COPYRIGHT 2010-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifdef CONFIG_MALI_PLATFORM_FAKE + +/** + * kbase_platform_fake_register - Entry point for fake platform registration + * + * This function is called early on in the initialization during execution of + * kbase_driver_init. + * + * Return: 0 to indicate success, non-zero for failure. + */ +int kbase_platform_fake_register(void); + +/** + * kbase_platform_fake_unregister - Entry point for fake platform unregistration + * + * This function is called in the termination during execution of + * kbase_driver_exit. 
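+ * It undoes the platform device registration performed by
+ * kbase_platform_fake_register().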
+ */ +void kbase_platform_fake_unregister(void); + +#endif /* CONFIG_MALI_PLATFORM_FAKE */ diff --git a/drivers/gpu/arm/midgard/platform/vexpress/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild new file mode 100644 index 00000000000..084a1561343 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress/Kbuild @@ -0,0 +1,18 @@ +# +# (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +obj-y += mali_kbase_config_vexpress.o +obj-y += mali_kbase_cpu_vexpress.o diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h new file mode 100644 index 00000000000..ac5060af6a7 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_platform.h @@ -0,0 +1,97 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include "mali_kbase_cpu_vexpress.h" + +/** + * Maximum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MAX (5000) +/** + * Minimum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MIN (5000) + +/** + * Values used for determining the GPU frequency based on the LogicTile type + * Used by the function kbase_get_platform_logic_tile_type + */ +#define VE_VIRTEX6_GPU_FREQ_MIN 5000 +#define VE_VIRTEX6_GPU_FREQ_MAX 5000 +#define VE_VIRTEX7_GPU_FREQ_MIN 40000 +#define VE_VIRTEX7_GPU_FREQ_MAX 40000 + +/** + * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock + * + * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_cpu_clk_speed_func. + * Default Value: NA + */ +#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed) + +/** + * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock + * + * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_gpu_clk_speed_func. 
+ * Default Value: NA + */ +#define GPU_SPEED_FUNC (NULL) + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +/** Power model for IPA + * + * Attached value: pointer to @ref mali_pa_model_ops + */ +#define POWER_MODEL_CALLBACKS (NULL) + +/** + * Secure mode switch + * + * Attached value: pointer to @ref kbase_secure_ops + */ +#define SECURE_CALLBACKS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c new file mode 100644 index 00000000000..687b1a8c043 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_config_vexpress.c @@ -0,0 +1,85 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#include <linux/ioport.h> +#include <mali_kbase.h> +#include <mali_kbase_defs.h> +#include <mali_kbase_config.h> +#include "mali_kbase_cpu_vexpress.h" +#include "mali_kbase_config_platform.h" + +#define HARD_RESET_AT_POWER_OFF 0 + +#ifndef CONFIG_OF +static struct kbase_io_resources io_resources = { + .job_irq_number = 68, + .mmu_irq_number = 69, + .gpu_irq_number = 70, + .io_memory_region = { + .start = 0xFC010000, + .end = 0xFC010000 + (4096 * 4) - 1 + } +}; +#endif /* CONFIG_OF */ + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ +#if HARD_RESET_AT_POWER_OFF + /* Cause a GPU hard reset to test whether we have actually idled the GPU + * and that we properly reconfigure the GPU on power up. + * Usually this would be dangerous, but if the GPU is working correctly it should + * be completely safe as the GPU should not be active at this point. + * However this is disabled normally because it will most likely interfere with + * bus logging etc. 
+ */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); +#endif +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +static struct kbase_platform_config versatile_platform_config = { +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} + + +int kbase_platform_early_init(void) +{ + /* Nothing needed at this stage */ + return 0; +} diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c new file mode 100644 index 00000000000..9bc51f1e2da --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.c @@ -0,0 +1,210 @@ +/* + * + * (C) COPYRIGHT 2011-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include <linux/io.h> +#include <mali_kbase.h> +#include "mali_kbase_cpu_vexpress.h" + +#define HZ_IN_MHZ (1000000) + +#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000) +#define MOTHERBOARD_SYS_CFG_START (0x10000000) +#define SYS_CFGDATA_OFFSET (0x000000A0) +#define SYS_CFGCTRL_OFFSET (0x000000A4) +#define SYS_CFGSTAT_OFFSET (0x000000A8) + +#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31) +#define READ_REG_BIT_VALUE (0 << 30) +#define DCC_DEFAULT_BIT_VALUE (0 << 26) +#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20) +#define SITE_DEFAULT_BIT_VALUE (1 << 16) +#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12) +#define DEVICE_DEFAULT_BIT_VALUE (2 << 0) +#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0) +#define SYS_CFG_ERROR_BIT_VALUE (1 << 1) + +#define FEED_REG_BIT_MASK (0x0F) +#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03) +#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07) +#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B) +#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F) +#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13) + +/* the following three values used for reading + * HBI value of the LogicTile daughterboard */ +#define VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 (0x10000000) +#define VE_SYS_PROC_ID1_OFFSET (0x00000088) +#define VE_LOGIC_TILE_HBI_MASK (0x00000FFF) + +#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos)) + +#define CPU_CLOCK_SPEED_UNDEFINED (0) + +static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED; + +static DEFINE_RAW_SPINLOCK(syscfg_lock); +/** + * kbase_get_vendor_specific_cpu_clock_speed -Retrieves the CPU clock speed + * @cpu_clock - the value of CPU clock speed in MHz + * + * Returns 0 on success, error code otherwise. + * + * The implementation is platform specific. 
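+ * On Versatile Express the speed is obtained by issuing a SYS_CFG read of
+ * the OSC2 oscillator and then applying the PA/PB/PC clock dividers taken
+ * from the SCC CFGRW0 register; the first successfully read value is
+ * cached in cpu_clock_speed.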
+*/ +int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock) +{ + int err = 0; + u32 reg_val = 0; + u32 osc2_value = 0; + u32 pa_divide = 0; + u32 pb_divide = 0; + u32 pc_divide = 0; + void __iomem *syscfg_reg = NULL; + void __iomem *scc_reg = NULL; + + if (CPU_CLOCK_SPEED_UNDEFINED != cpu_clock_speed) { + *cpu_clock = cpu_clock_speed; + return 0; + } + + /* Init the value in case something goes wrong */ + *cpu_clock = 0; + + /* Map CPU register into virtual memory */ + syscfg_reg = ioremap(MOTHERBOARD_SYS_CFG_START, 0x1000); + if (syscfg_reg == NULL) { + err = -EIO; + goto syscfg_reg_map_failed; + } + + scc_reg = ioremap(CORETILE_EXPRESS_A9X4_SCC_START, 0x1000); + if (scc_reg == NULL) { + err = -EIO; + goto scc_reg_map_failed; + } + + raw_spin_lock(&syscfg_lock); + + /* Read SYS regs - OSC2 */ + reg_val = readl(syscfg_reg + SYS_CFGCTRL_OFFSET); + + /* Check if there is any other undergoing request */ + if (reg_val & SYS_CFGCTRL_START_BIT_VALUE) { + err = -EBUSY; + goto ongoing_request; + } + /* Reset the CGFGSTAT reg */ + writel(0, (syscfg_reg + SYS_CFGSTAT_OFFSET)); + + writel(SYS_CFGCTRL_START_BIT_VALUE | READ_REG_BIT_VALUE | + DCC_DEFAULT_BIT_VALUE | + SYS_CFG_OSC_FUNC_BIT_VALUE | + SITE_DEFAULT_BIT_VALUE | + BOARD_STACK_POS_DEFAULT_BIT_VALUE | + DEVICE_DEFAULT_BIT_VALUE, + (syscfg_reg + SYS_CFGCTRL_OFFSET)); + /* Wait for the transaction to complete */ + while (!(readl(syscfg_reg + SYS_CFGSTAT_OFFSET) & + SYS_CFG_COMPLETE_BIT_VALUE)) + ; + /* Read SYS_CFGSTAT Register to get the status of submitted + * transaction */ + reg_val = readl(syscfg_reg + SYS_CFGSTAT_OFFSET); + + if (reg_val & SYS_CFG_ERROR_BIT_VALUE) { + /* Error while setting register */ + err = -EIO; + goto set_reg_error; + } + + osc2_value = readl(syscfg_reg + SYS_CFGDATA_OFFSET); + /* Read the SCC CFGRW0 register */ + reg_val = readl(scc_reg); + + /* + * Select the appropriate feed: + * CFGRW0[0] - CLKOB + * CFGRW0[1] - CLKOC + * CFGRW0[2] - FACLK (CLK)B FROM AXICLK PLL) + */ + /* Calculate the FCLK */ + if (IS_SINGLE_BIT_SET(reg_val, 0)) { + /* CFGRW0[0] - CLKOB */ + /* CFGRW0[6:3] */ + pa_divide = ((reg_val & (FEED_REG_BIT_MASK << + FCLK_PA_DIVIDE_BIT_SHIFT)) >> + FCLK_PA_DIVIDE_BIT_SHIFT); + /* CFGRW0[10:7] */ + pb_divide = ((reg_val & (FEED_REG_BIT_MASK << + FCLK_PB_DIVIDE_BIT_SHIFT)) >> + FCLK_PB_DIVIDE_BIT_SHIFT); + *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1); + } else if (IS_SINGLE_BIT_SET(reg_val, 1)) { + /* CFGRW0[1] - CLKOC */ + /* CFGRW0[6:3] */ + pa_divide = ((reg_val & (FEED_REG_BIT_MASK << + FCLK_PA_DIVIDE_BIT_SHIFT)) >> + FCLK_PA_DIVIDE_BIT_SHIFT); + /* CFGRW0[14:11] */ + pc_divide = ((reg_val & (FEED_REG_BIT_MASK << + FCLK_PC_DIVIDE_BIT_SHIFT)) >> + FCLK_PC_DIVIDE_BIT_SHIFT); + *cpu_clock = osc2_value * (pa_divide + 1) / (pc_divide + 1); + } else if (IS_SINGLE_BIT_SET(reg_val, 2)) { + /* CFGRW0[2] - FACLK */ + /* CFGRW0[18:15] */ + pa_divide = ((reg_val & (FEED_REG_BIT_MASK << + AXICLK_PA_DIVIDE_BIT_SHIFT)) >> + AXICLK_PA_DIVIDE_BIT_SHIFT); + /* CFGRW0[22:19] */ + pb_divide = ((reg_val & (FEED_REG_BIT_MASK << + AXICLK_PB_DIVIDE_BIT_SHIFT)) >> + AXICLK_PB_DIVIDE_BIT_SHIFT); + *cpu_clock = osc2_value * (pa_divide + 1) / (pb_divide + 1); + } else { + err = -EIO; + } + +set_reg_error: +ongoing_request: + raw_spin_unlock(&syscfg_lock); + *cpu_clock /= HZ_IN_MHZ; + + if (!err) + cpu_clock_speed = *cpu_clock; + + iounmap(scc_reg); + +scc_reg_map_failed: + iounmap(syscfg_reg); + +syscfg_reg_map_failed: + + return err; +} + +u32 kbase_get_platform_logic_tile_type(void) +{ + void __iomem *syscfg_reg 
= NULL; + u32 sys_procid1 = 0; + + syscfg_reg = ioremap(VE_MOTHERBOARD_PERIPHERALS_SMB_CS7 + VE_SYS_PROC_ID1_OFFSET, 4); + + sys_procid1 = (NULL != syscfg_reg) ? readl(syscfg_reg) : 0; + + return sys_procid1 & VE_LOGIC_TILE_HBI_MASK; +} diff --git a/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h new file mode 100644 index 00000000000..ef9bfd72161 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress/mali_kbase_cpu_vexpress.h @@ -0,0 +1,48 @@ +/* + * + * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +#ifndef _KBASE_CPU_VEXPRESS_H_ +#define _KBASE_CPU_VEXPRESS_H_ + +/** + * Versatile Express implementation of @ref kbase_cpu_clk_speed_func. + */ +int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock); + +/** + * kbase_get_platform_logic_tile_type - determines which LogicTile type + * is used by Versatile Express + * + * When platform_config build parameter is specified as vexpress, i.e., + * platform_config=vexpress, GPU frequency may vary dependent on the + * particular platform. The GPU frequency depends on the LogicTile type. + * + * This function is called by kbase_common_device_init to determine + * which LogicTile type is used by the platform by reading the HBI value + * of the daughterboard which holds the LogicTile: + * + * 0x192 HBI0192 Virtex-5 + * 0x217 HBI0217 Virtex-6 + * 0x247 HBI0247 Virtex-7 + * + * Return: HBI value of the logic tile daughterboard, zero if not accessible + */ +u32 kbase_get_platform_logic_tile_type(void); + +#endif /* _KBASE_CPU_VEXPRESS_H_ */ diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild new file mode 100644 index 00000000000..d9bfabc28b6 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/Kbuild @@ -0,0 +1,16 @@ +# +# (C) COPYRIGHT 2013 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + +obj-y += mali_kbase_config_vexpress.o diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h new file mode 100644 index 00000000000..11c320ecc1f --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h @@ -0,0 +1,86 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * Maximum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MAX 5000 +/** + * Minimum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MIN 5000 + +/** + * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock + * + * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_cpu_clk_speed_func. + * Default Value: NA + */ +#define CPU_SPEED_FUNC (&kbase_cpuprops_get_default_clock_speed) + +/** + * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock + * + * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_gpu_clk_speed_func. + * Default Value: NA + */ +#define GPU_SPEED_FUNC (NULL) + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +/** Power model for IPA + * + * Attached value: pointer to @ref mali_pa_model_ops + */ +#define POWER_MODEL_CALLBACKS (NULL) + +/** + * Secure mode switch + * + * Attached value: pointer to @ref kbase_secure_ops + */ +#define SECURE_CALLBACKS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c new file mode 100644 index 00000000000..3ff0930fb4a --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_1xv7_a57/mali_kbase_config_vexpress.c @@ -0,0 +1,79 @@ +/* + * + * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +#include <linux/ioport.h> +#include <mali_kbase.h> +#include <mali_kbase_defs.h> +#include <mali_kbase_config.h> + +#define HARD_RESET_AT_POWER_OFF 0 + +#ifndef CONFIG_OF +static struct kbase_io_resources io_resources = { + .job_irq_number = 68, + .mmu_irq_number = 69, + .gpu_irq_number = 70, + .io_memory_region = { + .start = 0x2f010000, + .end = 0x2f010000 + (4096 * 4) - 1} +}; +#endif + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ +#if HARD_RESET_AT_POWER_OFF + /* Cause a GPU hard reset to test whether we have actually idled the GPU + * and that we properly reconfigure the GPU on power up. + * Usually this would be dangerous, but if the GPU is working correctly it should + * be completely safe as the GPU should not be active at this point. + * However this is disabled normally because it will most likely interfere with + * bus logging etc. + */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); +#endif +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +static struct kbase_platform_config versatile_platform_config = { +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} + +int kbase_platform_early_init(void) +{ + /* Nothing needed at this stage */ + return 0; +} diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild new file mode 100644 index 00000000000..0cb41ce8952 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/Kbuild @@ -0,0 +1,18 @@ +# +# (C) COPYRIGHT 2012 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + + +obj-y += mali_kbase_config_vexpress.o +obj-y += mali_kbase_cpu_vexpress.o diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h new file mode 100644 index 00000000000..9bc2985fef0 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h @@ -0,0 +1,88 @@ +/* + * + * (C) COPYRIGHT 2014-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#include "mali_kbase_cpu_vexpress.h" + +/** + * Maximum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MAX 10000 +/** + * Minimum frequency GPU will be clocked at. Given in kHz. + * This must be specified as there is no default value. + * + * Attached value: number in kHz + * Default value: NA + */ +#define GPU_FREQ_KHZ_MIN 10000 + +/** + * CPU_SPEED_FUNC - A pointer to a function that calculates the CPU clock + * + * CPU clock speed of the platform is in MHz - see kbase_cpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_cpu_clk_speed_func. + * Default Value: NA + */ +#define CPU_SPEED_FUNC (&kbase_get_vexpress_cpu_clock_speed) + +/** + * GPU_SPEED_FUNC - A pointer to a function that calculates the GPU clock + * + * GPU clock speed of the platform in MHz - see kbase_gpu_clk_speed_func + * for the function prototype. + * + * Attached value: A kbase_gpu_clk_speed_func. + * Default Value: NA + */ +#define GPU_SPEED_FUNC (NULL) + +/** + * Power management configuration + * + * Attached value: pointer to @ref kbase_pm_callback_conf + * Default value: See @ref kbase_pm_callback_conf + */ +#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks) + +/** + * Platform specific configuration functions + * + * Attached value: pointer to @ref kbase_platform_funcs_conf + * Default value: See @ref kbase_platform_funcs_conf + */ +#define PLATFORM_FUNCS (NULL) + +/** Power model for IPA + * + * Attached value: pointer to @ref mali_pa_model_ops + */ +#define POWER_MODEL_CALLBACKS (NULL) + +/** + * Secure mode switch + * + * Attached value: pointer to @ref kbase_secure_ops + */ +#define SECURE_CALLBACKS (NULL) + +extern struct kbase_pm_callback_conf pm_callbacks; diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c new file mode 100644 index 00000000000..76ffe4a1e59 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_vexpress.c @@ -0,0 +1,83 @@ +/* + * + * (C) COPYRIGHT 2011-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + + + +#include <linux/ioport.h> +#include <mali_kbase.h> +#include <mali_kbase_defs.h> +#include <mali_kbase_config.h> +#include "mali_kbase_cpu_vexpress.h" + +#define HARD_RESET_AT_POWER_OFF 0 + +#ifndef CONFIG_OF +static struct kbase_io_resources io_resources = { + .job_irq_number = 75, + .mmu_irq_number = 76, + .gpu_irq_number = 77, + .io_memory_region = { + .start = 0x2F000000, + .end = 0x2F000000 + (4096 * 4) - 1} +}; +#endif + +static int pm_callback_power_on(struct kbase_device *kbdev) +{ + /* Nothing is needed on VExpress, but we may have destroyed GPU state (if the below HARD_RESET code is active) */ + return 1; +} + +static void pm_callback_power_off(struct kbase_device *kbdev) +{ +#if HARD_RESET_AT_POWER_OFF + /* Cause a GPU hard reset to test whether we have actually idled the GPU + * and that we properly reconfigure the GPU on power up. + * Usually this would be dangerous, but if the GPU is working correctly it should + * be completely safe as the GPU should not be active at this point. + * However this is disabled normally because it will most likely interfere with + * bus logging etc. + */ + KBASE_TRACE_ADD(kbdev, CORE_GPU_HARD_RESET, NULL, NULL, 0u, 0); + kbase_os_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), GPU_COMMAND_HARD_RESET); +#endif +} + +struct kbase_pm_callback_conf pm_callbacks = { + .power_on_callback = pm_callback_power_on, + .power_off_callback = pm_callback_power_off, + .power_suspend_callback = NULL, + .power_resume_callback = NULL +}; + +static struct kbase_platform_config versatile_platform_config = { +#ifndef CONFIG_OF + .io_resources = &io_resources +#endif +}; + +struct kbase_platform_config *kbase_get_platform_config(void) +{ + return &versatile_platform_config; +} + +int kbase_platform_early_init(void) +{ + /* Nothing needed at this stage */ + return 0; +} + diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c new file mode 100644 index 00000000000..816dff49835 --- /dev/null +++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.c @@ -0,0 +1,71 @@ +/* + * + * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ *
+ */
+
+
+
+
+
+#include <linux/io.h>
+#include <mali_kbase.h>
+#include "mali_kbase_cpu_vexpress.h"
+
+#define HZ_IN_MHZ (1000000)
+
+#define CORETILE_EXPRESS_A9X4_SCC_START (0x100E2000)
+#define MOTHERBOARD_SYS_CFG_START (0x10000000)
+#define SYS_CFGDATA_OFFSET (0x000000A0)
+#define SYS_CFGCTRL_OFFSET (0x000000A4)
+#define SYS_CFGSTAT_OFFSET (0x000000A8)
+
+#define SYS_CFGCTRL_START_BIT_VALUE (1 << 31)
+#define READ_REG_BIT_VALUE (0 << 30)
+#define DCC_DEFAULT_BIT_VALUE (0 << 26)
+#define SYS_CFG_OSC_FUNC_BIT_VALUE (1 << 20)
+#define SITE_DEFAULT_BIT_VALUE (1 << 16)
+#define BOARD_STACK_POS_DEFAULT_BIT_VALUE (0 << 12)
+#define DEVICE_DEFAULT_BIT_VALUE (2 << 0)
+#define SYS_CFG_COMPLETE_BIT_VALUE (1 << 0)
+#define SYS_CFG_ERROR_BIT_VALUE (1 << 1)
+
+#define FEED_REG_BIT_MASK (0x0F)
+#define FCLK_PA_DIVIDE_BIT_SHIFT (0x03)
+#define FCLK_PB_DIVIDE_BIT_SHIFT (0x07)
+#define FCLK_PC_DIVIDE_BIT_SHIFT (0x0B)
+#define AXICLK_PA_DIVIDE_BIT_SHIFT (0x0F)
+#define AXICLK_PB_DIVIDE_BIT_SHIFT (0x13)
+
+#define IS_SINGLE_BIT_SET(val, pos) (val&(1<<pos))
+
+#define CPU_CLOCK_SPEED_UNDEFINED 0
+
+#define CPU_CLOCK_SPEED_6XV7 50
+
+static u32 cpu_clock_speed = CPU_CLOCK_SPEED_UNDEFINED;
+
+static DEFINE_RAW_SPINLOCK(syscfg_lock);
+/**
+ * kbase_get_vexpress_cpu_clock_speed - retrieves the CPU clock speed
+ * @cpu_clock: out parameter, the CPU clock speed in MHz
+ * Runtime detection is not yet implemented for the 6XV7 tile, so a
+ * hard-coded value is returned.
+ * Return: always 0 (success)
+ */
+int kbase_get_vexpress_cpu_clock_speed(u32 *cpu_clock)
+{
+	/* TODO: MIDBASE-2873 - Provide runtime detection of CPU clock freq for 6XV7 board */
+	*cpu_clock = CPU_CLOCK_SPEED_6XV7;
+
+	return 0;
+}
diff --git a/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
new file mode 100644
index 00000000000..23647ccb087
--- /dev/null
+++ b/drivers/gpu/arm/midgard/platform/vexpress_6xvirtex7_10mhz/mali_kbase_cpu_vexpress.h
@@ -0,0 +1,28 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013, 2015 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + + + +/** + * @file mali_ukk_os.h + * Types and definitions that are common for Linux OSs for the kernel side of the + * User-Kernel interface. + */ + +#ifndef _UKK_OS_H_ /* Linux version */ +#define _UKK_OS_H_ + +#include <linux/fs.h> + +/** + * @addtogroup uk_api User-Kernel Interface API + * @{ + */ + +/** + * @addtogroup uk_api_kernel UKK (Kernel side) + * @{ + */ + +/** + * Internal OS specific data structure associated with each UKK session. Part + * of a ukk_session object. + */ +typedef struct ukkp_session { + int dummy; /**< No internal OS specific data at this time */ +} ukkp_session; + +/** @} end group uk_api_kernel */ + +/** @} end group uk_api */ + +#endif /* _UKK_OS_H__ */ diff --git a/drivers/gpu/drm/pl111/Kbuild b/drivers/gpu/drm/pl111/Kbuild new file mode 100644 index 00000000000..f10d58c70df --- /dev/null +++ b/drivers/gpu/drm/pl111/Kbuild @@ -0,0 +1,28 @@ +# +# (C) COPYRIGHT ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + +pl111_drm-y += pl111_drm_device.o \ + pl111_drm_connector.o \ + pl111_drm_crtc.o \ + pl111_drm_cursor.o \ + pl111_drm_dma_buf.o \ + pl111_drm_encoder.o \ + pl111_drm_fb.o \ + pl111_drm_gem.o \ + pl111_drm_pl111.o \ + pl111_drm_platform.o \ + pl111_drm_suspend.o \ + pl111_drm_vma.o + +obj-$(CONFIG_DRM_PL111) += pl111_drm.o diff --git a/drivers/gpu/drm/pl111/Kconfig b/drivers/gpu/drm/pl111/Kconfig new file mode 100644 index 00000000000..60b465c56c5 --- /dev/null +++ b/drivers/gpu/drm/pl111/Kconfig @@ -0,0 +1,23 @@ +# +# (C) COPYRIGHT ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + +config DRM_PL111 + tristate "DRM Support for PL111 CLCD Controller" + depends on DRM + select DRM_KMS_HELPER + select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE + help + Choose this option for DRM support for the PL111 CLCD controller. + If M is selected the module will be called pl111_drm. + diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile new file mode 100644 index 00000000000..2869f587266 --- /dev/null +++ b/drivers/gpu/drm/pl111/Makefile @@ -0,0 +1,32 @@ +# +# (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. 
+# +# A copy of the licence is included with the program, and can also be obtained +# from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301, USA. +# +# + + +# linux build system bootstrap for out-of-tree module + +# default to building for the host +ARCH ?= $(shell uname -m) + +ifeq ($(KDIR),) +$(error Must specify KDIR to point to the kernel to target)) +endif + +all: pl111_drm + +pl111_drm: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) EXTRA_CFLAGS="-I$(CURDIR)/../../../include" CONFIG_DMA_SHARED_BUFFER_USES_KDS=y CONFIG_DRM_PL111=m + +clean: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean + diff --git a/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/drivers/gpu/drm/pl111/pl111_clcd_ext.h new file mode 100644 index 00000000000..d3e0086ff02 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_clcd_ext.h @@ -0,0 +1,95 @@ +/* + * + * (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + +/** + * pl111_clcd_ext.h + * Extended CLCD register definitions + */ + +#ifndef PL111_CLCD_EXT_H_ +#define PL111_CLCD_EXT_H_ + +/* + * PL111 cursor register definitions not defined in the kernel's clcd header. + * + * TODO MIDEGL-1718: move to include/linux/amba/clcd.h + */ + +#define CLCD_CRSR_IMAGE 0x00000800 +#define CLCD_CRSR_IMAGE_MAX_WORDS 256 +#define CLCD_CRSR_IMAGE_WORDS_PER_LINE 4 +#define CLCD_CRSR_IMAGE_PIXELS_PER_WORD 16 + +#define CLCD_CRSR_LBBP_COLOR_MASK 0x00000003 +#define CLCD_CRSR_LBBP_BACKGROUND 0x0 +#define CLCD_CRSR_LBBP_FOREGROUND 0x1 +#define CLCD_CRSR_LBBP_TRANSPARENT 0x2 +#define CLCD_CRSR_LBBP_INVERSE 0x3 + + +#define CLCD_CRSR_CTRL 0x00000c00 +#define CLCD_CRSR_CONFIG 0x00000c04 +#define CLCD_CRSR_PALETTE_0 0x00000c08 +#define CLCD_CRSR_PALETTE_1 0x00000c0c +#define CLCD_CRSR_XY 0x00000c10 +#define CLCD_CRSR_CLIP 0x00000c14 +#define CLCD_CRSR_IMSC 0x00000c20 +#define CLCD_CRSR_ICR 0x00000c24 +#define CLCD_CRSR_RIS 0x00000c28 +#define CLCD_MIS 0x00000c2c + +#define CRSR_CTRL_CRSR_ON (1 << 0) +#define CRSR_CTRL_CRSR_MAX 3 +#define CRSR_CTRL_CRSR_NUM_SHIFT 4 +#define CRSR_CTRL_CRSR_NUM_MASK \ + (CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT) +#define CRSR_CTRL_CURSOR_0 0 +#define CRSR_CTRL_CURSOR_1 1 +#define CRSR_CTRL_CURSOR_2 2 +#define CRSR_CTRL_CURSOR_3 3 + +#define CRSR_CONFIG_CRSR_SIZE (1 << 0) +#define CRSR_CONFIG_CRSR_FRAME_SYNC (1 << 1) + +#define CRSR_PALETTE_RED_SHIFT 0 +#define CRSR_PALETTE_GREEN_SHIFT 8 +#define CRSR_PALETTE_BLUE_SHIFT 16 + +#define CRSR_PALETTE_RED_MASK 0x000000ff +#define CRSR_PALETTE_GREEN_MASK 0x0000ff00 +#define CRSR_PALETTE_BLUE_MASK 0x00ff0000 +#define CRSR_PALETTE_MASK (~0xff000000) + +#define CRSR_XY_MASK 0x000003ff +#define CRSR_XY_X_SHIFT 0 +#define CRSR_XY_Y_SHIFT 16 + +#define CRSR_XY_X_MASK CRSR_XY_MASK +#define CRSR_XY_Y_MASK (CRSR_XY_MASK << CRSR_XY_Y_SHIFT) + +#define CRSR_CLIP_MASK 0x3f +#define CRSR_CLIP_X_SHIFT 0 +#define CRSR_CLIP_Y_SHIFT 8 + +#define CRSR_CLIP_X_MASK CRSR_CLIP_MASK +#define CRSR_CLIP_Y_MASK (CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT) + +#define CRSR_IMSC_CRSR_IM (1<<0) +#define CRSR_ICR_CRSR_IC (1<<0) +#define CRSR_RIS_CRSR_RIS 
(1<<0) +#define CRSR_MIS_CRSR_MIS (1<<0) + +#endif /* PL111_CLCD_EXT_H_ */ diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h new file mode 100644 index 00000000000..64d87b60ff6 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm.h @@ -0,0 +1,270 @@ +/* + * + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +#ifndef _PL111_DRM_H_ +#define _PL111_DRM_H_ + +#define DRIVER_AUTHOR "ARM Ltd." +#define DRIVER_NAME "pl111_drm" +#define DRIVER_DESC "DRM module for PL111" +#define DRIVER_LICENCE "GPL" +#define DRIVER_ALIAS "platform:pl111_drm" +#define DRIVER_DATE "20101111" +#define DRIVER_VERSION "0.2" +#define DRIVER_MAJOR 2 +#define DRIVER_MINOR 1 +#define DRIVER_PATCHLEVEL 1 + +/* + * Number of flips allowed in flight at any one time. Any more flips requested + * beyond this value will cause the caller to block until earlier flips have + * completed. + * + * For performance reasons, this must be greater than the number of buffers + * used in the rendering pipeline. Note that the rendering pipeline can contain + * different types of buffer, e.g.: + * - 2 final framebuffers + * - >2 geometry buffers for GPU use-cases + * - >2 vertex buffers for GPU use-cases + * + * For example, a system using 5 geometry buffers could have 5 flips in flight, + * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater. + * + * Whilst there may be more intermediate buffers (such as vertex/geometry) than + * final framebuffers, KDS is used to ensure that GPU rendering waits for the + * next off-screen buffer, so it doesn't overwrite an on-screen buffer and + * produce tearing. + */ + +/* + * Here, we choose a conservative value. A lower value is most likely + * suitable for GPU use-cases. + */ +#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16 + +#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2) + +struct pl111_drm_flip_resource; + +struct pl111_gem_bo_dma { + dma_addr_t fb_dev_addr; + void *fb_cpu_addr; +}; + +struct pl111_gem_bo_shm { + struct page **pages; + dma_addr_t *dma_addrs; +}; + +struct pl111_gem_bo { + struct drm_gem_object gem_object; + u32 type; + union { + struct pl111_gem_bo_dma dma; + struct pl111_gem_bo_shm shm; + } backing_data; + struct sg_table *sgt; +}; + +extern struct pl111_drm_dev_private priv; + +struct pl111_drm_framebuffer { + struct drm_framebuffer fb; + struct pl111_gem_bo *bo; +}; + +struct pl111_drm_flip_resource { +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + /* This is the kds set associated to the dma_buf we want to flip */ + struct kds_resource_set *kds_res_set; +#endif + struct drm_framebuffer *fb; + struct drm_crtc *crtc; + struct list_head link; + bool page_flip; + struct drm_pending_vblank_event *event; +}; + +struct pl111_drm_crtc { + struct drm_crtc crtc; + int crtc_index; + +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + /* This protects "old_kds_res_set" and "displaying_fb" */ + spinlock_t current_displaying_lock; + /* + * When a buffer is displayed its associated kds resource + * will be obtained and stored here. 
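+	 * Holding this kds resource set keeps other users of the buffer,
+	 * notably the GPU, from writing to it while it is being scanned out.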
Every time a buffer + * flip is completed this old kds set is released and assigned + * the kds set of the new buffer. + */ + struct kds_resource_set *old_kds_res_set; + /* + * Stores which frame buffer is currently being displayed by + * this CRTC or NULL if nothing is being displayed. It is used + * to tell whether we need to obtain a set of kds resources for + * exported buffer objects. + */ + struct drm_framebuffer *displaying_fb; +#endif + struct drm_display_mode *new_mode; + struct drm_display_mode *current_mode; + int last_bpp; + + /* + * This spinlock protects "update_queue", "current_update_res" + * and calls to do_flip_to_res() which updates the CLCD base + * registers. + */ + spinlock_t base_update_lock; + /* + * The resource that caused a base address update. Only one can be + * pending, hence it's != NULL if there's a pending update + */ + struct pl111_drm_flip_resource *current_update_res; + /* Queue of things waiting to update the base address */ + struct list_head update_queue; + + void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res, + struct drm_framebuffer *fb); +}; + +struct pl111_drm_connector { + struct drm_connector connector; +}; + +struct pl111_drm_encoder { + struct drm_encoder encoder; +}; + +struct pl111_drm_dev_private { + struct pl111_drm_crtc *pl111_crtc; + + struct amba_device *amba_dev; + unsigned long mmio_start; + __u32 mmio_len; + void *regs; + struct clk *clk; +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + struct kds_callback kds_cb; + struct kds_callback kds_obtain_current_cb; +#endif + /* + * Number of flips that were started in show_framebuffer_on_crtc(), + * but haven't completed yet - because we do deferred flipping + */ + atomic_t nr_flips_in_flight; + wait_queue_head_t wait_for_flips; + + /* + * Used to prevent race between pl111_dma_buf_release and + * drm_gem_prime_handle_to_fd + */ + struct mutex export_dma_buf_lock; + + uint32_t number_crtcs; + + /* Cache for flip resources used to avoid kmalloc on each page flip */ + struct kmem_cache *page_flip_slab; +}; + +enum pl111_cursor_size { + CURSOR_32X32, + CURSOR_64X64 +}; + +enum pl111_cursor_sync { + CURSOR_SYNC_NONE, + CURSOR_SYNC_VSYNC +}; + + +/** + * Buffer allocation function which is more flexible than dumb_create(), + * it allows passing driver specific flags to control the kind of buffer + * to be allocated. + */ +int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); + +/****** TODO MIDEGL-1718: this should be moved to uapi/include/drm/pl111_drm.h ********/ + +/* + * Parameters for different buffer objects: + * bit [0]: backing storage + * (0 -> SHM) + * (1 -> DMA) + * bit [2:1]: kind of mapping + * (0x0 -> uncached) + * (0x1 -> write combine) + * (0x2 -> cached) + */ +#define PL111_BOT_MASK (0x7) +#define PL111_BOT_SHM (0x0 << 0) +#define PL111_BOT_DMA (0x1 << 0) +#define PL111_BOT_UNCACHED (0x0 << 1) +#define PL111_BOT_WC (0x1 << 1) +#define PL111_BOT_CACHED (0x2 << 1) + +/** + * User-desired buffer creation information structure. + * + * @size: user-desired memory allocation size. + * - this size value would be page-aligned internally. + * @flags: user request for setting memory type or cache attributes as a bit op + * - PL111_BOT_DMA / PL111_BOT_SHM + * - PL111_BOT_UNCACHED / PL111_BOT_WC / PL111_BOT_CACHED + * @handle: returned a handle to created gem object. + * - this handle will be set by gem module of kernel side. 
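 *
 * A minimal userspace sketch (purely illustrative; it assumes an open DRM
 * file descriptor fd and uses the flag and ioctl names from this header):
 *
 *	struct drm_pl111_gem_create c = {
 *		.width = 640, .height = 480, .bpp = 32,
 *		.flags = PL111_BOT_DMA | PL111_BOT_WC,
 *	};
 *	ioctl(fd, DRM_IOCTL_PL111_GEM_CREATE, &c);
 *
 * On return the driver has filled in c.handle, c.pitch and c.size.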
+ */ +struct drm_pl111_gem_create { + uint32_t height; + uint32_t width; + uint32_t bpp; + uint32_t flags; + /* handle, pitch, size will be returned */ + uint32_t handle; + uint32_t pitch; + uint64_t size; +}; + +#define DRM_PL111_GEM_CREATE 0x00 + +#define DRM_IOCTL_PL111_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + \ + DRM_PL111_GEM_CREATE, struct drm_pl111_gem_create) +/****************************************************************************/ + +#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \ + (container_of(drm_fb, struct pl111_drm_framebuffer, fb)) + +#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \ + (container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo) + +#define PL111_BO_FROM_GEM(gem_obj) \ + container_of(gem_obj, struct pl111_gem_bo, gem_object) + +#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc) + +#define PL111_ENCODER_FROM_ENCODER(x) \ + container_of(x, struct pl111_drm_encoder, encoder) + +#define PL111_CONNECTOR_FROM_CONNECTOR(x) \ + container_of(x, struct pl111_drm_connector, connector) + +#include "pl111_drm_funcs.h" + +#endif /* _PL111_DRM_H_ */ diff --git a/drivers/gpu/drm/pl111/pl111_drm_connector.c b/drivers/gpu/drm/pl111/pl111_drm_connector.c new file mode 100644 index 00000000000..c7c3a226a86 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm_connector.c @@ -0,0 +1,170 @@ +/* + * + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * pl111_drm_connector.c + * Implementation of the connector functions for PL111 DRM + */ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> + +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> + +#include "pl111_drm.h" + + +static struct { + int w, h, type; +} pl111_drm_modes[] = { + { 640, 480, DRM_MODE_TYPE_PREFERRED}, + { 800, 600, 0}, + {1024, 768, 0}, + { -1, -1, -1} +}; + +void pl111_connector_destroy(struct drm_connector *connector) +{ + struct pl111_drm_connector *pl111_connector = + PL111_CONNECTOR_FROM_CONNECTOR(connector); + + DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector); + + drm_sysfs_connector_remove(connector); + drm_connector_cleanup(connector); + kfree(pl111_connector); +} + +enum drm_connector_status pl111_connector_detect(struct drm_connector + *connector, bool force) +{ + DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector); + return connector_status_connected; +} + +void pl111_connector_dpms(struct drm_connector *connector, int mode) +{ + DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector); +} + +struct drm_encoder * +pl111_connector_helper_best_encoder(struct drm_connector *connector) +{ + DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector); + + if (connector->encoder != NULL) { + return connector->encoder; /* Return attached encoder */ + } else { + /* + * If there is no attached encoder we choose the best candidate + * from the list. + * For PL111 there is only one encoder so we return the first + * one we find. 
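On PL111 the loop below degenerates to taking the first encoder; on hardware with several encoders the placeholder criterion would do real work. A hedged sketch of one plausible criterion (hypothetical helper, not part of this patch; assumes the 3.x drm_connector encoder_ids array):

/* Illustrative only: pick the encoder the connector advertises first. */
static struct drm_encoder *__maybe_unused
example_pick_encoder(struct drm_connector *connector)
{
	struct drm_encoder *encoder;

	list_for_each_entry(encoder,
			&connector->dev->mode_config.encoder_list, head) {
		if (encoder->base.id == connector->encoder_ids[0])
			return encoder;
	}
	return NULL;
}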
+	 * Other h/w would require a suitable criterion below.
+	 */
+	struct drm_encoder *encoder = NULL;
+	struct drm_device *dev = connector->dev;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
+			head) {
+		if (1) { /* criterion ? */
+			break;
+		}
+	}
+	return encoder; /* return best candidate encoder */
+	}
+}
+
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+	int i = 0;
+	int count = 0;
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	while (pl111_drm_modes[i].w != -1) {
+		struct drm_display_mode *mode =
+			drm_mode_find_dmt(connector->dev,
+					pl111_drm_modes[i].w,
+					pl111_drm_modes[i].h,
+					60
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+					, false
+#endif
+					);
+
+		if (mode != NULL) {
+			mode->type |= pl111_drm_modes[i].type;
+			drm_mode_probed_add(connector, mode);
+			count++;
+		}
+
+		i++;
+	}
+
+	DRM_DEBUG_KMS("found %d modes\n", count);
+
+	return count;
+}
+
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+					struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = pl111_connector_destroy,
+	.detect = pl111_connector_detect,
+	.dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = pl111_connector_helper_get_modes,
+	.mode_valid = pl111_connector_helper_mode_valid,
+	.best_encoder = pl111_connector_helper_best_encoder,
+};
+
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+	struct pl111_drm_connector *pl111_connector;
+
+	pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
+					GFP_KERNEL);
+
+	if (pl111_connector == NULL) {
+		pr_err("Failed to allocate pl111_drm_connector\n");
+		return NULL;
+	}
+
+	drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
+				DRM_MODE_CONNECTOR_DVII);
+
+	drm_connector_helper_add(&pl111_connector->connector,
+					&connector_helper_funcs);
+
+	drm_sysfs_connector_add(&pl111_connector->connector);
+
+	return pl111_connector;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100644
index 00000000000..ede07ff9fc0
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,449 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ * + */ + + + +/** + * pl111_drm_crtc.c + * Implementation of the CRTC functions for PL111 DRM + */ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> + +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> + +#include "pl111_drm.h" + +static int pl111_crtc_num; + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0)) +#define export_dma_buf export_dma_buf +#else +#define export_dma_buf dma_buf +#endif + +void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc) +{ + struct drm_device *dev = pl111_crtc->crtc.dev; + struct pl111_drm_flip_resource *old_flip_res; + struct pl111_gem_bo *bo; + unsigned long irq_flags; + int flips_in_flight; +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + unsigned long flags; +#endif + + spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags); + + /* + * Cache the flip resource that caused the IRQ since it will be + * dispatched later. Early return if the IRQ isn't associated to + * a base register update. + * + * TODO MIDBASE-2790: disable IRQs when a flip is not pending. + */ + old_flip_res = pl111_crtc->current_update_res; + if (!old_flip_res) { + spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags); + return; + } + pl111_crtc->current_update_res = NULL; + + /* Prepare the next flip (if any) of the queue as soon as possible. */ + if (!list_empty(&pl111_crtc->update_queue)) { + struct pl111_drm_flip_resource *flip_res; + /* Remove the head of the list */ + flip_res = list_first_entry(&pl111_crtc->update_queue, + struct pl111_drm_flip_resource, link); + list_del(&flip_res->link); + do_flip_to_res(flip_res); + /* + * current_update_res will be set, so guarentees that + * another flip_res coming in gets queued instead of + * handled immediately + */ + } + spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags); + + /* Finalize properly the flip that caused the IRQ */ + DRM_DEBUG_KMS("DRM Finalizing old_flip_res=%p\n", old_flip_res); + + bo = PL111_BO_FROM_FRAMEBUFFER(old_flip_res->fb); +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags); + release_kds_resource_and_display(old_flip_res); + spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags); +#endif + /* Release DMA buffer on this flip */ + + if (bo->gem_object.export_dma_buf != NULL) + dma_buf_put(bo->gem_object.export_dma_buf); + + drm_handle_vblank(dev, pl111_crtc->crtc_index); + + /* Wake up any processes waiting for page flip event */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 8, 0)) + if (old_flip_res->event) { + spin_lock_bh(&dev->event_lock); + drm_send_vblank_event(dev, pl111_crtc->crtc_index, + old_flip_res->event); + spin_unlock_bh(&dev->event_lock); + } +#else + if (old_flip_res->event) { + struct drm_pending_vblank_event *e = old_flip_res->event; + struct timeval now; + unsigned int seq; + + DRM_DEBUG_KMS("%s: wake up page flip event (%p)\n", __func__, + old_flip_res->event); + + spin_lock_bh(&dev->event_lock); + seq = drm_vblank_count_and_time(dev, pl111_crtc->crtc_index, + &now); + e->pipe = pl111_crtc->crtc_index; + e->event.sequence = seq; + e->event.tv_sec = now.tv_sec; + e->event.tv_usec = now.tv_usec; + + list_add_tail(&e->base.link, + &e->base.file_priv->event_list); + + wake_up_interruptible(&e->base.file_priv->event_wait); + spin_unlock_bh(&dev->event_lock); + } +#endif + + drm_vblank_put(dev, pl111_crtc->crtc_index); + + /* + * workqueue.c:process_one_work(): + * "It is 
permissible to free the struct work_struct from + * inside the function that is called from it" + */ + kmem_cache_free(priv.page_flip_slab, old_flip_res); + + flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight); + if (flips_in_flight == 0 || + flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1)) + wake_up(&priv.wait_for_flips); + + DRM_DEBUG_KMS("DRM release flip_res=%p\n", old_flip_res); +} + +void show_framebuffer_on_crtc_cb(void *cb1, void *cb2) +{ + struct pl111_drm_flip_resource *flip_res = cb1; + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc); + + pl111_crtc->show_framebuffer_cb(cb1, cb2); +} + +int show_framebuffer_on_crtc(struct drm_crtc *crtc, + struct drm_framebuffer *fb, bool page_flip, + struct drm_pending_vblank_event *event) +{ + struct pl111_gem_bo *bo; + struct pl111_drm_flip_resource *flip_res; + int flips_in_flight; + int old_flips_in_flight; + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0)) + crtc->fb = fb; +#else + crtc->primary->fb = fb; +#endif + + bo = PL111_BO_FROM_FRAMEBUFFER(fb); + if (bo == NULL) { + DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n"); + return -EINVAL; + } + + /* If this is a full modeset, wait for all outstanding flips to complete + * before continuing. This avoids unnecessary complication from being + * able to queue up multiple modesets and queues of mixed modesets and + * page flips. + * + * Modesets should be uncommon and will not be performant anyway, so + * making them synchronous should have negligible performance impact. + */ + if (!page_flip) { + int ret = wait_event_killable(priv.wait_for_flips, + atomic_read(&priv.nr_flips_in_flight) == 0); + if (ret) + return ret; + } + + /* + * There can be more 'early display' flips in flight than there are + * buffers, and there is (currently) no explicit bound on the number of + * flips. Hence, we need a new allocation for each one. 
+ * + * Note: this could be optimized down if we knew a bound on the flips, + * since an application can only have so many buffers in flight to be + * useful/not hog all the memory + */ + flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL); + if (flip_res == NULL) { + pr_err("kmem_cache_alloc failed to alloc - flip ignored\n"); + return -ENOMEM; + } + + /* + * increment flips in flight, whilst blocking when we reach + * NR_FLIPS_IN_FLIGHT_THRESHOLD + */ + do { + /* + * Note: use of assign-and-then-compare in the condition to set + * flips_in_flight + */ + int ret = wait_event_killable(priv.wait_for_flips, + (flips_in_flight = + atomic_read(&priv.nr_flips_in_flight)) + < NR_FLIPS_IN_FLIGHT_THRESHOLD); + if (ret != 0) { + kmem_cache_free(priv.page_flip_slab, flip_res); + return ret; + } + + old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight, + flips_in_flight, flips_in_flight + 1); + } while (old_flips_in_flight != flips_in_flight); + + flip_res->fb = fb; + flip_res->crtc = crtc; + flip_res->page_flip = page_flip; + flip_res->event = event; + INIT_LIST_HEAD(&flip_res->link); + DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res); +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + if (bo->gem_object.export_dma_buf != NULL) { + struct dma_buf *buf = bo->gem_object.export_dma_buf; + unsigned long shared[1] = { 0 }; + struct kds_resource *resource_list[1] = { + get_dma_buf_kds_resource(buf) }; + int err; + + get_dma_buf(buf); + DRM_DEBUG_KMS("Got dma_buf %p\n", buf); + + /* Wait for the KDS resource associated with this buffer */ + err = kds_async_waitall(&flip_res->kds_res_set, + &priv.kds_cb, flip_res, fb, 1, shared, + resource_list); + BUG_ON(err); + } else { + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc); + + DRM_DEBUG_KMS("No dma_buf for this flip\n"); + + /* No dma-buf attached so just call the callback directly */ + flip_res->kds_res_set = NULL; + pl111_crtc->show_framebuffer_cb(flip_res, fb); + } +#else + if (bo->gem_object.export_dma_buf != NULL) { + struct dma_buf *buf = bo->gem_object.export_dma_buf; + + get_dma_buf(buf); + DRM_DEBUG_KMS("Got dma_buf %p\n", buf); + } else { + DRM_DEBUG_KMS("No dma_buf for this flip\n"); + } + + /* No dma-buf attached to this so just call the callback directly */ + { + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc); + pl111_crtc->show_framebuffer_cb(flip_res, fb); + } +#endif + + /* For the same reasons as the wait at the start of this function, + * wait for the modeset to complete before continuing. 
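The cmpxchg loop above keeps the sleep and the increment race-free. Reduced to a standalone pattern it reads as follows (illustrative helper name, same logic as the loop in show_framebuffer_on_crtc()):

/* Illustrative only: atomically increment @counter while it is below
 * @limit, sleeping killably when at the limit. */
static int __maybe_unused bounded_atomic_inc(atomic_t *counter, int limit,
					wait_queue_head_t *wq)
{
	int old, cur;

	do {
		int ret = wait_event_killable(*wq,
				(cur = atomic_read(counter)) < limit);
		if (ret)
			return ret;
		old = atomic_cmpxchg(counter, cur, cur + 1);
	} while (old != cur);

	return 0;
}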
+ */ + if (!page_flip) { + int ret = wait_event_killable(priv.wait_for_flips, + flips_in_flight == 0); + if (ret) + return ret; + } + + return 0; +} + +int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0)) + struct drm_pending_vblank_event *event) +#else + struct drm_pending_vblank_event *event, + uint32_t flags) +#endif +{ + DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n", + __func__, crtc, fb, event); + return show_framebuffer_on_crtc(crtc, fb, true, event); +} + +int pl111_crtc_helper_mode_set(struct drm_crtc *crtc, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode, + int x, int y, struct drm_framebuffer *old_fb) +{ + int ret; + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc); + struct drm_display_mode *duplicated_mode; + + DRM_DEBUG_KMS("DRM crtc_helper_mode_set, x=%d y=%d bpp=%d\n", + adjusted_mode->hdisplay, adjusted_mode->vdisplay, +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0)) + crtc->fb->bits_per_pixel); +#else + crtc->primary->fb->bits_per_pixel); +#endif + + duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode); + if (!duplicated_mode) + return -ENOMEM; + + pl111_crtc->new_mode = duplicated_mode; +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0)) + ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL); +#else + ret = show_framebuffer_on_crtc(crtc, crtc->primary->fb, false, NULL); +#endif + if (ret != 0) { + pl111_crtc->new_mode = pl111_crtc->current_mode; + drm_mode_destroy(crtc->dev, duplicated_mode); + } + + return ret; +} + +void pl111_crtc_helper_prepare(struct drm_crtc *crtc) +{ + DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc); +} + +void pl111_crtc_helper_commit(struct drm_crtc *crtc) +{ + DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc); +} + +bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc, +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)) + const struct drm_display_mode *mode, +#else + struct drm_display_mode *mode, +#endif + struct drm_display_mode *adjusted_mode) +{ + DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc); + +#ifdef CONFIG_ARCH_VEXPRESS + /* + * 1024x768 with more than 16 bits per pixel may not work + * correctly on Versatile Express due to bandwidth issues + */ + if (mode->hdisplay == 1024 && mode->vdisplay == 768 && +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 14, 0)) + crtc->fb->bits_per_pixel > 16) { +#else + crtc->primary->fb->bits_per_pixel > 16) { +#endif + DRM_INFO("*WARNING* 1024x768 at > 16 bpp may suffer corruption\n"); + } +#endif + + return true; +} + +void pl111_crtc_helper_disable(struct drm_crtc *crtc) +{ + int ret; + + DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc); + + /* don't disable crtc until no flips in flight as irq will be disabled */ + ret = wait_event_killable(priv.wait_for_flips, atomic_read(&priv.nr_flips_in_flight) == 0); + if(ret) { + pr_err("pl111_crtc_helper_disable failed\n"); + return; + } + clcd_disable(crtc); +} + +void pl111_crtc_destroy(struct drm_crtc *crtc) +{ + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc); + + DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc); + + drm_crtc_cleanup(crtc); + kfree(pl111_crtc); +} + +const struct drm_crtc_funcs crtc_funcs = { + .cursor_set = pl111_crtc_cursor_set, + .cursor_move = pl111_crtc_cursor_move, + .set_config = drm_crtc_helper_set_config, + .page_flip = pl111_crtc_page_flip, + .destroy = pl111_crtc_destroy +}; + +const struct drm_crtc_helper_funcs crtc_helper_funcs = { + .mode_set = 
pl111_crtc_helper_mode_set,
+	.prepare = pl111_crtc_helper_prepare,
+	.commit = pl111_crtc_helper_commit,
+	.mode_fixup = pl111_crtc_helper_mode_fixup,
+	.disable = pl111_crtc_helper_disable,
+};
+
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+	struct pl111_drm_crtc *pl111_crtc;
+
+	pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
+	if (pl111_crtc == NULL) {
+		pr_err("Failed to allocate pl111_drm_crtc\n");
+		return NULL;
+	}
+
+	drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+	drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+	pl111_crtc->crtc_index = pl111_crtc_num;
+	pl111_crtc_num++;
+	pl111_crtc->crtc.enabled = 0;
+	pl111_crtc->last_bpp = 0;
+	pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	pl111_crtc->displaying_fb = NULL;
+	pl111_crtc->old_kds_res_set = NULL;
+	spin_lock_init(&pl111_crtc->current_displaying_lock);
+#endif
+	pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+	INIT_LIST_HEAD(&pl111_crtc->update_queue);
+	spin_lock_init(&pl111_crtc->base_update_lock);
+
+	return pl111_crtc;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100644
index 00000000000..4bf20fec246
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,331 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ * + */ + + + +/** + * pl111_drm_cursor.c + * Implementation of cursor functions for PL111 DRM + */ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> + +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> +#include "pl111_clcd_ext.h" +#include "pl111_drm.h" + +#define PL111_MAX_CURSOR_WIDTH (64) +#define PL111_MAX_CURSOR_HEIGHT (64) + +#define ARGB_2_LBBP_BINARY_THRESHOLD (1 << 7) +#define ARGB_ALPHA_SHIFT 24 +#define ARGB_ALPHA_MASK (0xff << ARGB_ALPHA_SHIFT) +#define ARGB_RED_SHIFT 16 +#define ARGB_RED_MASK (0xff << ARGB_RED_SHIFT) +#define ARGB_GREEN_SHIFT 8 +#define ARGB_GREEN_MASK (0xff << ARGB_GREEN_SHIFT) +#define ARGB_BLUE_SHIFT 0 +#define ARGB_BLUE_MASK (0xff << ARGB_BLUE_SHIFT) + + +void pl111_set_cursor_size(enum pl111_cursor_size size) +{ + u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG); + + if (size == CURSOR_64X64) + reg_data |= CRSR_CONFIG_CRSR_SIZE; + else + reg_data &= ~CRSR_CONFIG_CRSR_SIZE; + + writel(reg_data, priv.regs + CLCD_CRSR_CONFIG); +} + +void pl111_set_cursor_sync(enum pl111_cursor_sync sync) +{ + u32 reg_data = readl(priv.regs + CLCD_CRSR_CONFIG); + + if (sync == CURSOR_SYNC_VSYNC) + reg_data |= CRSR_CONFIG_CRSR_FRAME_SYNC; + else + reg_data &= ~CRSR_CONFIG_CRSR_FRAME_SYNC; + + writel(reg_data, priv.regs + CLCD_CRSR_CONFIG); +} + +void pl111_set_cursor(u32 cursor) +{ + u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL); + + reg_data &= ~(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT); + reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT; + + writel(reg_data, priv.regs + CLCD_CRSR_CTRL); +} + +void pl111_set_cursor_enable(bool enable) +{ + u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL); + + if (enable) + reg_data |= CRSR_CTRL_CRSR_ON; + else + reg_data &= ~CRSR_CTRL_CRSR_ON; + + writel(reg_data, priv.regs + CLCD_CRSR_CTRL); +} + +void pl111_set_cursor_position(u32 x, u32 y) +{ + u32 reg_data = (x & CRSR_XY_MASK) | + ((y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT); + + writel(reg_data, priv.regs + CLCD_CRSR_XY); +} + +void pl111_set_cursor_clipping(u32 x, u32 y) +{ + u32 reg_data; + + /* + * Do not allow setting clipping values larger than + * the cursor size since the cursor is already fully hidden + * when x,y = PL111_MAX_CURSOR_WIDTH. + */ + if (x > PL111_MAX_CURSOR_WIDTH) + x = PL111_MAX_CURSOR_WIDTH; + if (y > PL111_MAX_CURSOR_WIDTH) + y = PL111_MAX_CURSOR_WIDTH; + + reg_data = (x & CRSR_CLIP_MASK) | + ((y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT); + + writel(reg_data, priv.regs + CLCD_CRSR_CLIP); +} + +void pl111_set_cursor_palette(u32 color0, u32 color1) +{ + writel(color0 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_0); + writel(color1 & CRSR_PALETTE_MASK, priv.regs + CLCD_CRSR_PALETTE_1); +} + +void pl111_cursor_enable(void) +{ + pl111_set_cursor_sync(CURSOR_SYNC_VSYNC); + pl111_set_cursor_size(CURSOR_64X64); + pl111_set_cursor_palette(0x0, 0x00ffffff); + pl111_set_cursor_enable(true); +} + +void pl111_cursor_disable(void) +{ + pl111_set_cursor_enable(false); +} + +/* shift required to locate pixel into the correct position in + * a cursor LBBP word, indexed by x mod 16. 
+ */
+static const unsigned char
+x_mod_16_to_value_shift[CLCD_CRSR_IMAGE_PIXELS_PER_WORD] = {
+	6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24
+};
+
+/* Pack the pixel value into its correct position in the buffer as specified
+ * for LBBP */
+static inline void
+set_lbbp_pixel(uint32_t *buffer, unsigned int x, unsigned int y,
+		uint32_t value)
+{
+	u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+	uint32_t shift;
+	uint32_t data;
+
+	shift = x_mod_16_to_value_shift[x % CLCD_CRSR_IMAGE_PIXELS_PER_WORD];
+
+	/* Get the word containing this pixel */
+	cursor_ram = cursor_ram + (x >> CLCD_CRSR_IMAGE_WORDS_PER_LINE) + (y << 2);
+
+	/* Update pixel in cursor RAM */
+	data = readl(cursor_ram);
+	data &= ~(CLCD_CRSR_LBBP_COLOR_MASK << shift);
+	data |= value << shift;
+	writel(data, cursor_ram);
+}
+
+static u32 pl111_argb_to_lbbp(u32 argb_pix)
+{
+	u32 lbbp_pix = CLCD_CRSR_LBBP_TRANSPARENT;
+	u32 alpha = (argb_pix & ARGB_ALPHA_MASK) >> ARGB_ALPHA_SHIFT;
+	u32 red = (argb_pix & ARGB_RED_MASK) >> ARGB_RED_SHIFT;
+	u32 green = (argb_pix & ARGB_GREEN_MASK) >> ARGB_GREEN_SHIFT;
+	u32 blue = (argb_pix & ARGB_BLUE_MASK) >> ARGB_BLUE_SHIFT;
+
+	/*
+	 * Converting from 8-bit alpha to binary transparency is the best
+	 * we can achieve.
+	 */
+	if (alpha & ARGB_2_LBBP_BINARY_THRESHOLD) {
+		u32 gray, max, min;
+
+		/*
+		 * Convert to gray using the lightness method:
+		 * gray = [max(R,G,B) + min(R,G,B)]/2
+		 */
+		min = min(red, green);
+		min = min(min, blue);
+		max = max(red, green);
+		max = max(max, blue);
+		gray = (min + max) >> 1; /* divide by 2 */
+		/* Apply binary threshold to the gray value calculated */
+		if (gray & ARGB_2_LBBP_BINARY_THRESHOLD)
+			lbbp_pix = CLCD_CRSR_LBBP_FOREGROUND;
+		else
+			lbbp_pix = CLCD_CRSR_LBBP_BACKGROUND;
+	}
+
+	return lbbp_pix;
+}
+
+/*
+ * The PL111 hardware cursor supports only LBBP, a 2 bpp format, but the
+ * cursor image from userspace is ARGB8888, so we must convert to LBBP here.
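As a concrete check of pl111_argb_to_lbbp() above: opaque mid-grey 0xFF808080 passes both the alpha and gray thresholds (gray = 0x80) and becomes foreground, while dropping alpha below 0x80 makes the pixel transparent regardless of colour. In code (illustrative selftest, not part of the patch):

/* Illustrative only: spot-check the ARGB-to-LBBP conversion above. */
static void __maybe_unused pl111_lbbp_selftest(void)
{
	/* opaque mid-grey: alpha 0x80 and gray 0x80 both pass 1<<7 */
	WARN_ON(pl111_argb_to_lbbp(0xFF808080) != CLCD_CRSR_LBBP_FOREGROUND);
	/* alpha 0x7F fails the threshold: transparent */
	WARN_ON(pl111_argb_to_lbbp(0x7F808080) != CLCD_CRSR_LBBP_TRANSPARENT);
	/* opaque near-black: alpha passes, gray 0x10 fails: background */
	WARN_ON(pl111_argb_to_lbbp(0xFF101010) != CLCD_CRSR_LBBP_BACKGROUND);
}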
+ */ +static void pl111_set_cursor_image(u32 *data) +{ +#ifdef ARGB_LBBP_CONVERSION_DEBUG + /* Add 1 on width to insert trailing NULL */ + char string_cursor[PL111_MAX_CURSOR_WIDTH + 1]; +#endif /* ARGB_LBBP_CONVERSION_DEBUG */ + unsigned int x; + unsigned int y; + + for (y = 0; y < PL111_MAX_CURSOR_HEIGHT; y++) { + for (x = 0; x < PL111_MAX_CURSOR_WIDTH; x++) { + u32 value = pl111_argb_to_lbbp(*data); + +#ifdef ARGB_LBBP_CONVERSION_DEBUG + if (value == CLCD_CRSR_LBBP_TRANSPARENT) + string_cursor[x] = 'T'; + else if (value == CLCD_CRSR_LBBP_FOREGROUND) + string_cursor[x] = 'F'; + else if (value == CLCD_CRSR_LBBP_INVERSE) + string_cursor[x] = 'I'; + else + string_cursor[x] = 'B'; + +#endif /* ARGB_LBBP_CONVERSION_DEBUG */ + set_lbbp_pixel(data, x, y, value); + ++data; + } +#ifdef ARGB_LBBP_CONVERSION_DEBUG + string_cursor[PL111_MAX_CURSOR_WIDTH] = '\0'; + DRM_INFO("%s\n", string_cursor); +#endif /* ARGB_LBBP_CONVERSION_DEBUG */ + } +} + +int pl111_crtc_cursor_set(struct drm_crtc *crtc, + struct drm_file *file_priv, + uint32_t handle, + uint32_t width, + uint32_t height) +{ + struct drm_gem_object *obj; + struct pl111_gem_bo *bo; + + DRM_DEBUG_KMS("handle = %u, width = %u, height = %u\n", + handle, width, height); + + if (!handle) { + pl111_cursor_disable(); + return 0; + } + + if ((width != PL111_MAX_CURSOR_WIDTH) || + (height != PL111_MAX_CURSOR_HEIGHT)) + return -EINVAL; + + obj = drm_gem_object_lookup(crtc->dev, file_priv, handle); + if (!obj) { + DRM_ERROR("Cannot find cursor object for handle = %d\n", + handle); + return -ENOENT; + } + + /* + * We expect a PL111_MAX_CURSOR_WIDTH x PL111_MAX_CURSOR_HEIGHT + * ARGB888 buffer object in the input. + * + */ + if (obj->size < (PL111_MAX_CURSOR_WIDTH * PL111_MAX_CURSOR_HEIGHT * 4)) { + DRM_ERROR("Cannot set cursor with an obj size = %d\n", + obj->size); + drm_gem_object_unreference_unlocked(obj); + return -EINVAL; + } + + bo = PL111_BO_FROM_GEM(obj); + if (!(bo->type & PL111_BOT_DMA)) { + DRM_ERROR("Tried to set cursor with non DMA backed obj = %p\n", + obj); + drm_gem_object_unreference_unlocked(obj); + return -EINVAL; + } + + pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr); + + /* + * Since we copy the contents of the buffer to the HW cursor internal + * memory this GEM object is not needed anymore. + */ + drm_gem_object_unreference_unlocked(obj); + + pl111_cursor_enable(); + + return 0; +} + +int pl111_crtc_cursor_move(struct drm_crtc *crtc, + int x, int y) +{ + int x_clip = 0; + int y_clip = 0; + + DRM_DEBUG("x %d y %d\n", x, y); + + /* + * The cursor image is clipped automatically at the screen limits when + * it extends beyond the screen image to the right or bottom but + * we must clip it using pl111 HW features for negative values. + */ + if (x < 0) { + x_clip = -x; + x = 0; + } + if (y < 0) { + y_clip = -y; + y = 0; + } + + pl111_set_cursor_clipping(x_clip, y_clip); + pl111_set_cursor_position(x, y); + + return 0; +} diff --git a/drivers/gpu/drm/pl111/pl111_drm_device.c b/drivers/gpu/drm/pl111/pl111_drm_device.c new file mode 100644 index 00000000000..af92498784f --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm_device.c @@ -0,0 +1,336 @@ +/* + * + * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * pl111_drm_device.c + * Implementation of the Linux device driver entrypoints for PL111 DRM + */ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> +#include <linux/slab.h> + +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> + +#include "pl111_drm.h" + +struct pl111_drm_dev_private priv; + +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS +static void initial_kds_obtained(void *cb1, void *cb2) +{ + wait_queue_head_t *wait = (wait_queue_head_t *) cb1; + bool *cb_has_called = (bool *) cb2; + + *cb_has_called = true; + wake_up(wait); +} + +/* Must be called from within current_displaying_lock spinlock */ +void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res) +{ + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc); + pl111_crtc->displaying_fb = flip_res->fb; + + /* Release the previous buffer */ + if (pl111_crtc->old_kds_res_set != NULL) { + /* + * Can flip to the same buffer, but must not release the current + * resource set + */ + BUG_ON(pl111_crtc->old_kds_res_set == flip_res->kds_res_set); + kds_resource_set_release(&pl111_crtc->old_kds_res_set); + } + /* Record the current buffer, to release on the next buffer flip */ + pl111_crtc->old_kds_res_set = flip_res->kds_res_set; +} +#endif + +void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv) +{ + DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev); +} + +void pl111_drm_lastclose(struct drm_device *dev) +{ + DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev); +} + +/* + * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank + * and disable_vblank are just no op callbacks. 
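Even with these no-op enable/disable hooks, userspace vblank waits still work because the IRQ handler calls drm_handle_vblank() on every base-address update. A hedged libdrm-side sketch (illustrative helper name):

/* Illustrative userspace check: wait one vblank through the counter
 * driven by drm_handle_vblank() in pl111_common_irq(). */
#include <xf86drm.h>

int wait_one_vblank(int drm_fd)
{
	drmVBlank vbl = {
		.request = {
			.type = DRM_VBLANK_RELATIVE,
			.sequence = 1,
		},
	};

	return drmWaitVBlank(drm_fd, &vbl);
}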
+ */ +static int pl111_enable_vblank(struct drm_device *dev, int crtc) +{ + DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc); + return 0; +} + +static void pl111_disable_vblank(struct drm_device *dev, int crtc) +{ + DRM_DEBUG_KMS("%s: dev=%p, crtc=%d", __func__, dev, crtc); +} + +struct drm_mode_config_funcs mode_config_funcs = { + .fb_create = pl111_fb_create, +}; + +static int pl111_modeset_init(struct drm_device *dev) +{ + struct drm_mode_config *mode_config; + struct pl111_drm_dev_private *priv = dev->dev_private; + struct pl111_drm_connector *pl111_connector; + struct pl111_drm_encoder *pl111_encoder; + int ret = 0; + + if (priv == NULL) + return -EINVAL; + + drm_mode_config_init(dev); + mode_config = &dev->mode_config; + mode_config->funcs = &mode_config_funcs; + mode_config->min_width = 1; + mode_config->max_width = 1024; + mode_config->min_height = 1; + mode_config->max_height = 768; + + priv->pl111_crtc = pl111_crtc_create(dev); + if (priv->pl111_crtc == NULL) { + pr_err("Failed to create pl111_drm_crtc\n"); + ret = -ENOMEM; + goto out_config; + } + + priv->number_crtcs = 1; + + pl111_connector = pl111_connector_create(dev); + if (pl111_connector == NULL) { + pr_err("Failed to create pl111_drm_connector\n"); + ret = -ENOMEM; + goto out_config; + } + + pl111_encoder = pl111_encoder_create(dev, 1); + if (pl111_encoder == NULL) { + pr_err("Failed to create pl111_drm_encoder\n"); + ret = -ENOMEM; + goto out_config; + } + + ret = drm_mode_connector_attach_encoder(&pl111_connector->connector, + &pl111_encoder->encoder); + if (ret != 0) { + DRM_ERROR("Failed to attach encoder\n"); + goto out_config; + } + + pl111_connector->connector.encoder = &pl111_encoder->encoder; + + goto finish; + +out_config: + drm_mode_config_cleanup(dev); +finish: + DRM_DEBUG("%s returned %d\n", __func__, ret); + return ret; +} + +static void pl111_modeset_fini(struct drm_device *dev) +{ + drm_mode_config_cleanup(dev); +} + +static int pl111_drm_load(struct drm_device *dev, unsigned long chipset) +{ + int ret = 0; + + pr_info("DRM %s\n", __func__); + + mutex_init(&priv.export_dma_buf_lock); + atomic_set(&priv.nr_flips_in_flight, 0); + init_waitqueue_head(&priv.wait_for_flips); +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb); + if (ret != 0) { + pr_err("Failed to initialise KDS callback\n"); + goto finish; + } + + ret = kds_callback_init(&priv.kds_obtain_current_cb, 1, + initial_kds_obtained); + if (ret != 0) { + pr_err("Failed to init KDS obtain callback\n"); + kds_callback_term(&priv.kds_cb); + goto finish; + } +#endif + + /* Create a cache for page flips */ + priv.page_flip_slab = kmem_cache_create("page flip slab", + sizeof(struct pl111_drm_flip_resource), 0, 0, NULL); + if (priv.page_flip_slab == NULL) { + DRM_ERROR("Failed to create slab\n"); + ret = -ENOMEM; + goto out_kds_callbacks; + } + + dev->dev_private = &priv; + + ret = pl111_modeset_init(dev); + if (ret != 0) { + pr_err("Failed to init modeset\n"); + goto out_slab; + } + + ret = pl111_device_init(dev); + if (ret != 0) { + DRM_ERROR("Failed to init MMIO and IRQ\n"); + goto out_modeset; + } + + ret = drm_vblank_init(dev, 1); + if (ret != 0) { + DRM_ERROR("Failed to init vblank\n"); + goto out_vblank; + } + +#if (LINUX_VERSION_CODE > KERNEL_VERSION(3, 13, 0)) + platform_set_drvdata(dev->platformdev, dev); +#endif + + goto finish; + +out_vblank: + pl111_device_fini(dev); +out_modeset: + pl111_modeset_fini(dev); +out_slab: + kmem_cache_destroy(priv.page_flip_slab); 
+out_kds_callbacks: +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + kds_callback_term(&priv.kds_obtain_current_cb); + kds_callback_term(&priv.kds_cb); +#endif +finish: + DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret); + return ret; +} + +static int pl111_drm_unload(struct drm_device *dev) +{ + pr_info("DRM %s\n", __func__); + + kmem_cache_destroy(priv.page_flip_slab); + + drm_vblank_cleanup(dev); + pl111_modeset_fini(dev); + pl111_device_fini(dev); + +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + kds_callback_term(&priv.kds_obtain_current_cb); + kds_callback_term(&priv.kds_cb); +#endif + return 0; +} + +static struct vm_operations_struct pl111_gem_vm_ops = { + .fault = pl111_gem_fault, +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +#else + .open = pl111_gem_vm_open, + .close = pl111_gem_vm_close, +#endif +}; + +static const struct file_operations drm_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .mmap = pl111_gem_mmap, + .poll = drm_poll, + .read = drm_read, +}; + +static struct drm_ioctl_desc pl111_ioctls[] = { + DRM_IOCTL_DEF_DRV(PL111_GEM_CREATE, pl111_drm_gem_create_ioctl, + DRM_CONTROL_ALLOW | DRM_UNLOCKED), +}; + +static struct drm_driver driver = { + .driver_features = + DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME, + .load = pl111_drm_load, + .unload = pl111_drm_unload, + .context_dtor = NULL, + .preclose = pl111_drm_preclose, + .lastclose = pl111_drm_lastclose, + .suspend = pl111_drm_suspend, + .resume = pl111_drm_resume, + .get_vblank_counter = drm_vblank_count, + .enable_vblank = pl111_enable_vblank, + .disable_vblank = pl111_disable_vblank, + .ioctls = pl111_ioctls, + .fops = &drm_fops, + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, + .dumb_create = pl111_dumb_create, + .dumb_destroy = pl111_dumb_destroy, + .dumb_map_offset = pl111_dumb_map_offset, + .gem_free_object = pl111_gem_free_object, + .gem_vm_ops = &pl111_gem_vm_ops, + .prime_handle_to_fd = &pl111_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_export = &pl111_gem_prime_export, + .gem_prime_import = &pl111_gem_prime_import, +}; + +int pl111_drm_init(struct platform_device *dev) +{ + int ret; + pr_info("DRM %s\n", __func__); + pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n", + DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR); + driver.num_ioctls = ARRAY_SIZE(pl111_ioctls); + ret = 0; +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 15, 0)) + driver.kdriver.platform_device = dev; +#endif + return drm_platform_init(&driver, dev); + +} + +void pl111_drm_exit(struct platform_device *dev) +{ + pr_info("DRM %s\n", __func__); + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0)) + drm_platform_exit(&driver, dev); +#else + drm_put_dev(platform_get_drvdata(dev)); +#endif +} diff --git a/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c new file mode 100644 index 00000000000..1131f46b27d --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c @@ -0,0 +1,625 @@ +/* + * + * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * pl111_drm_dma_buf.c + * Implementation of the dma_buf functions for PL111 DRM + */ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> + +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> + +#include "pl111_drm.h" + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0)) +#define export_dma_buf export_dma_buf +#else +#define export_dma_buf dma_buf +#endif + +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS +static void obtain_kds_if_currently_displayed(struct drm_device *dev, + struct pl111_gem_bo *bo, + struct dma_buf *dma_buf) +{ + unsigned long shared[1] = { 0 }; + struct kds_resource *resource_list[1]; + struct kds_resource_set *kds_res_set; + struct drm_crtc *crtc; + bool cb_has_called = false; + unsigned long flags; + int err; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); + + DRM_DEBUG_KMS("Obtaining initial KDS res for bo:%p dma_buf:%p\n", + bo, dma_buf); + + resource_list[0] = get_dma_buf_kds_resource(dma_buf); + get_dma_buf(dma_buf); + + /* + * Can't use kds_waitall(), because kbase will be let through due to + * locked ignore' + */ + err = kds_async_waitall(&kds_res_set, + &priv.kds_obtain_current_cb, &wake, + &cb_has_called, 1, shared, resource_list); + BUG_ON(err); + wait_event(wake, cb_has_called == true); + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc); + spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags); + if (pl111_crtc->displaying_fb) { + struct pl111_drm_framebuffer *pl111_fb; + struct drm_framebuffer *fb = pl111_crtc->displaying_fb; + + pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb); + + if (pl111_fb->bo == bo) { + DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo); + DRM_DEBUG_KMS(" is being displayed, keeping\n"); + /* There shouldn't be a previous buffer to release */ + BUG_ON(pl111_crtc->old_kds_res_set); + + if (kds_res_set == NULL) { + err = kds_async_waitall(&kds_res_set, + &priv.kds_obtain_current_cb, + &wake, &cb_has_called, + 1, shared, resource_list); + BUG_ON(err); + wait_event(wake, cb_has_called == true); + } + + /* Current buffer will need releasing on next flip */ + pl111_crtc->old_kds_res_set = kds_res_set; + + /* + * Clear kds_res_set, so a new kds_res_set is allocated + * for additional CRTCs + */ + kds_res_set = NULL; + } + } + spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags); + } + + /* kds_res_set will be NULL here if any CRTCs are displaying fb */ + if (kds_res_set != NULL) { + DRM_DEBUG_KMS("Initial KDS resource for bo %p", bo); + DRM_DEBUG_KMS(" not being displayed, discarding\n"); + /* They're not being displayed, release them */ + kds_resource_set_release(&kds_res_set); + } + + dma_buf_put(dma_buf); +} +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)) + +static int pl111_dma_buf_mmap(struct dma_buf *buffer, + struct vm_area_struct *vma) +{ + struct drm_gem_object *obj = buffer->priv; + struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj); + struct drm_device *dev = obj->dev; + int ret; + + DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer); + + mutex_lock(&dev->struct_mutex); + ret = drm_gem_mmap_obj(obj, obj->size, vma); + mutex_unlock(&dev->struct_mutex); + if (ret) + return ret; + + return 
pl111_bo_mmap(obj, bo, vma, buffer->size); +} + +#else + +static int pl111_dma_buf_mmap(struct dma_buf *buffer, + struct vm_area_struct *vma) +{ + struct drm_gem_object *obj = buffer->priv; + struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj); + struct drm_device *dev = obj->dev; + + DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer); + + mutex_lock(&dev->struct_mutex); + + /* Check for valid size. */ + if (obj->size < vma->vm_end - vma->vm_start) + return -EINVAL; + + BUG_ON(!dev->driver->gem_vm_ops); + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0)) + vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; +#else + vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; +#endif + + vma->vm_ops = dev->driver->gem_vm_ops; + vma->vm_private_data = obj; + + /* Take a ref for this mapping of the object, so that the fault + * handler can dereference the mmap offset's pointer to the object. + * This reference is cleaned up by the corresponding vm_close + * (which should happen whether the vma was created by this call, or + * by a vm_open due to mremap or partial unmap or whatever). + */ + drm_gem_object_reference(obj); + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)) + pl111_drm_vm_open_locked(dev, vma); +#else + drm_vm_open_locked(dev, vma); +#endif + + mutex_unlock(&dev->struct_mutex); + + return pl111_bo_mmap(obj, bo, vma, buffer->size); +} + +#endif /* KERNEL_VERSION */ + +static void pl111_dma_buf_release(struct dma_buf *buf) +{ + /* + * Need to release the dma_buf's reference on the gem object it was + * exported from, and also clear the gem object's export_dma_buf + * pointer to this dma_buf as it no longer exists + */ + struct drm_gem_object *obj = (struct drm_gem_object *)buf->priv; + struct pl111_gem_bo *bo; +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + struct drm_crtc *crtc; + unsigned long flags; +#endif + bo = PL111_BO_FROM_GEM(obj); + + DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj); + +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list, + head) { + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc); + spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags); + if (pl111_crtc->displaying_fb) { + struct pl111_drm_framebuffer *pl111_fb; + struct drm_framebuffer *fb = pl111_crtc->displaying_fb; + + pl111_fb = PL111_FB_FROM_FRAMEBUFFER(fb); + if (pl111_fb->bo == bo) { + kds_resource_set_release(&pl111_crtc->old_kds_res_set); + pl111_crtc->old_kds_res_set = NULL; + } + } + spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags); + } +#endif + mutex_lock(&priv.export_dma_buf_lock); + + obj->export_dma_buf = NULL; + drm_gem_object_unreference_unlocked(obj); + + mutex_unlock(&priv.export_dma_buf_lock); +} + +static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev, + struct dma_buf_attachment *attach) +{ + DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf, + dev, attach); + + attach->priv = dev; + + return 0; +} + +static void pl111_dma_buf_detach(struct dma_buf *buf, + struct dma_buf_attachment *attach) +{ + DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf, + attach); +} + +/* Heavily from exynos_drm_dmabuf.c */ +static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment + *attach, + enum dma_data_direction + direction) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj); + struct drm_device *dev = obj->dev; + int size, n_pages, nents; 
+ struct scatterlist *s, *sg; + struct sg_table *sgt; + int ret, i; + + DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf, + attach, bo); + + /* + * Nothing to do, if we are trying to map a dmabuf that has been imported. + * Just return the existing sgt. + */ + if (obj->import_attach) { + BUG_ON(!bo->sgt); + return bo->sgt; + } + + size = obj->size; + n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + + if (bo->type & PL111_BOT_DMA) { + sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); + if (!sgt) { + DRM_ERROR("Failed to allocate sg_table\n"); + return ERR_PTR(-ENOMEM); + } + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (ret < 0) { + DRM_ERROR("Failed to allocate page table\n"); + return ERR_PTR(-ENOMEM); + } + sg_dma_len(sgt->sgl) = size; + /* We use DMA coherent mappings for PL111_BOT_DMA so we must + * use the virtual address returned at buffer allocation */ + sg_set_buf(sgt->sgl, bo->backing_data.dma.fb_cpu_addr, size); + sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr; + } else { /* PL111_BOT_SHM */ + struct page **pages; + int pg = 0; + + mutex_lock(&dev->struct_mutex); + pages = get_pages(obj); + if (IS_ERR(pages)) { + dev_err(obj->dev->dev, "could not get pages: %ld\n", + PTR_ERR(pages)); + return ERR_CAST(pages); + } + sgt = drm_prime_pages_to_sg(pages, n_pages); + if (sgt == NULL) + return ERR_PTR(-ENOMEM); + + pl111_gem_sync_to_dma(bo); + + /* + * At this point the pages have been dma-mapped by either + * get_pages() for non cached maps or pl111_gem_sync_to_dma() + * for cached. So the physical addresses can be assigned + * to the sg entries. + * drm_prime_pages_to_sg() may have combined contiguous pages + * into chunks so we assign the physical address of the first + * page of a chunk to the chunk and check that the physical + * addresses of the rest of the pages in that chunk are also + * contiguous. + */ + sg = sgt->sgl; + nents = sgt->nents; + + for_each_sg(sg, s, nents, i) { + int j, n_pages_in_chunk = sg_dma_len(s) >> PAGE_SHIFT; + + sg_dma_address(s) = bo->backing_data.shm.dma_addrs[pg]; + + for (j = pg+1; j < pg+n_pages_in_chunk; j++) { + BUG_ON(bo->backing_data.shm.dma_addrs[j] != + bo->backing_data.shm.dma_addrs[j-1]+PAGE_SIZE); + } + + pg += n_pages_in_chunk; + } + + mutex_unlock(&dev->struct_mutex); + } + bo->sgt = sgt; + return sgt; +} + +static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction direction) +{ + struct drm_gem_object *obj = attach->dmabuf->priv; + struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj); + + DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p (bo=%p)\n", attach->dmabuf, + attach, bo); + + sg_free_table(sgt); + kfree(sgt); + bo->sgt = NULL; +} + +/* + * There isn't any operation here that can sleep or fail so this callback can + * be used for both kmap and kmap_atomic implementations. + */ +static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long pageno) +{ + struct pl111_gem_bo *bo = dma_buf->priv; + void *vaddr = NULL; + + /* Make sure we cannot access outside the memory range */ + if (((pageno + 1) << PAGE_SHIFT) > bo->gem_object.size) + return NULL; + + if (bo->type & PL111_BOT_DMA) { + vaddr = (bo->backing_data.dma.fb_cpu_addr + + (pageno << PAGE_SHIFT)); + } else { + vaddr = page_address(bo->backing_data.shm.pages[pageno]); + } + + return vaddr; +} + +/* + * Find a scatterlist that starts in "start" and has "len" + * or return a NULL dma_handle. 
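A consequence of the exact-match search in the helper below: CPU-access windows must start on a scatterlist-entry boundary and cover exactly one entry, or the sync is refused. A hedged caller-side sketch against the 3.x-era dma-buf API (begin/end with start and len) used here:

/* Illustrative only: CPU read of an imported pl111 dma_buf, covering
 * the whole buffer as a single scatterlist chunk. */
static int example_cpu_read(struct dma_buf *buf)
{
	int ret;

	ret = dma_buf_begin_cpu_access(buf, 0, buf->size, DMA_FROM_DEVICE);
	if (ret)
		return ret;

	/* ... access pages via dma_buf_kmap()/dma_buf_kunmap() ... */

	dma_buf_end_cpu_access(buf, 0, buf->size, DMA_FROM_DEVICE);
	return 0;
}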
+ */
+static dma_addr_t pl111_find_matching_sg(struct sg_table *sgt, size_t start,
+					size_t len)
+{
+	struct scatterlist *sg;
+	unsigned int count;
+	size_t size = 0;
+	dma_addr_t dma_handle = 0;
+
+	/* Find a scatterlist that starts in "start" and has "len"
+	 * or return error */
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		if ((size == start) && (len == sg_dma_len(sg))) {
+			dma_handle = sg_dma_address(sg);
+			break;
+		}
+		size += sg_dma_len(sg);
+	}
+	return dma_handle;
+}
+
+static int pl111_dma_buf_begin_cpu(struct dma_buf *dma_buf,
+				size_t start, size_t len,
+				enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return -EINVAL;
+
+	/* Only SHM-backed buffers need a sync here; note PL111_BOT_SHM is
+	 * 0x0, so test the DMA bit instead, as pl111_dma_buf_end_cpu()
+	 * does. */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return -EINVAL;
+
+		dma_sync_single_range_for_cpu(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherent mappings, no need to sync */
+	return 0;
+}
+
+static void pl111_dma_buf_end_cpu(struct dma_buf *dma_buf,
+				size_t start, size_t len,
+				enum dma_data_direction dir)
+{
+	struct pl111_gem_bo *bo = dma_buf->priv;
+	struct sg_table *sgt = bo->sgt;
+	dma_addr_t dma_handle;
+
+	if ((start + len) > bo->gem_object.size)
+		return;
+
+	if (!(bo->type & PL111_BOT_DMA)) {
+		struct device *dev = bo->gem_object.dev->dev;
+
+		dma_handle = pl111_find_matching_sg(sgt, start, len);
+		if (!dma_handle)
+			return;
+
+		dma_sync_single_range_for_device(dev, dma_handle, 0, len, dir);
+	}
+	/* PL111_BOT_DMA uses coherent mappings, no need to sync */
+}
+
+static struct dma_buf_ops pl111_dma_buf_ops = {
+	.release = &pl111_dma_buf_release,
+	.attach = &pl111_dma_buf_attach,
+	.detach = &pl111_dma_buf_detach,
+	.map_dma_buf = &pl111_dma_buf_map_dma_buf,
+	.unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+	.kmap_atomic = &pl111_dma_buf_kmap,
+	.kmap = &pl111_dma_buf_kmap,
+	.begin_cpu_access = &pl111_dma_buf_begin_cpu,
+	.end_cpu_access = &pl111_dma_buf_end_cpu,
+	.mmap = &pl111_dma_buf_mmap,
+};
+
+struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev,
+					struct dma_buf *dma_buf)
+{
+	struct dma_buf_attachment *attachment;
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+	struct scatterlist *sgl;
+	struct sg_table *sgt;
+	dma_addr_t cont_phys;
+	int ret = 0;
+	int i;
+
+	DRM_DEBUG_KMS("DRM %s on dev=%p dma_buf=%p\n", __func__, dev, dma_buf);
+
+	/* is this one of our own objects? */
+	if (dma_buf->ops == &pl111_dma_buf_ops) {
+		obj = dma_buf->priv;
+		/* is it from our device? */
+		if (obj->dev == dev) {
+			/*
+			 * Importing a dmabuf exported from our own gem
+			 * increases the refcount on the gem itself instead of
+			 * the f_count of the dmabuf.
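That self-import detection relies on the ops-table identity check a few lines up; as a standalone predicate it would read (illustrative name only):

/* Illustrative only: a dma_buf exported by this driver is recognised
 * by its ops table. */
static bool __maybe_unused pl111_is_own_dma_buf(const struct dma_buf *buf)
{
	return buf->ops == &pl111_dma_buf_ops;
}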
+ */ + drm_gem_object_reference(obj); + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)) + /* before v3.10.0 we assume the caller has taken a ref on the dma_buf + * we don't want it for self-imported buffers so drop it here */ + dma_buf_put(dma_buf); +#endif + + return obj; + } + } + + attachment = dma_buf_attach(dma_buf, dev->dev); + if (IS_ERR(attachment)) + return ERR_CAST(attachment); + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)) + /* from 3.10.0 we assume the caller has not taken a ref so we take one here */ + get_dma_buf(dma_buf); +#endif + + sgt = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL); + if (IS_ERR_OR_NULL(sgt)) { + ret = PTR_ERR(sgt); + goto err_buf_detach; + } + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) { + DRM_ERROR("%s: failed to allocate buffer object.\n", __func__); + ret = -ENOMEM; + goto err_unmap_attach; + } + + /* Find out whether the buffer is contiguous or not */ + sgl = sgt->sgl; + cont_phys = sg_phys(sgl); + bo->type |= PL111_BOT_DMA; + for_each_sg(sgt->sgl, sgl, sgt->nents, i) { + dma_addr_t real_phys = sg_phys(sgl); + if (real_phys != cont_phys) { + bo->type &= ~PL111_BOT_DMA; + break; + } + cont_phys += (PAGE_SIZE - sgl->offset); + } + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0)) + ret = drm_gem_private_object_init(dev, &bo->gem_object, + dma_buf->size); + if (ret != 0) { + DRM_ERROR("DRM could not import DMA GEM obj\n"); + goto err_free_buffer; + } +#else + drm_gem_private_object_init(dev, &bo->gem_object, dma_buf->size); +#endif + + if (bo->type & PL111_BOT_DMA) { + bo->backing_data.dma.fb_cpu_addr = sg_virt(sgt->sgl); + bo->backing_data.dma.fb_dev_addr = sg_phys(sgt->sgl); + DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, contiguous import\n", __func__, bo); + } else { /* PL111_BOT_SHM */ + DRM_DEBUG_KMS("DRM %s pl111_gem_bo=%p, non contiguous import\n", __func__, bo); + } + + bo->gem_object.import_attach = attachment; + bo->sgt = sgt; + + return &bo->gem_object; + +err_free_buffer: + kfree(bo); + bo = NULL; +err_unmap_attach: + dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL); +err_buf_detach: + dma_buf_detach(dma_buf, attachment); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 10, 0)) + /* from 3.10.0 we will have taken a ref so drop it here */ + dma_buf_put(dma_buf); +#endif + return ERR_PTR(ret); +} + +struct dma_buf *pl111_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *obj, int flags) +{ + struct dma_buf *new_buf; + struct pl111_gem_bo *bo; + size_t size; + + DRM_DEBUG("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj); + size = obj->size; + + new_buf = dma_buf_export(obj /*priv */ , &pl111_dma_buf_ops, size, + flags | O_RDWR); + bo = PL111_BO_FROM_GEM(new_buf->priv); + + /* + * bo->gem_object.export_dma_buf not setup until after gem_prime_export + * finishes + */ + +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + /* + * Ensure that we hold the kds resource if it's the currently + * displayed buffer. 
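The import path above promotes a buffer to PL111_BOT_DMA only when its scatterlist is physically contiguous. That classification, restated as a hedged standalone helper (same advance rule as the loop above):

/* Illustrative only: restatement of the contiguity test used by
 * pl111_gem_prime_import(). */
static bool __maybe_unused example_sgt_is_contiguous(struct sg_table *sgt)
{
	struct scatterlist *sg;
	dma_addr_t expected;
	int i;

	expected = sg_phys(sgt->sgl);
	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
		if (sg_phys(sg) != expected)
			return false;
		expected += PAGE_SIZE - sg->offset;
	}
	return true;
}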
+ */ + obtain_kds_if_currently_displayed(dev, bo, new_buf); +#endif + + DRM_DEBUG("Created dma_buf %p\n", new_buf); + + return new_buf; +} + +int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, + uint32_t handle, uint32_t flags, int *prime_fd) +{ + int result; + /* + * This will re-use any existing exports, and calls + * driver->gem_prime_export to do the first export when needed + */ + DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__, + file_priv, handle); + + mutex_lock(&priv.export_dma_buf_lock); + result = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, + prime_fd); + mutex_unlock(&priv.export_dma_buf_lock); + + return result; +} diff --git a/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/drivers/gpu/drm/pl111/pl111_drm_encoder.c new file mode 100644 index 00000000000..78c91c08106 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm_encoder.c @@ -0,0 +1,107 @@ +/* + * + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * pl111_drm_encoder.c + * Implementation of the encoder functions for PL111 DRM + */ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> + +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> + +#include "pl111_drm.h" + +bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder); + return true; +} + +void pl111_encoder_helper_prepare(struct drm_encoder *encoder) +{ + DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder); +} + +void pl111_encoder_helper_commit(struct drm_encoder *encoder) +{ + DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder); +} + +void pl111_encoder_helper_mode_set(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder); +} + +void pl111_encoder_helper_disable(struct drm_encoder *encoder) +{ + DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder); +} + +void pl111_encoder_destroy(struct drm_encoder *encoder) +{ + struct pl111_drm_encoder *pl111_encoder = + PL111_ENCODER_FROM_ENCODER(encoder); + + DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder); + + drm_encoder_cleanup(encoder); + kfree(pl111_encoder); +} + +const struct drm_encoder_funcs encoder_funcs = { + .destroy = pl111_encoder_destroy, +}; + +const struct drm_encoder_helper_funcs encoder_helper_funcs = { + .mode_fixup = pl111_encoder_helper_mode_fixup, + .prepare = pl111_encoder_helper_prepare, + .commit = pl111_encoder_helper_commit, + .mode_set = pl111_encoder_helper_mode_set, + .disable = pl111_encoder_helper_disable, +}; + +struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev, + int possible_crtcs) +{ + struct pl111_drm_encoder *pl111_encoder; + + pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL); + if (pl111_encoder == NULL) { + 
pr_err("Failed to allocate pl111_drm_encoder\n");
+		return NULL;
+	}
+
+	drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+			 DRM_MODE_ENCODER_DAC);
+
+	drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+	pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+	return pl111_encoder;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_fb.c b/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100644
index 00000000000..f575c9e5f7c
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,202 @@
+/*
+ *
+ * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * A copy of the licence is included with the program, and can also be obtained
+ * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
+#include "pl111_drm.h"
+
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+	struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+	unsigned long flags;
+#endif
+	DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+	pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+	/*
+	 * Because flips are deferred, wait for all previous flips to complete
+	 */
+	wait_event(priv.wait_for_flips,
+		   atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Release KDS resources if it's currently being displayed. Only occurs
+	 * when the last framebuffer is destroyed.
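+	 *
+	 * A sketch of the flip accounting the wait_event() above relies on
+	 * (inferred from the counter's use, not copied from this file): the
+	 * queueing side increments the counter, and the IRQ side retires it
+	 * and wakes this queue.
+	 *
+	 *	atomic_inc(&priv.nr_flips_in_flight);		(queue a flip)
+	 *	...
+	 *	if (atomic_dec_return(&priv.nr_flips_in_flight) == 0)
+	 *		wake_up(&priv.wait_for_flips);		(flip retired)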
+	 */
+	list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags);
+		if (pl111_crtc->displaying_fb == framebuffer) {
+			/* Release the current buffers */
+			if (pl111_crtc->old_kds_res_set != NULL) {
+				DRM_DEBUG_KMS("Releasing KDS resources for displayed 0x%p\n",
+						framebuffer);
+				kds_resource_set_release(
+					&pl111_crtc->old_kds_res_set);
+			}
+			pl111_crtc->old_kds_res_set = NULL;
+		}
+		spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock,
+					flags);
+	}
+#endif
+	drm_framebuffer_cleanup(framebuffer);
+
+	/* the address of an embedded member can never be NULL, so only the
+	 * bo pointer itself needs checking */
+	if (pl111_fb->bo != NULL)
+		drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+	kfree(pl111_fb);
+
+	DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+}
+
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+				struct drm_file *file_priv,
+				unsigned int *handle)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+	if (bo == NULL)
+		return -EINVAL;
+
+	return drm_gem_handle_create(file_priv, &bo->gem_object, handle);
+}
+
+const struct drm_framebuffer_funcs fb_funcs = {
+	.destroy = pl111_fb_destroy,
+	.create_handle = pl111_fb_create_handle,
+};
+
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct pl111_drm_framebuffer *pl111_fb = NULL;
+	struct drm_framebuffer *fb = NULL;
+	struct drm_gem_object *gem_obj;
+	struct pl111_gem_bo *bo;
+	int err = 0;
+	size_t min_size;
+	int bpp;
+	int depth;
+
+	pr_info("DRM %s\n", __func__);
+	gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+	if (gem_obj == NULL) {
+		DRM_ERROR("Could not get gem obj from handle to create fb\n");
+		err = -ENOENT;
+		goto error;
+	}
+
+	bo = PL111_BO_FROM_GEM(gem_obj);
+	drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp);
+
+	if (mode_cmd->pitches[0] < mode_cmd->width * (bpp >> 3)) {
+		DRM_ERROR("bad pitch %u for plane 0\n", mode_cmd->pitches[0]);
+		err = -EINVAL;
+		goto error;
+	}
+
+	min_size = (mode_cmd->height - 1) * mode_cmd->pitches[0]
+			+ mode_cmd->width * (bpp >> 3);
+
+	if (bo->gem_object.size < min_size) {
+		DRM_ERROR("gem obj size < min size\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* We can't scan out SHM so we can't create an fb for it */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		DRM_ERROR("Can't create FB for non-scanout buffer\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+	case 'Y':
+	case 'U':
+	case 'V':
+	case 'N':
+	case 'T':
+		DRM_ERROR("YUV formats not supported\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
+	if (pl111_fb == NULL) {
+		DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+		err = -ENOMEM;
+		goto error;
+	}
+	fb = &pl111_fb->fb;
+
+	err = drm_framebuffer_init(dev, fb, &fb_funcs);
+	if (err) {
+		DRM_ERROR("drm_framebuffer_init failed\n");
+		kfree(fb);
+		fb = NULL;
+		goto error;
+	}
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	/* The only framebuffer formats supported by pl111
+	 * are 16 bpp or 32 bpp with 24 bit depth.
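+	 *
+	 * For example (illustrative values, not from this driver), a
+	 * mode_cmd that passes the checks above for the 32 bpp case:
+	 *
+	 *	struct drm_mode_fb_cmd2 cmd = {
+	 *		.width = 1024, .height = 768,
+	 *		.pixel_format = DRM_FORMAT_XRGB8888,	(32 bpp, depth 24)
+	 *		.pitches[0] = 1024 * 4,
+	 *		.handles[0] = bo_handle,	(a PL111_BOT_DMA buffer)
+	 *	};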
+ * See clcd_enable() + */ + if (!((fb->bits_per_pixel == 16) || + (fb->bits_per_pixel == 32 && fb->depth == 24))) { + DRM_DEBUG_KMS("unsupported pixel format bpp=%d, depth=%d\n", fb->bits_per_pixel, fb->depth); + drm_framebuffer_cleanup(fb); + kfree(fb); + fb = NULL; + err = -EINVAL; + goto error; + } + + pl111_fb->bo = bo; + + DRM_DEBUG_KMS("Created fb 0x%p with gem_obj 0x%p physaddr=0x%.8x\n", + fb, gem_obj, bo->backing_data.dma.fb_dev_addr); + + return fb; + +error: + if (gem_obj != NULL) + drm_gem_object_unreference_unlocked(gem_obj); + + return ERR_PTR(err); +} diff --git a/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/drivers/gpu/drm/pl111/pl111_drm_funcs.h new file mode 100644 index 00000000000..494baa0d057 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm_funcs.h @@ -0,0 +1,130 @@ +/* + * + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * pl111_drm_funcs.h + * Function prototypes for PL111 DRM + */ + +#ifndef PL111_DRM_FUNCS_H_ +#define PL111_DRM_FUNCS_H_ + +/* Platform Initialisation */ +int pl111_drm_init(struct platform_device *dev); +void pl111_drm_exit(struct platform_device *dev); + +/* KDS Callbacks */ +void show_framebuffer_on_crtc_cb(void *cb1, void *cb2); +void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res); + +/* CRTC Functions */ +struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev); +struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev); +void pl111_crtc_destroy(struct drm_crtc *crtc); + +bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev, + struct drm_framebuffer *fb); + +int show_framebuffer_on_crtc(struct drm_crtc *crtc, + struct drm_framebuffer *fb, bool page_flip, + struct drm_pending_vblank_event *event); + +/* Common IRQ handler */ +void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc); + +int pl111_crtc_cursor_set(struct drm_crtc *crtc, + struct drm_file *file_priv, + uint32_t handle, + uint32_t width, + uint32_t height); +int pl111_crtc_cursor_move(struct drm_crtc *crtc, + int x, int y); + +/* Connector Functions */ +struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev); +void pl111_connector_destroy(struct drm_connector *connector); +struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device + *dev); + +/* Encoder Functions */ +struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev, + int possible_crtcs); +struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev, + int possible_crtcs); +void pl111_encoder_destroy(struct drm_encoder *encoder); + +/* Frame Buffer Functions */ +struct drm_framebuffer *pl111_fb_create(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_mode_fb_cmd2 *mode_cmd); + +/* VMA Functions */ +int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma); +struct page **get_pages(struct drm_gem_object *obj); +void put_pages(struct drm_gem_object *obj, struct page **pages); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)) 
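+/*
+ * On pre-3.10 kernels the helpers below are expected to back the driver's
+ * vm_operations_struct. A minimal sketch of that wiring (the table name is
+ * hypothetical; the real table lives in the device setup code, which is not
+ * part of this header):
+ *
+ *	static const struct vm_operations_struct pl111_gem_vm_ops = {
+ *		.fault = pl111_gem_fault,
+ *		.open = pl111_gem_vm_open,
+ *		.close = pl111_gem_vm_close,
+ *	};
+ */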
+void pl111_drm_vm_open_locked(struct drm_device *dev, + struct vm_area_struct *vma); +void pl111_gem_vm_open(struct vm_area_struct *vma); +void pl111_gem_vm_close(struct vm_area_struct *vma); +#endif + +/* Suspend Functions */ +int pl111_drm_resume(struct drm_device *dev); +int pl111_drm_suspend(struct drm_device *dev, pm_message_t state); + +/* GEM Functions */ +int pl111_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); +int pl111_dumb_destroy(struct drm_file *file_priv, + struct drm_device *dev, uint32_t handle); +int pl111_dumb_map_offset(struct drm_file *file_priv, + struct drm_device *dev, uint32_t handle, + uint64_t *offset); +void pl111_gem_free_object(struct drm_gem_object *obj); + +int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo, + struct vm_area_struct *vma, size_t size); +void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff); +void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo); + +/* DMA BUF Functions */ +struct drm_gem_object *pl111_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); +int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, + uint32_t handle, uint32_t flags, int *prime_fd); +struct dma_buf *pl111_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *obj, int flags); + +/* Pl111 Functions */ +void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource + *flip_res, struct drm_framebuffer *fb); +int clcd_disable(struct drm_crtc *crtc); +void do_flip_to_res(struct pl111_drm_flip_resource *flip_res); +int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id); +int pl111_amba_remove(struct amba_device *dev); + +int pl111_device_init(struct drm_device *dev); +void pl111_device_fini(struct drm_device *dev); + +void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode, + struct clcd_regs *timing); +void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing, + struct drm_display_mode *mode); +#endif /* PL111_DRM_FUNCS_H_ */ diff --git a/drivers/gpu/drm/pl111/pl111_drm_gem.c b/drivers/gpu/drm/pl111/pl111_drm_gem.c new file mode 100644 index 00000000000..13fb2560569 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm_gem.c @@ -0,0 +1,476 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +/** + * pl111_drm_gem.c + * Implementation of the GEM functions for PL111 DRM + */ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> +#include <asm/cacheflush.h> +#include <asm/outercache.h> +#include "pl111_drm.h" + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) +#include <linux/dma-attrs.h> +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)) +#include <drm/drm_vma_manager.h> +#endif + +void pl111_gem_free_object(struct drm_gem_object *obj) +{ + struct pl111_gem_bo *bo; + struct drm_device *dev = obj->dev; + DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj); + + bo = PL111_BO_FROM_GEM(obj); + + if (obj->import_attach) + drm_prime_gem_destroy(obj, bo->sgt); + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0)) + if (obj->map_list.map != NULL) + drm_gem_free_mmap_offset(obj); +#else + drm_gem_free_mmap_offset(obj); +#endif + /* + * Only free the backing memory if the object has not been imported. + * If it has been imported, the exporter is in charge to free that + * once dmabuf's refcount becomes 0. + */ + if (obj->import_attach) + goto imported_out; + + if (bo->type & PL111_BOT_DMA) { + dma_free_writecombine(dev->dev, obj->size, + bo->backing_data.dma.fb_cpu_addr, + bo->backing_data.dma.fb_dev_addr); + } else if (bo->backing_data.shm.pages != NULL) { + put_pages(obj, bo->backing_data.shm.pages); + } + +imported_out: + drm_gem_object_release(obj); + + kfree(bo); + + DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo); +} + +static int pl111_gem_object_create(struct drm_device *dev, u64 size, + u32 flags, struct drm_file *file_priv, + u32 *handle) +{ + int ret = 0; + struct pl111_gem_bo *bo = NULL; + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (bo == NULL) { + ret = -ENOMEM; + goto finish; + } + + bo->type = flags; + +#ifndef ARCH_HAS_SG_CHAIN + /* + * If the ARCH can't chain we can't have non-contiguous allocs larger + * than a single sg can hold. 
+	 * In this case we fall back to using contiguous memory
+	 */
+	if (!(bo->type & PL111_BOT_DMA)) {
+		unsigned long n_pages =
+				PAGE_ALIGN(size) >> PAGE_SHIFT;
+		if (n_pages > SG_MAX_SINGLE_ALLOC) {
+			bo->type |= PL111_BOT_DMA;
+			/*
+			 * Non-contiguous allocation request changed to
+			 * contiguous
+			 */
+			DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+					n_pages,
+					(unsigned long)SG_MAX_SINGLE_ALLOC);
+		}
+	}
+#endif
+	if (bo->type & PL111_BOT_DMA) {
+		/* scanout compatible - use physically contiguous buffer */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0))
+		DEFINE_DMA_ATTRS(attrs);
+
+		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs);
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_attrs(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL,
+					&attrs);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_attrs failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#else
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_writecombine(dev->dev, size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_writecombine failed\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+		ret = drm_gem_private_object_init(dev, &bo->gem_object,
+							size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not initialise GEM object\n");
+			goto free_dma;
+		}
+#else
+		drm_gem_private_object_init(dev, &bo->gem_object, size);
+#endif
+
+	} else { /* PL111_BOT_SHM */
+		/* not scanout compatible - use SHM backed object */
+		ret = drm_gem_object_init(dev, &bo->gem_object, size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+			ret = -ENOMEM;
+			goto free_bo;
+		}
+		DRM_DEBUG_KMS("Num bytes: %zu\n", bo->gem_object.size);
+	}
+
+	DRM_DEBUG("s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+		size, flags,
+		(bo->type & PL111_BOT_DMA) ? "physaddr" : "shared page array",
+		(bo->type & PL111_BOT_DMA) ?
+ (unsigned long)bo->backing_data.dma.fb_dev_addr: + (unsigned long)bo->backing_data.shm.pages, + bo->type); + + ret = drm_gem_handle_create(file_priv, &bo->gem_object, handle); + if (ret != 0) { + DRM_ERROR("DRM failed to create GEM handle\n"); + goto obj_release; + } + + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(&bo->gem_object); + + return 0; + +obj_release: + drm_gem_object_release(&bo->gem_object); +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0)) +free_dma: +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 5, 0)) + if (bo->type & PL111_BOT_DMA) { + DEFINE_DMA_ATTRS(attrs); + + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + dma_free_attrs(dev->dev, size, + bo->backing_data.dma.fb_cpu_addr, + bo->backing_data.dma.fb_dev_addr, + &attrs); + } +#else + if (bo->type & PL111_BOT_DMA) + dma_free_writecombine(dev->dev, size, + bo->backing_data.dma.fb_cpu_addr, + bo->backing_data.dma.fb_dev_addr); +#endif +free_bo: + kfree(bo); +finish: + return ret; +} + +int pl111_drm_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_pl111_gem_create *args = data; + uint32_t bytes_pp; + + /* Round bpp up, to allow for case where bpp<8 */ + bytes_pp = args->bpp >> 3; + if (args->bpp & ((1 << 3) - 1)) + bytes_pp++; + + if (args->flags & ~PL111_BOT_MASK) { + DRM_ERROR("wrong flags: 0x%x\n", args->flags); + return -EINVAL; + } + + args->pitch = ALIGN(args->width * bytes_pp, 64); + args->size = PAGE_ALIGN(args->pitch * args->height); + + DRM_DEBUG_KMS("gem_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n", + args->width, args->height, args->pitch, args->bpp, + bytes_pp, args->size, args->flags); + + return pl111_gem_object_create(dev, args->size, args->flags, file_priv, + &args->handle); +} + +int pl111_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, struct drm_mode_create_dumb *args) +{ + uint32_t bytes_pp; + + /* Round bpp up, to allow for case where bpp<8 */ + bytes_pp = args->bpp >> 3; + if (args->bpp & ((1 << 3) - 1)) + bytes_pp++; + + if (args->flags) { + DRM_ERROR("flags must be zero: 0x%x\n", args->flags); + return -EINVAL; + } + + args->pitch = ALIGN(args->width * bytes_pp, 64); + args->size = PAGE_ALIGN(args->pitch * args->height); + + DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n", + args->width, args->height, args->pitch, args->bpp, + bytes_pp, args->size, args->flags); + + return pl111_gem_object_create(dev, args->size, + PL111_BOT_DMA | PL111_BOT_UNCACHED, + file_priv, &args->handle); +} + +int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev, + uint32_t handle) +{ + DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__, + file_priv, handle); + return drm_gem_handle_delete(file_priv, handle); +} + +int pl111_dumb_map_offset(struct drm_file *file_priv, + struct drm_device *dev, uint32_t handle, + uint64_t *offset) +{ + struct drm_gem_object *obj; + int ret = 0; + DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__, + file_priv, handle); + + /* GEM does all our handle to object mapping */ + obj = drm_gem_object_lookup(dev, file_priv, handle); + if (obj == NULL) { + ret = -ENOENT; + goto fail; + } + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0)) + if (obj->map_list.map == NULL) { + ret = drm_gem_create_mmap_offset(obj); + if (ret != 0) { + drm_gem_object_unreference_unlocked(obj); + goto fail; + } + } +#else + ret = drm_gem_create_mmap_offset(obj); + if (ret != 0) { + 
drm_gem_object_unreference_unlocked(obj);
+		goto fail;
+	}
+#endif
+
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 11, 0))
+	*offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+#else
+	*offset = drm_vma_node_offset_addr(&obj->vma_node);
+	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+#endif
+
+	drm_gem_object_unreference_unlocked(obj);
+fail:
+	return ret;
+}
+
+/* sync the buffer for DMA access */
+void pl111_gem_sync_to_dma(struct pl111_gem_bo *bo)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED)) {
+		int i, npages = bo->gem_object.size >> PAGE_SHIFT;
+		struct page **pages = bo->backing_data.shm.pages;
+		bool dirty = false;
+
+		for (i = 0; i < npages; i++) {
+			if (!bo->backing_data.shm.dma_addrs[i]) {
+				DRM_DEBUG("%s: dma map page=%d bo=%p\n", __func__, i, bo);
+				bo->backing_data.shm.dma_addrs[i] =
+					dma_map_page(dev->dev, pages[i], 0,
+						PAGE_SIZE, DMA_BIDIRECTIONAL);
+				dirty = true;
+			}
+		}
+
+		if (dirty) {
+			DRM_DEBUG("%s: zap ptes (and flush cache) bo=%p\n", __func__, bo);
+			/*
+			 * TODO MIDEGL-1813
+			 *
+			 * Use flush_cache_page() and outer_flush_range() to
+			 * flush only the user space mappings of the dirty pages
+			 */
+			flush_cache_all();
+			outer_flush_all();
+			unmap_mapping_range(bo->gem_object.filp->f_mapping, 0,
+						bo->gem_object.size, 1);
+		}
+	}
+}
+
+void pl111_gem_sync_to_cpu(struct pl111_gem_bo *bo, int pgoff)
+{
+	struct drm_device *dev = bo->gem_object.dev;
+
+	/*
+	 * TODO MIDEGL-1808
+	 *
+	 * The following check was meant to detect if the CPU is trying to
+	 * access a buffer that is currently mapped for DMA accesses, which is
+	 * illegal as described by the DMA-API.
+	 *
+	 * However, some of our tests do exactly that: the check triggered the
+	 * message below and skipped the dma-unmap (so as not to disturb the
+	 * DMA device), but the tests then failed because the caches were not
+	 * properly flushed.
+	 */
+
+	/*
+	if (bo->sgt) {
+		DRM_ERROR("%s: the CPU is trying to access a dma-mapped buffer\n", __func__);
+		return;
+	}
+	*/
+
+	if (!(bo->type & PL111_BOT_DMA) && (bo->type & PL111_BOT_CACHED) &&
+			bo->backing_data.shm.dma_addrs[pgoff]) {
+		DRM_DEBUG("%s: unmap bo=%p (s=%zu), paddr=%08llx\n",
+				__func__, bo, bo->gem_object.size,
+				(unsigned long long)
+					bo->backing_data.shm.dma_addrs[pgoff]);
+		dma_unmap_page(dev->dev, bo->backing_data.shm.dma_addrs[pgoff],
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		bo->backing_data.shm.dma_addrs[pgoff] = 0;
+	}
+}
+
+/* Based on omapdrm driver */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+		struct vm_area_struct *vma, size_t size)
+{
+	DRM_DEBUG("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p type=%08x\n",
+			__func__, obj, bo, bo->type);
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	if (bo->type & PL111_BOT_WC) {
+		vma->vm_page_prot =
+			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	} else if (bo->type & PL111_BOT_CACHED) {
+		/*
+		 * Objects that do not have a filp (DMA backed) can't be
+		 * mapped as cached now. Write-combine should be enough.
+		 */
+		if (WARN_ON(!obj->filp))
+			return -EINVAL;
+
+		/*
+		 * As explained in Documentation/dma-buf-sharing.txt
+		 * we need this trick so that we can manually zap ptes
+		 * in order to fake coherency.
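+		 *
+		 * Concretely (a sketch, mirroring what pl111_gem_sync_to_dma()
+		 * does with this mapping): once vm_file points at obj->filp,
+		 * one call drops every user PTE of the object and forces
+		 * refaults that rebuild the mapping with coherent contents:
+		 *
+		 *	unmap_mapping_range(obj->filp->f_mapping, 0,
+		 *			    obj->size, 1);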
+ */ + fput(vma->vm_file); + get_file(obj->filp); + vma->vm_file = obj->filp; + + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + } else { /* PL111_BOT_UNCACHED */ + vma->vm_page_prot = + pgprot_noncached(vm_get_page_prot(vma->vm_flags)); + } + return 0; +} + +int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma) +{ + int ret; + struct drm_file *priv = file_priv->private_data; + struct drm_device *dev = priv->minor->dev; +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0)) + struct drm_gem_mm *mm = dev->mm_private; + struct drm_hash_item *hash; + struct drm_local_map *map = NULL; +#else + struct drm_vma_offset_node *node; +#endif + struct drm_gem_object *obj; + struct pl111_gem_bo *bo; + + DRM_DEBUG_KMS("DRM %s\n", __func__); + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(3, 13, 0)) + drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash); + map = drm_hash_entry(hash, struct drm_map_list, hash)->map; + obj = map->handle; +#else + node = drm_vma_offset_exact_lookup(dev->vma_offset_manager, + vma->vm_pgoff, + vma_pages(vma)); + obj = container_of(node, struct drm_gem_object, vma_node); +#endif + bo = PL111_BO_FROM_GEM(obj); + + DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p bo->type 0x%08x\n", __func__, bo, bo->type); + + /* for an imported buffer we let the exporter handle the mmap */ + if (obj->import_attach) + return dma_buf_mmap(obj->import_attach->dmabuf, vma, 0); + + ret = drm_gem_mmap(file_priv, vma); + if (ret < 0) { + DRM_ERROR("failed to mmap\n"); + return ret; + } + + /* Our page fault handler uses the page offset calculated from the vma, + * so we need to remove the gem cookie offset specified in the call. + */ + vma->vm_pgoff = 0; + + return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start); +} diff --git a/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/drivers/gpu/drm/pl111/pl111_drm_pl111.c new file mode 100644 index 00000000000..1d613d0592a --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm_pl111.c @@ -0,0 +1,417 @@ +/* + * + * (C) COPYRIGHT 2012-2014 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +/** + * pl111_drm_pl111.c + * PL111 specific functions for PL111 DRM + */ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> + +#include "pl111_drm.h" + +/* This can't be called from IRQ context, due to clk_get() and board->enable */ +static int clcd_enable(struct drm_framebuffer *fb) +{ + __u32 cntl; + struct clcd_board *board; + + pr_info("DRM %s\n", __func__); + + clk_prepare_enable(priv.clk); + + /* Enable and Power Up */ + cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1); + DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel); + if (fb->bits_per_pixel == 16) + cntl |= CNTL_LCDBPP16_565; + else if (fb->bits_per_pixel == 32 && fb->depth == 24) + cntl |= CNTL_LCDBPP24; + else + BUG_ON(1); + + cntl |= CNTL_BGR; + + writel(cntl, priv.regs + CLCD_PL111_CNTL); + + if (priv.amba_dev->dev.platform_data) { + board = priv.amba_dev->dev.platform_data; + + if (board->enable) + board->enable(NULL); + } + + /* Enable Interrupts */ + writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB); + + return 0; +} + +int clcd_disable(struct drm_crtc *crtc) +{ + struct clcd_board *board; + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc); +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + unsigned long flags; +#endif + + pr_info("DRM %s\n", __func__); + + /* Disable Interrupts */ + writel(0x00000000, priv.regs + CLCD_PL111_IENB); + + if (priv.amba_dev->dev.platform_data) { + board = priv.amba_dev->dev.platform_data; + + if (board->disable) + board->disable(NULL); + } + + /* Disable and Power Down */ + writel(0, priv.regs + CLCD_PL111_CNTL); + + /* Disable clock */ + clk_disable_unprepare(priv.clk); + + pl111_crtc->last_bpp = 0; +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS + spin_lock_irqsave(&pl111_crtc->current_displaying_lock, flags); + /* Release the previous buffers */ + if (pl111_crtc->old_kds_res_set != NULL) + kds_resource_set_release(&pl111_crtc->old_kds_res_set); + + pl111_crtc->old_kds_res_set = NULL; + spin_unlock_irqrestore(&pl111_crtc->current_displaying_lock, flags); +#endif + return 0; +} + +/* + * To avoid a possible race where "pl111_crtc->current_update_res" has + * been updated (non NULL) but the corresponding scanout buffer has not been + * written to the base registers we must always call this function holding + * the "base_update_lock" spinlock with IRQs disabled (spin_lock_irqsave()). + */ +void do_flip_to_res(struct pl111_drm_flip_resource *flip_res) +{ + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc); + struct drm_framebuffer *fb; + struct pl111_gem_bo *bo; + size_t min_size; + fb = flip_res->fb; + bo = PL111_BO_FROM_FRAMEBUFFER(fb); + + + + min_size = (fb->height - 1) * fb->pitches[0] + + fb->width * (fb->bits_per_pixel >> 3); + + BUG_ON(bo->gem_object.size < min_size); + + /* Don't even attempt PL111_BOT_SHM, it's not contiguous */ + BUG_ON(bo->type != PL111_BOT_DMA); + + /* + * Note the buffer for releasing after IRQ, and don't allow any more + * updates until then. + * + * This clcd controller latches the new address on next vsync. Address + * latching is indicated by CLCD_IRQ_NEXTBASE_UPDATE, and so we must + * wait for that before releasing the previous buffer's kds + * resources. 
Otherwise, we'll allow writers to write to the old buffer + * whilst it is still being displayed + */ + pl111_crtc->current_update_res = flip_res; + + DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n", + fb, bo, bo->backing_data.dma.fb_dev_addr); + + if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0) + DRM_ERROR("Could not get vblank reference for crtc %d\n", + pl111_crtc->crtc_index); + + /* Set the scanout buffer */ + writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS); + writel(bo->backing_data.dma.fb_dev_addr + + ((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS); +} + +void +show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res, + struct drm_framebuffer *fb) +{ + unsigned long irq_flags; + struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc); + + spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags); + if (list_empty(&pl111_crtc->update_queue) && + !pl111_crtc->current_update_res) { + do_flip_to_res(flip_res); + } else { + /* + * Enqueue the update to occur on a future IRQ + * This only happens on triple-or-greater buffering + */ + DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n", + fb); + list_add_tail(&flip_res->link, &pl111_crtc->update_queue); + } + spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags); + + if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 || + pl111_crtc->last_bpp != fb->bits_per_pixel || + !drm_mode_equal(pl111_crtc->new_mode, + pl111_crtc->current_mode))) { + struct clcd_regs timing; + + pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing); + + DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n", + timing.tim0, timing.tim1, timing.tim2, + timing.tim3, timing.pixclock); + + /* This is the actual mode setting part */ + clk_set_rate(priv.clk, timing.pixclock); + + writel(timing.tim0, priv.regs + CLCD_TIM0); + writel(timing.tim1, priv.regs + CLCD_TIM1); + writel(timing.tim2, priv.regs + CLCD_TIM2); + writel(timing.tim3, priv.regs + CLCD_TIM3); + + clcd_enable(fb); + pl111_crtc->last_bpp = fb->bits_per_pixel; + } + + if (!flip_res->page_flip) { + drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode); + pl111_crtc->current_mode = pl111_crtc->new_mode; + pl111_crtc->new_mode = NULL; + } + + BUG_ON(!pl111_crtc->current_mode); + + /* + * If IRQs weren't enabled before, they are now. This will eventually + * cause flip_res to be released via pl111_common_irq, which updates + * every time the Base Address is latched (i.e. 
every frame, regardless + * of whether we update the base address or not) + */ +} + +irqreturn_t pl111_irq(int irq, void *data) +{ + u32 irq_stat; + struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc; + + irq_stat = readl(priv.regs + CLCD_PL111_MIS); + + if (!irq_stat) + return IRQ_NONE; + + if (irq_stat & CLCD_IRQ_NEXTBASE_UPDATE) + pl111_common_irq(pl111_crtc); + + /* Clear the interrupt once done */ + writel(irq_stat, priv.regs + CLCD_PL111_ICR); + + return IRQ_HANDLED; +} + +int pl111_device_init(struct drm_device *dev) +{ + struct pl111_drm_dev_private *priv = dev->dev_private; + int ret; + + if (priv == NULL) { + pr_err("%s no private data\n", __func__); + return -EINVAL; + } + + if (priv->amba_dev == NULL) { + pr_err("%s no amba device found\n", __func__); + return -EINVAL; + } + + /* set up MMIO for register access */ + priv->mmio_start = priv->amba_dev->res.start; + priv->mmio_len = resource_size(&priv->amba_dev->res); + + DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start, + priv->mmio_len); + + priv->regs = ioremap(priv->mmio_start, priv->mmio_len); + if (priv->regs == NULL) { + pr_err("%s failed mmio\n", __func__); + return -EINVAL; + } + + /* turn off interrupts */ + writel(0, priv->regs + CLCD_PL111_IENB); + + ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0, + "pl111_irq_handler", NULL); + if (ret != 0) { + pr_err("%s failed %d\n", __func__, ret); + goto out_mmio; + } + + goto finish; + +out_mmio: + iounmap(priv->regs); +finish: + DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret); + return ret; +} + +void pl111_device_fini(struct drm_device *dev) +{ + struct pl111_drm_dev_private *priv = dev->dev_private; + u32 cntl; + + if (priv == NULL || priv->regs == NULL) + return; + + free_irq(priv->amba_dev->irq[0], NULL); + + cntl = readl(priv->regs + CLCD_PL111_CNTL); + + cntl &= ~CNTL_LCDEN; + writel(cntl, priv->regs + CLCD_PL111_CNTL); + + cntl &= ~CNTL_LCDPWR; + writel(cntl, priv->regs + CLCD_PL111_CNTL); + + iounmap(priv->regs); +} + +int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id) +{ + struct clcd_board *board = dev->dev.platform_data; + int ret; + pr_info("DRM %s\n", __func__); + + if (!board) + dev_warn(&dev->dev, "board data not available\n"); + + ret = amba_request_regions(dev, NULL); + if (ret != 0) { + DRM_ERROR("CLCD: unable to reserve regs region\n"); + goto out; + } + + priv.amba_dev = dev; + + priv.clk = clk_get(&priv.amba_dev->dev, NULL); + if (IS_ERR(priv.clk)) { + DRM_ERROR("CLCD: unable to get clk.\n"); + ret = PTR_ERR(priv.clk); + goto clk_err; + } + + return 0; + +clk_err: + amba_release_regions(dev); +out: + return ret; +} + +int pl111_amba_remove(struct amba_device *dev) +{ + DRM_DEBUG_KMS("DRM %s\n", __func__); + + clk_put(priv.clk); + + amba_release_regions(dev); + + priv.amba_dev = NULL; + + return 0; +} + +void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode, + struct clcd_regs *timing) +{ + unsigned int ppl, hsw, hfp, hbp; + unsigned int lpp, vsw, vfp, vbp; + unsigned int cpl; + + memset(timing, 0, sizeof(struct clcd_regs)); + + ppl = (mode->hdisplay / 16) - 1; + hsw = mode->hsync_end - mode->hsync_start - 1; + hfp = mode->hsync_start - mode->hdisplay - 1; + hbp = mode->htotal - mode->hsync_end - 1; + + lpp = mode->vdisplay - 1; + vsw = mode->vsync_end - mode->vsync_start - 1; + vfp = mode->vsync_start - mode->vdisplay; + vbp = mode->vtotal - mode->vsync_end; + + cpl = mode->hdisplay - 1; + + timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24); + timing->tim1 = lpp | (vsw << 10) 
| (vfp << 16) | (vbp << 24); + timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16); + timing->tim3 = 0; + + timing->pixclock = mode->clock * 1000; +} + +void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing, + struct drm_display_mode *mode) +{ + unsigned int ppl, hsw, hfp, hbp; + unsigned int lpp, vsw, vfp, vbp; + + ppl = (timing->tim0 >> 2) & 0x3f; + hsw = (timing->tim0 >> 8) & 0xff; + hfp = (timing->tim0 >> 16) & 0xff; + hbp = (timing->tim0 >> 24) & 0xff; + + lpp = timing->tim1 & 0x3ff; + vsw = (timing->tim1 >> 10) & 0x3f; + vfp = (timing->tim1 >> 16) & 0xff; + vbp = (timing->tim1 >> 24) & 0xff; + + mode->hdisplay = (ppl + 1) * 16; + mode->hsync_start = ((ppl + 1) * 16) + hfp + 1; + mode->hsync_end = ((ppl + 1) * 16) + hfp + hsw + 2; + mode->htotal = ((ppl + 1) * 16) + hfp + hsw + hbp + 3; + mode->hskew = 0; + + mode->vdisplay = lpp + 1; + mode->vsync_start = lpp + vfp + 1; + mode->vsync_end = lpp + vfp + vsw + 2; + mode->vtotal = lpp + vfp + vsw + vbp + 2; + + mode->flags = 0; + + mode->width_mm = 0; + mode->height_mm = 0; + + mode->clock = timing->pixclock / 1000; + mode->hsync = timing->pixclock / mode->htotal; + mode->vrefresh = mode->hsync / mode->vtotal; +} diff --git a/drivers/gpu/drm/pl111/pl111_drm_platform.c b/drivers/gpu/drm/pl111/pl111_drm_platform.c new file mode 100644 index 00000000000..9d5ec0cb175 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm_platform.c @@ -0,0 +1,151 @@ +/* + * + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ * + */ + + + +/** + * pl111_drm_platform.c + * Implementation of the Linux platform device entrypoints for PL111 DRM + */ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/platform_device.h> +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> +#include "pl111_drm.h" + +static int pl111_platform_drm_suspend(struct platform_device *dev, + pm_message_t state) +{ + pr_info("DRM %s\n", __func__); + return 0; +} + +static int pl111_platform_drm_resume(struct platform_device *dev) +{ + pr_info("DRM %s\n", __func__); + return 0; +} + +int pl111_platform_drm_probe(struct platform_device *dev) +{ + pr_info("DRM %s\n", __func__); + return pl111_drm_init(dev); +} + +static int pl111_platform_drm_remove(struct platform_device *dev) +{ + pr_info("DRM %s\n", __func__); + pl111_drm_exit(dev); + + return 0; +} + +static struct amba_id pl111_id_table[] = { + { + .id = 0x00041110, + .mask = 0x000ffffe, + }, + {0, 0}, +}; + +static struct amba_driver pl111_amba_driver = { + .drv = { + .name = "clcd-pl11x", + }, + .probe = pl111_amba_probe, + .remove = pl111_amba_remove, + .id_table = pl111_id_table, +}; + +static struct platform_driver platform_drm_driver = { + .probe = pl111_platform_drm_probe, + .remove = pl111_platform_drm_remove, + .suspend = pl111_platform_drm_suspend, + .resume = pl111_platform_drm_resume, + .driver = { + .owner = THIS_MODULE, + .name = DRIVER_NAME, + }, +}; + +static const struct platform_device_info pl111_drm_pdevinfo = { + .name = DRIVER_NAME, + .id = -1, + .dma_mask = ~0UL +}; + +static struct platform_device *pl111_drm_device; + +static int __init pl111_platform_drm_init(void) +{ + int ret; + + pr_info("DRM %s\n", __func__); + + pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo); + if (pl111_drm_device == NULL) { + pr_err("DRM platform_device_register_full() failed\n"); + return -ENOMEM; + } + + ret = amba_driver_register(&pl111_amba_driver); + if (ret != 0) { + pr_err("DRM amba_driver_register() failed %d\n", ret); + goto err_amba_reg; + } + + ret = platform_driver_register(&platform_drm_driver); + if (ret != 0) { + pr_err("DRM platform_driver_register() failed %d\n", ret); + goto err_pdrv_reg; + } + + return 0; + +err_pdrv_reg: + amba_driver_unregister(&pl111_amba_driver); +err_amba_reg: + platform_device_unregister(pl111_drm_device); + + return ret; +} + +static void __exit pl111_platform_drm_exit(void) +{ + pr_info("DRM %s\n", __func__); + + platform_device_unregister(pl111_drm_device); + amba_driver_unregister(&pl111_amba_driver); + platform_driver_unregister(&platform_drm_driver); +} + +#ifdef MODULE +module_init(pl111_platform_drm_init); +#else +late_initcall(pl111_platform_drm_init); +#endif +module_exit(pl111_platform_drm_exit); + +MODULE_DESCRIPTION(DRIVER_DESC); +MODULE_VERSION(DRIVER_VERSION); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_LICENSE(DRIVER_LICENCE); +MODULE_ALIAS(DRIVER_ALIAS); diff --git a/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/drivers/gpu/drm/pl111/pl111_drm_suspend.c new file mode 100644 index 00000000000..d033566a579 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm_suspend.c @@ -0,0 +1,43 @@ +/* + * + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * pl111_drm_suspend.c + * Implementation of the suspend/resume functions for PL111 DRM + */ + +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> +#include "pl111_drm.h" + +int pl111_drm_suspend(struct drm_device *dev, pm_message_t state) +{ + pr_info("DRM %s\n", __func__); + return 0; +} + +int pl111_drm_resume(struct drm_device *dev) +{ + pr_info("DRM %s\n", __func__); + return 0; +} diff --git a/drivers/gpu/drm/pl111/pl111_drm_vma.c b/drivers/gpu/drm/pl111/pl111_drm_vma.c new file mode 100644 index 00000000000..ff602efd6d6 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_drm_vma.c @@ -0,0 +1,308 @@ +/* + * + * (C) COPYRIGHT 2012-2015 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + */ + + + +/** + * pl111_drm_vma.c + * Implementation of the VM functions for PL111 DRM + */ +#include <linux/amba/bus.h> +#include <linux/amba/clcd.h> +#include <linux/version.h> +#include <linux/shmem_fs.h> +#include <linux/dma-buf.h> +#include <linux/module.h> + +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> +#include "pl111_drm.h" + +/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */ +/** + * drm_gem_put_pages - helper to free backing pages for a GEM object + * @obj: obj in question + * @pages: pages to free + */ +static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages, + bool dirty, bool accessed) +{ + int i, npages; + struct pl111_gem_bo *bo; + npages = obj->size >> PAGE_SHIFT; + bo = PL111_BO_FROM_GEM(obj); + for (i = 0; i < npages; i++) { + if (dirty) + set_page_dirty(pages[i]); + if (accessed) + mark_page_accessed(pages[i]); + /* Undo the reference we took when populating the table */ + page_cache_release(pages[i]); + } + drm_free_large(pages); +} + +void put_pages(struct drm_gem_object *obj, struct page **pages) +{ + int i, npages; + struct pl111_gem_bo *bo; + npages = obj->size >> PAGE_SHIFT; + bo = PL111_BO_FROM_GEM(obj); + _drm_gem_put_pages(obj, pages, true, true); + if (bo->backing_data.shm.dma_addrs) { + for (i = 0; i < npages; i++) { + /* Filter pages unmapped because of CPU accesses */ + if (!bo->backing_data.shm.dma_addrs[i]) + continue; + if (!dma_mapping_error(obj->dev->dev, + bo->backing_data.shm.dma_addrs[i])) { + dma_unmap_page(obj->dev->dev, + bo->backing_data.shm.dma_addrs[i], + PAGE_SIZE, + DMA_BIDIRECTIONAL); + } + } + kfree(bo->backing_data.shm.dma_addrs); + bo->backing_data.shm.dma_addrs = NULL; + } +} + +/** + * drm_gem_get_pages - helper to allocate backing pages for a GEM 
object + * @obj: obj in question + * @gfpmask: gfp mask of requested pages + */ +static struct page **_drm_gem_get_pages(struct drm_gem_object *obj, + gfp_t gfpmask) +{ + struct inode *inode; + struct address_space *mapping; + struct page *p, **pages; + int i, npages; + + /* This is the shared memory object that backs the GEM resource */ + inode = obj->filp->f_path.dentry->d_inode; + mapping = inode->i_mapping; + + npages = obj->size >> PAGE_SHIFT; + + pages = drm_malloc_ab(npages, sizeof(struct page *)); + if (pages == NULL) + return ERR_PTR(-ENOMEM); + + gfpmask |= mapping_gfp_mask(mapping); + + for (i = 0; i < npages; i++) { + p = shmem_read_mapping_page_gfp(mapping, i, gfpmask); + if (IS_ERR(p)) + goto fail; + pages[i] = p; + + /* + * There is a hypothetical issue w/ drivers that require + * buffer memory in the low 4GB.. if the pages are un- + * pinned, and swapped out, they can end up swapped back + * in above 4GB. If pages are already in memory, then + * shmem_read_mapping_page_gfp will ignore the gfpmask, + * even if the already in-memory page disobeys the mask. + * + * It is only a theoretical issue today, because none of + * the devices with this limitation can be populated with + * enough memory to trigger the issue. But this BUG_ON() + * is here as a reminder in case the problem with + * shmem_read_mapping_page_gfp() isn't solved by the time + * it does become a real issue. + * + * See this thread: http://lkml.org/lkml/2011/7/11/238 + */ + BUG_ON((gfpmask & __GFP_DMA32) && + (page_to_pfn(p) >= 0x00100000UL)); + } + + return pages; + +fail: + while (i--) + page_cache_release(pages[i]); + + drm_free_large(pages); + return ERR_PTR(PTR_ERR(p)); +} + +struct page **get_pages(struct drm_gem_object *obj) +{ + struct pl111_gem_bo *bo; + bo = PL111_BO_FROM_GEM(obj); + + if (bo->backing_data.shm.pages == NULL) { + struct page **p; + int npages = obj->size >> PAGE_SHIFT; + int i; + + p = _drm_gem_get_pages(obj, GFP_KERNEL); + if (IS_ERR(p)) + return ERR_PTR(-ENOMEM); + + bo->backing_data.shm.pages = p; + + if (bo->backing_data.shm.dma_addrs == NULL) { + bo->backing_data.shm.dma_addrs = + kzalloc(npages * sizeof(dma_addr_t), + GFP_KERNEL); + if (bo->backing_data.shm.dma_addrs == NULL) + goto error_out; + } + + if (!(bo->type & PL111_BOT_CACHED)) { + for (i = 0; i < npages; ++i) { + bo->backing_data.shm.dma_addrs[i] = + dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(obj->dev->dev, + bo->backing_data.shm.dma_addrs[i])) + goto error_out; + } + } + } + + return bo->backing_data.shm.pages; + +error_out: + put_pages(obj, bo->backing_data.shm.pages); + bo->backing_data.shm.pages = NULL; + return ERR_PTR(-ENOMEM); +} + +/* END drivers/staging/omapdrm/omap_gem_helpers.c */ +int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page **pages; + unsigned long pfn; + struct drm_gem_object *obj = vma->vm_private_data; + struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj); + struct drm_device *dev = obj->dev; + int ret; + + mutex_lock(&dev->struct_mutex); + + /* + * Our mmap calls setup a valid vma->vm_pgoff + * so we can use vmf->pgoff + */ + + if (bo->type & PL111_BOT_DMA) { + pfn = (bo->backing_data.dma.fb_dev_addr >> PAGE_SHIFT) + + vmf->pgoff; + } else { /* PL111_BOT_SHM */ + pages = get_pages(obj); + if (IS_ERR(pages)) { + dev_err(obj->dev->dev, + "could not get pages: %ld\n", PTR_ERR(pages)); + ret = PTR_ERR(pages); + goto error; + } + pfn = page_to_pfn(pages[vmf->pgoff]); + pl111_gem_sync_to_cpu(bo, vmf->pgoff); + } + + 
DRM_DEBUG("bo=%p physaddr=0x%.8llx for offset 0x%.8llx\n",
+			bo, (unsigned long long)PFN_PHYS(pfn),
+			(unsigned long long)PFN_PHYS(vmf->pgoff));
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+error:
+	mutex_unlock(&dev->struct_mutex);
+
+	switch (ret) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+/*
+ * The core drm_vm_ functions in kernel 3.4 are not ready
+ * to handle dma_buf cases where vma->vm_file->private_data
+ * cannot be accessed to get the device.
+ *
+ * We use these functions from 3.5 instead where the device
+ * pointer is passed explicitly.
+ *
+ * However they aren't exported from the kernel until 3.10
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0))
+void pl111_drm_vm_open_locked(struct drm_device *dev,
+		struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *vma_entry;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_inc(&dev->vma_count);
+
+	vma_entry = kmalloc(sizeof(*vma_entry), GFP_KERNEL);
+	if (vma_entry) {
+		vma_entry->vma = vma;
+		vma_entry->pid = current->pid;
+		list_add(&vma_entry->head, &dev->vmalist);
+	}
+}
+
+void pl111_drm_vm_close_locked(struct drm_device *dev,
+		struct vm_area_struct *vma)
+{
+	struct drm_vma_entry *pt, *temp;
+
+	DRM_DEBUG("0x%08lx,0x%08lx\n",
+		vma->vm_start, vma->vm_end - vma->vm_start);
+	atomic_dec(&dev->vma_count);
+
+	list_for_each_entry_safe(pt, temp, &dev->vmalist, head) {
+		if (pt->vma == vma) {
+			list_del(&pt->head);
+			kfree(pt);
+			break;
+		}
+	}
+}
+
+void pl111_gem_vm_open(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+
+	drm_gem_object_reference(obj);
+
+	mutex_lock(&obj->dev->struct_mutex);
+	pl111_drm_vm_open_locked(obj->dev, vma);
+	mutex_unlock(&obj->dev->struct_mutex);
+}
+
+void pl111_gem_vm_close(struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct drm_device *dev = obj->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	pl111_drm_vm_close_locked(obj->dev, vma);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+}
+#endif
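+
+/*
+ * End-to-end illustration (a sketch, not part of the original file; the fd
+ * and sizes are hypothetical): pl111_gem_fault() above is reached when
+ * userspace maps a dumb buffer through the usual sequence.
+ *
+ *	struct drm_mode_create_dumb create = {
+ *		.width = 640, .height = 480, .bpp = 32,
+ *	};
+ *	struct drm_mode_map_dumb map = { 0 };
+ *
+ *	ioctl(drm_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
+ *	map.handle = create.handle;
+ *	ioctl(drm_fd, DRM_IOCTL_MODE_MAP_DUMB, &map);
+ *	ptr = mmap(NULL, create.size, PROT_READ | PROT_WRITE,
+ *		   MAP_SHARED, drm_fd, map.offset);
+ */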