aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/arm/small_task_packing.txt136
-rw-r--r--Documentation/devicetree/bindings/arm/cci.txt172
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.txt3
-rw-r--r--Documentation/devicetree/bindings/arm/rtsm-dcscb.txt19
-rw-r--r--Documentation/devicetree/bindings/mfd/vexpress-spc.txt35
-rw-r--r--Documentation/kernel-parameters.txt24
-rw-r--r--arch/arm/Kconfig128
-rw-r--r--arch/arm/boot/dts/Makefile9
-rw-r--r--arch/arm/boot/dts/clcd-panels.dtsi52
-rw-r--r--arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts159
-rw-r--r--arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts165
-rw-r--r--arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts177
-rw-r--r--arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts171
-rw-r--r--arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts183
-rw-r--r--arch/arm/boot/dts/rtsm_ve-motherboard.dtsi231
-rw-r--r--arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts244
-rw-r--r--arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts358
-rw-r--r--arch/arm/boot/dts/vexpress-v2m-rs1.dtsi1
-rw-r--r--arch/arm/boot/dts/vexpress-v2m.dtsi1
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts4
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts168
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca5s.dts4
-rw-r--r--arch/arm/boot/dts/vexpress-v2p-ca9.dts4
-rw-r--r--arch/arm/common/Makefile4
-rw-r--r--arch/arm/common/bL_switcher.c864
-rw-r--r--arch/arm/common/bL_switcher_dummy_if.c71
-rw-r--r--arch/arm/common/mcpm_entry.c12
-rw-r--r--arch/arm/common/mcpm_head.S16
-rw-r--r--arch/arm/include/asm/arch_timer.h2
-rw-r--r--arch/arm/include/asm/bL_switcher.h83
-rw-r--r--arch/arm/include/asm/cacheflush.h46
-rw-r--r--arch/arm/include/asm/cp15.h14
-rw-r--r--arch/arm/include/asm/hardirq.h2
-rw-r--r--arch/arm/include/asm/mach/arch.h5
-rw-r--r--arch/arm/include/asm/mcpm.h8
-rw-r--r--arch/arm/include/asm/pmu.h12
-rw-r--r--arch/arm/include/asm/psci.h21
-rw-r--r--arch/arm/include/asm/smp.h2
-rw-r--r--arch/arm/include/asm/topology.h34
-rw-r--r--arch/arm/kernel/Makefile5
-rw-r--r--arch/arm/kernel/head.S1
-rw-r--r--arch/arm/kernel/hw_breakpoint.c3
-rw-r--r--arch/arm/kernel/perf_event.c19
-rw-r--r--arch/arm/kernel/perf_event_cpu.c117
-rw-r--r--arch/arm/kernel/perf_event_v7.c57
-rw-r--r--arch/arm/kernel/psci.c49
-rw-r--r--arch/arm/kernel/psci_smp.c84
-rw-r--r--arch/arm/kernel/setup.c22
-rw-r--r--arch/arm/kernel/sleep.S32
-rw-r--r--arch/arm/kernel/smp.c28
-rw-r--r--arch/arm/kernel/topology.c135
-rw-r--r--arch/arm/mach-exynos/mach-exynos5-dt.c26
-rw-r--r--arch/arm/mach-vexpress/Kconfig20
-rw-r--r--arch/arm/mach-vexpress/Makefile8
-rw-r--r--arch/arm/mach-vexpress/core.h2
-rw-r--r--arch/arm/mach-vexpress/dcscb.c236
-rw-r--r--arch/arm/mach-vexpress/dcscb_setup.S38
-rw-r--r--arch/arm/mach-vexpress/include/mach/tc2.h10
-rw-r--r--arch/arm/mach-vexpress/platsmp.c20
-rw-r--r--arch/arm/mach-vexpress/tc2_pm.c277
-rw-r--r--arch/arm/mach-vexpress/tc2_pm_psci.c173
-rw-r--r--arch/arm/mach-vexpress/tc2_pm_setup.S68
-rw-r--r--arch/arm/mach-vexpress/v2m.c29
-rw-r--r--arch/arm/mach-virt/Makefile1
-rw-r--r--arch/arm/mach-virt/platsmp.c50
-rw-r--r--arch/arm/mach-virt/virt.c3
-rw-r--r--arch/arm/mm/cache-v7.S14
-rw-r--r--arch/arm/mm/fault.c13
-rw-r--r--arch/arm/plat-versatile/headsmp.S2
-rw-r--r--arch/arm64/Kconfig135
-rw-r--r--arch/arm64/boot/dts/Makefile3
-rw-r--r--arch/arm64/boot/dts/fvp-base-gicv2-psci.dts287
-rw-r--r--arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi9
-rw-r--r--arch/arm64/include/asm/cmpxchg.h28
-rw-r--r--arch/arm64/include/asm/cpu_ops.h59
-rw-r--r--arch/arm64/include/asm/irq.h1
-rw-r--r--arch/arm64/include/asm/pgtable-3level-types.h2
-rw-r--r--arch/arm64/include/asm/psci.h19
-rw-r--r--arch/arm64/include/asm/smp.h15
-rw-r--r--arch/arm64/include/asm/topology.h73
-rw-r--r--arch/arm64/kernel/Makefile5
-rw-r--r--arch/arm64/kernel/cpu_ops.c99
-rw-r--r--arch/arm64/kernel/cputable.c2
-rw-r--r--arch/arm64/kernel/head.S12
-rw-r--r--arch/arm64/kernel/irq.c61
-rw-r--r--arch/arm64/kernel/process.c7
-rw-r--r--arch/arm64/kernel/psci.c87
-rw-r--r--arch/arm64/kernel/setup.c7
-rw-r--r--arch/arm64/kernel/smp.c233
-rw-r--r--arch/arm64/kernel/smp_psci.c53
-rw-r--r--arch/arm64/kernel/smp_spin_table.c86
-rw-r--r--arch/arm64/kernel/topology.c537
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S1
-rw-r--r--arch/powerpc/include/asm/prom.h3
-rw-r--r--arch/powerpc/kernel/prom.c43
-rw-r--r--block/blk-core.c3
-rw-r--r--block/blk-ioc.c3
-rw-r--r--block/genhd.c12
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/Makefile2
-rw-r--r--drivers/bus/Kconfig7
-rw-r--r--drivers/bus/Makefile2
-rw-r--r--drivers/bus/arm-cci.c945
-rw-r--r--drivers/clk/Kconfig2
-rw-r--r--drivers/clk/versatile/Makefile2
-rw-r--r--drivers/clk/versatile/clk-vexpress-osc.c4
-rw-r--r--drivers/clk/versatile/clk-vexpress-spc.c131
-rw-r--r--drivers/cpufreq/Kconfig2
-rw-r--r--drivers/cpufreq/Kconfig.arm10
-rw-r--r--drivers/cpufreq/Makefile1
-rw-r--r--drivers/cpufreq/arm_big_little.c419
-rw-r--r--drivers/cpufreq/arm_big_little.h17
-rw-r--r--drivers/cpufreq/cpufreq_stats.c56
-rw-r--r--drivers/cpufreq/vexpress_big_little.c86
-rw-r--r--drivers/cpuidle/Makefile2
-rw-r--r--drivers/cpuidle/arm_big_little.c183
-rw-r--r--drivers/cpuidle/cpuidle-calxeda.c14
-rw-r--r--drivers/extcon/extcon-adc-jack.c3
-rw-r--r--drivers/extcon/extcon-gpio.c2
-rw-r--r--drivers/gator/Kconfig33
-rw-r--r--drivers/gator/LICENSE339
-rw-r--r--drivers/gator/Makefile76
-rw-r--r--drivers/gator/gator.h142
-rw-r--r--drivers/gator/gator_annotate.c186
-rw-r--r--drivers/gator/gator_annotate_kernel.c200
-rw-r--r--drivers/gator/gator_backtrace.c168
-rw-r--r--drivers/gator/gator_cookies.c433
-rw-r--r--drivers/gator/gator_events_armv6.c237
-rw-r--r--drivers/gator/gator_events_armv7.c312
-rw-r--r--drivers/gator/gator_events_block.c153
-rw-r--r--drivers/gator/gator_events_ccn-504.c346
-rw-r--r--drivers/gator/gator_events_irq.c165
-rw-r--r--drivers/gator/gator_events_l2c-310.c208
-rw-r--r--drivers/gator/gator_events_mali_4xx.c723
-rw-r--r--drivers/gator/gator_events_mali_4xx.h18
-rw-r--r--drivers/gator/gator_events_mali_common.c81
-rw-r--r--drivers/gator/gator_events_mali_common.h86
-rw-r--r--drivers/gator/gator_events_mali_t6xx.c560
-rw-r--r--drivers/gator/gator_events_mali_t6xx_hw.c784
-rw-r--r--drivers/gator/gator_events_mali_t6xx_hw_test.c55
-rw-r--r--drivers/gator/gator_events_meminfo.c387
-rw-r--r--drivers/gator/gator_events_mmapped.c209
-rw-r--r--drivers/gator/gator_events_net.c172
-rw-r--r--drivers/gator/gator_events_perf_pmu.c587
-rw-r--r--drivers/gator/gator_events_sched.c113
-rw-r--r--drivers/gator/gator_events_scorpion.c669
-rw-r--r--drivers/gator/gator_fs.c382
-rw-r--r--drivers/gator/gator_hrtimer_gator.c86
-rw-r--r--drivers/gator/gator_hrtimer_perf.c113
-rw-r--r--drivers/gator/gator_iks.c197
-rw-r--r--drivers/gator/gator_main.c1532
-rw-r--r--drivers/gator/gator_marshaling.c432
-rw-r--r--drivers/gator/gator_pack.c58
-rw-r--r--drivers/gator/gator_trace_gpu.c294
-rw-r--r--drivers/gator/gator_trace_gpu.h79
-rw-r--r--drivers/gator/gator_trace_power.c203
-rw-r--r--drivers/gator/gator_trace_sched.c270
-rw-r--r--drivers/gator/mali/mali_mjollnir_profiling_gator_api.h163
-rw-r--r--drivers/gator/mali/mali_utgard_profiling_gator_api.h201
-rw-r--r--drivers/gator/mali_t6xx.mk26
-rw-r--r--drivers/irqchip/irq-gic.c156
-rw-r--r--drivers/mfd/Kconfig10
-rw-r--r--drivers/mfd/Makefile1
-rw-r--r--drivers/mfd/vexpress-config.c61
-rw-r--r--drivers/mfd/vexpress-spc.c633
-rw-r--r--drivers/mfd/vexpress-sysreg.c12
-rw-r--r--drivers/net/ethernet/smsc/smc91x.c2
-rw-r--r--drivers/net/phy/phy.c9
-rw-r--r--drivers/of/base.c95
-rw-r--r--drivers/power/reset/Kconfig3
-rw-r--r--drivers/regulator/core.c5
-rw-r--r--drivers/video/Kconfig23
-rw-r--r--drivers/video/Makefile4
-rw-r--r--drivers/video/amba-clcd.c280
-rw-r--r--drivers/video/arm-hdlcd.c844
-rw-r--r--drivers/video/console/fbcon.c2
-rw-r--r--drivers/video/vexpress-dvi.c220
-rw-r--r--include/linux/arm-cci.h61
-rw-r--r--include/linux/arm-hdlcd.h122
-rw-r--r--include/linux/cpu.h1
-rw-r--r--include/linux/irqchip/arm-gic.h9
-rw-r--r--include/linux/of.h7
-rw-r--r--include/linux/sched.h13
-rw-r--r--include/linux/vexpress.h62
-rw-r--r--include/linux/vmstat.h2
-rw-r--r--include/linux/workqueue.h35
-rw-r--r--include/trace/events/arm-ipi.h100
-rw-r--r--include/trace/events/power_cpu_migrate.h67
-rw-r--r--include/trace/events/sched.h274
-rw-r--r--include/trace/events/smp.h90
-rw-r--r--kernel/auditfilter.c2
-rw-r--r--kernel/irq/irqdesc.c29
-rw-r--r--kernel/power/Kconfig20
-rw-r--r--kernel/sched/core.c27
-rw-r--r--kernel/sched/debug.c9
-rw-r--r--kernel/sched/fair.c1761
-rw-r--r--kernel/sched/sched.h13
-rw-r--r--kernel/smp.c12
-rw-r--r--kernel/workqueue.c26
-rw-r--r--linaro/configs/android.conf40
-rw-r--r--linaro/configs/arndale.conf66
-rw-r--r--linaro/configs/big-LITTLE-IKS.conf5
-rw-r--r--linaro/configs/big-LITTLE-MP.conf12
-rw-r--r--linaro/configs/debug.conf1
-rw-r--r--linaro/configs/distribution.conf44
-rw-r--r--linaro/configs/highbank.conf40
-rw-r--r--linaro/configs/kvm-guest.conf11
-rw-r--r--linaro/configs/kvm-host.conf11
-rw-r--r--linaro/configs/linaro-base.conf94
-rw-r--r--linaro/configs/omap4.conf194
l---------linaro/configs/ubuntu-minimal.conf1
-rw-r--r--linaro/configs/vexpress-tuning.conf1
-rw-r--r--linaro/configs/vexpress.conf63
-rw-r--r--linaro/configs/vexpress64.conf32
-rw-r--r--linaro/configs/xen.conf7
-rw-r--r--mm/vmstat.c95
-rw-r--r--sound/soc/soc-compress.c5
-rw-r--r--sound/soc/soc-jack.c2
-rw-r--r--sound/soc/soc-pcm.c5
-rw-r--r--tools/gator/daemon/Android.mk52
-rw-r--r--tools/gator/daemon/Buffer.cpp228
-rw-r--r--tools/gator/daemon/Buffer.h66
-rw-r--r--tools/gator/daemon/CapturedXML.cpp134
-rw-r--r--tools/gator/daemon/CapturedXML.h26
-rw-r--r--tools/gator/daemon/Child.cpp409
-rw-r--r--tools/gator/daemon/Child.h35
-rw-r--r--tools/gator/daemon/Collector.cpp221
-rw-r--r--tools/gator/daemon/Collector.h38
-rw-r--r--tools/gator/daemon/ConfigurationXML.cpp209
-rw-r--r--tools/gator/daemon/ConfigurationXML.h38
-rw-r--r--tools/gator/daemon/Counter.h61
-rw-r--r--tools/gator/daemon/Driver.cpp15
-rw-r--r--tools/gator/daemon/Driver.h48
-rw-r--r--tools/gator/daemon/EventsXML.cpp70
-rw-r--r--tools/gator/daemon/EventsXML.h18
-rw-r--r--tools/gator/daemon/Fifo.cpp130
-rw-r--r--tools/gator/daemon/Fifo.h48
-rw-r--r--tools/gator/daemon/Hwmon.cpp330
-rw-r--r--tools/gator/daemon/Hwmon.h43
-rw-r--r--tools/gator/daemon/KMod.cpp102
-rw-r--r--tools/gator/daemon/KMod.h27
-rw-r--r--tools/gator/daemon/LocalCapture.cpp129
-rw-r--r--tools/gator/daemon/LocalCapture.h26
-rw-r--r--tools/gator/daemon/Logging.cpp78
-rw-r--r--tools/gator/daemon/Logging.h47
-rw-r--r--tools/gator/daemon/Makefile25
-rw-r--r--tools/gator/daemon/Makefile_aarch6415
-rw-r--r--tools/gator/daemon/OlySocket.cpp268
-rw-r--r--tools/gator/daemon/OlySocket.h36
-rw-r--r--tools/gator/daemon/OlyUtility.cpp227
-rw-r--r--tools/gator/daemon/OlyUtility.h42
-rw-r--r--tools/gator/daemon/Sender.cpp135
-rw-r--r--tools/gator/daemon/Sender.h42
-rw-r--r--tools/gator/daemon/SessionData.cpp150
-rw-r--r--tools/gator/daemon/SessionData.h75
-rw-r--r--tools/gator/daemon/SessionXML.cpp109
-rw-r--r--tools/gator/daemon/SessionXML.h42
-rw-r--r--tools/gator/daemon/StreamlineSetup.cpp272
-rw-r--r--tools/gator/daemon/StreamlineSetup.h47
-rw-r--r--tools/gator/daemon/common.mk50
-rw-r--r--tools/gator/daemon/configuration.xml57
-rw-r--r--tools/gator/daemon/escape.c75
-rw-r--r--tools/gator/daemon/events-ARM11.xml39
-rw-r--r--tools/gator/daemon/events-ARM11MPCore.xml26
-rw-r--r--tools/gator/daemon/events-CCI-400.xml107
-rw-r--r--tools/gator/daemon/events-CCN-504.xml122
-rw-r--r--tools/gator/daemon/events-Cortex-A12.xml86
-rw-r--r--tools/gator/daemon/events-Cortex-A15.xml68
-rw-r--r--tools/gator/daemon/events-Cortex-A5.xml36
-rw-r--r--tools/gator/daemon/events-Cortex-A53.xml171
-rw-r--r--tools/gator/daemon/events-Cortex-A57.xml171
-rw-r--r--tools/gator/daemon/events-Cortex-A7.xml43
-rw-r--r--tools/gator/daemon/events-Cortex-A8.xml52
-rw-r--r--tools/gator/daemon/events-Cortex-A9.xml65
-rw-r--r--tools/gator/daemon/events-Krait-architected.xml22
-rw-r--r--tools/gator/daemon/events-L2C-310.xml18
-rw-r--r--tools/gator/daemon/events-Linux.xml17
-rw-r--r--tools/gator/daemon/events-Mali-4xx.xml251
-rw-r--r--tools/gator/daemon/events-Mali-T6xx.xml48
-rw-r--r--tools/gator/daemon/events-Mali-T6xx_hw.xml116
-rw-r--r--tools/gator/daemon/events-Scorpion.xml107
-rw-r--r--tools/gator/daemon/events-ScorpionMP.xml90
-rw-r--r--tools/gator/daemon/events_footer.xml1
-rw-r--r--tools/gator/daemon/events_header.xml2
-rw-r--r--tools/gator/daemon/libsensors/COPYING.LGPL502
-rw-r--r--tools/gator/daemon/libsensors/access.c561
-rw-r--r--tools/gator/daemon/libsensors/access.h33
-rw-r--r--tools/gator/daemon/libsensors/conf-lex.c2881
-rw-r--r--tools/gator/daemon/libsensors/conf-lex.l372
-rw-r--r--tools/gator/daemon/libsensors/conf-parse.c2042
-rw-r--r--tools/gator/daemon/libsensors/conf-parse.h84
-rw-r--r--tools/gator/daemon/libsensors/conf-parse.y347
-rw-r--r--tools/gator/daemon/libsensors/conf.h34
-rw-r--r--tools/gator/daemon/libsensors/data.c278
-rw-r--r--tools/gator/daemon/libsensors/data.h184
-rw-r--r--tools/gator/daemon/libsensors/error.c92
-rw-r--r--tools/gator/daemon/libsensors/error.h74
-rw-r--r--tools/gator/daemon/libsensors/general.c85
-rw-r--r--tools/gator/daemon/libsensors/general.h39
-rw-r--r--tools/gator/daemon/libsensors/init.c341
-rw-r--r--tools/gator/daemon/libsensors/init.h28
-rw-r--r--tools/gator/daemon/libsensors/scanner.h32
-rw-r--r--tools/gator/daemon/libsensors/sensors.h311
-rw-r--r--tools/gator/daemon/libsensors/sysfs.c926
-rw-r--r--tools/gator/daemon/libsensors/sysfs.h43
-rw-r--r--tools/gator/daemon/libsensors/version.h1
-rw-r--r--tools/gator/daemon/main.cpp485
-rw-r--r--tools/gator/daemon/mxml/COPYING507
-rw-r--r--tools/gator/daemon/mxml/config.h96
-rw-r--r--tools/gator/daemon/mxml/mxml-attr.c319
-rw-r--r--tools/gator/daemon/mxml/mxml-entity.c460
-rw-r--r--tools/gator/daemon/mxml/mxml-file.c3082
-rw-r--r--tools/gator/daemon/mxml/mxml-get.c471
-rw-r--r--tools/gator/daemon/mxml/mxml-index.c662
-rw-r--r--tools/gator/daemon/mxml/mxml-node.c807
-rw-r--r--tools/gator/daemon/mxml/mxml-private.c331
-rw-r--r--tools/gator/daemon/mxml/mxml-private.h50
-rw-r--r--tools/gator/daemon/mxml/mxml-search.c287
-rw-r--r--tools/gator/daemon/mxml/mxml-set.c349
-rw-r--r--tools/gator/daemon/mxml/mxml-string.c476
-rw-r--r--tools/gator/daemon/mxml/mxml.h329
-rw-r--r--tools/lib/lk/Makefile3
322 files changed, 49672 insertions, 557 deletions
diff --git a/Documentation/arm/small_task_packing.txt b/Documentation/arm/small_task_packing.txt
new file mode 100644
index 0000000..43f0a8b
--- /dev/null
+++ b/Documentation/arm/small_task_packing.txt
@@ -0,0 +1,136 @@
+Small Task Packing in the big.LITTLE MP Reference Patch Set
+
+What is small task packing?
+----
+Simply that the scheduler will fit as many small tasks on a single CPU
+as possible before using other CPUs. A small task is defined as one
+whose tracked load is less than 90% of a NICE_0 task. This is a change
+from the usual behavior since the scheduler will normally use an idle
+CPU for a waking task unless that task is considered cache hot.
+
+
+How is it implemented?
+----
+Since all small tasks must wake up relatively frequently, the main
+requirement for packing small tasks is to select a partly-busy CPU when
+waking rather than looking for an idle CPU. We use the tracked load of
+the CPU runqueue to determine how heavily loaded each CPU is and the
+tracked load of the task to determine if it will fit on the CPU. We
+always start with the lowest-numbered CPU in a sched domain and stop
+looking when we find a CPU with enough space for the task.
+
+Some further tweaks are necessary to suppress load balancing when the
+CPU is not fully loaded, otherwise the scheduler attempts to spread
+tasks evenly across the domain.
+
+
+How does it interact with the HMP patches?
+----
+Firstly, we only enable packing on the little domain. The intent is that
+the big domain is intended to spread tasks amongst the available CPUs
+one-task-per-CPU. The little domain however is attempting to use as
+little power as possible while servicing its tasks.
+
+Secondly, since we offload big tasks onto little CPUs in order to try
+to devote one CPU to each task, we have a threshold above which we do
+not try to pack a task and instead will select an idle CPU if possible.
+This maintains maximum forward progress for busy tasks temporarily
+demoted from big CPUs.
+
+
+Can the behaviour be tuned?
+----
+Yes, the load level of a 'full' CPU can be easily modified in the source
+and is exposed through sysfs as /sys/kernel/hmp/packing_limit to be
+changed at runtime. The presence of the packing behaviour is controlled
+by CONFIG_SCHED_HMP_LITTLE_PACKING and can be disabled at run-time
+using /sys/kernel/hmp/packing_enable.
+The definition of a small task is hard coded as 90% of NICE_0_LOAD
+and cannot be modified at run time.
+
+
+Why do I need to tune it?
+----
+The optimal configuration is likely to be different depending upon the
+design and manufacturing of your SoC.
+
+In the main, there are two system effects from enabling small task
+packing.
+
+1. CPU operating point may increase
+2. wakeup latency of tasks may be increased
+
+There are also likely to be secondary effects from loading one CPU
+rather than spreading tasks.
+
+Note that all of these system effects are dependent upon the workload
+under consideration.
+
+
+CPU Operating Point
+----
+The primary impact of loading one CPU with a number of light tasks is to
+increase the compute requirement of that CPU since it is no longer idle
+as often. Increased compute requirement causes an increase in the
+frequency of the CPU through CPUfreq.
+
+Consider this example:
+We have a system with 3 CPUs which can operate at any frequency between
+350MHz and 1GHz. The system has 6 tasks which would each produce 10%
+load at 1GHz. The scheduler has frequency-invariant load scaling
+enabled. Our DVFS governor aims for 80% utilization at the chosen
+frequency.
+
+Without task packing, these tasks will be spread out amongst all CPUs
+such that each has 2. This will produce roughly 20% system load, and
+the frequency of the package will remain at 350MHz.
+
+With task packing set to the default packing_limit, all of these tasks
+will sit on one CPU and require a package frequency of ~750MHz to reach
+80% utilization. (0.75 = 0.6 * 0.8).
+
+When a package operates on a single frequency domain, all CPUs in that
+package share frequency and voltage.
+
+Depending upon the SoC implementation there can be a significant amount
+of energy lost to leakage from idle CPUs. The decision about how
+loaded a CPU must be to be considered 'full' is therefore controllable
+through sysfs (sys/kernel/hmp/packing_limit) and directly in the code.
+
+Continuing the example, lets set packing_limit to 450 which means we
+will pack tasks until the total load of all running tasks >= 450. In
+practise, this is very similar to a 55% idle 1Ghz CPU.
+
+Now we are only able to place 4 tasks on CPU0, and two will overflow
+onto CPU1. CPU0 will have a load of 40% and CPU1 will have a load of
+20%. In order to still hit 80% utilization, CPU0 now only needs to
+operate at (0.4*0.8=0.32) 320MHz, which means that the lowest operating
+point will be selected, the same as in the non-packing case, except that
+now CPU2 is no longer needed and can be power-gated.
+
+In order to use less energy, the saving from power-gating CPU2 must be
+more than the energy spent running CPU0 for the extra cycles. This
+depends upon the SoC implementation.
+
+This is obviously a contrived example requiring all the tasks to
+be runnable at the same time, but it illustrates the point.
+
+
+Wakeup Latency
+----
+This is an unavoidable consequence of trying to pack tasks together
+rather than giving them a CPU each. If you cannot find an acceptable
+level of wakeup latency, you should turn packing off.
+
+Cyclictest is a good test application for determining the added latency
+when configuring packing.
+
+
+Why is it turned off for the VersatileExpress V2P_CA15A7 CoreTile?
+----
+Simply, this core tile only has power gating for the whole A7 package.
+When small task packing is enabled, all our low-energy use cases
+normally fit onto one A7 CPU. We therefore end up with 2 mostly-idle
+CPUs and one mostly-busy CPU. This decreases the amount of time
+available where the whole package is idle and can be turned off.
+
diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt
new file mode 100644
index 0000000..92d36e2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cci.txt
@@ -0,0 +1,172 @@
+=======================================================
+ARM CCI cache coherent interconnect binding description
+=======================================================
+
+ARM multi-cluster systems maintain intra-cluster coherency through a
+cache coherent interconnect (CCI) that is capable of monitoring bus
+transactions and manage coherency, TLB invalidations and memory barriers.
+
+It allows snooping and distributed virtual memory message broadcast across
+clusters, through memory mapped interface, with a global control register
+space and multiple sets of interface control registers, one per slave
+interface.
+
+Bindings for the CCI node follow the ePAPR standard, available from:
+
+www.power.org/documentation/epapr-version-1-1/
+
+with the addition of the bindings described in this document which are
+specific to ARM.
+
+* CCI interconnect node
+
+ Description: Describes a CCI cache coherent Interconnect component
+
+ Node name must be "cci".
+ Node's parent must be the root node /, and the address space visible
+ through the CCI interconnect is the same as the one seen from the
+ root node (ie from CPUs perspective as per DT standard).
+ Every CCI node has to define the following properties:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: must be set to
+ "arm,cci-400"
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies base physical
+ address of CCI control registers common to all
+ interfaces.
+
+ - ranges:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Follow rules in the ePAPR for
+ hierarchical bus addressing. CCI interfaces
+ addresses refer to the parent node addressing
+ scheme to declare their register bases.
+
+ CCI interconnect node can define the following child nodes:
+
+ - CCI control interface nodes
+
+ Node name must be "slave-if".
+ Parent node must be CCI interconnect node.
+
+ A CCI control interface node must contain the following
+ properties:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: must be set to
+ "arm,cci-400-ctrl-if"
+
+ - interface-type:
+ Usage: required
+ Value type: <string>
+ Definition: must be set to one of {"ace", "ace-lite"}
+ depending on the interface type the node
+ represents.
+
+ - reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the base address and size of the
+ corresponding interface programming
+ registers.
+
+* CCI interconnect bus masters
+
+ Description: masters in the device tree connected to a CCI port
+ (inclusive of CPUs and their cpu nodes).
+
+ A CCI interconnect bus master node must contain the following
+ properties:
+
+ - cci-control-port:
+ Usage: required
+ Value type: <phandle>
+ Definition: a phandle containing the CCI control interface node
+ the master is connected to.
+
+Example:
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ cci-control-port = <&cci_control1>;
+ reg = <0x0>;
+ };
+
+ CPU1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ cci-control-port = <&cci_control1>;
+ reg = <0x1>;
+ };
+
+ CPU2: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ cci-control-port = <&cci_control2>;
+ reg = <0x100>;
+ };
+
+ CPU3: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ cci-control-port = <&cci_control2>;
+ reg = <0x101>;
+ };
+
+ };
+
+ dma0: dma@3000000 {
+ compatible = "arm,pl330", "arm,primecell";
+ cci-control-port = <&cci_control0>;
+ reg = <0x0 0x3000000 0x0 0x1000>;
+ interrupts = <10>;
+ #dma-cells = <1>;
+ #dma-channels = <8>;
+ #dma-requests = <32>;
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x6000>;
+
+ cci_control0: slave-if@1000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace-lite";
+ reg = <0x1000 0x1000>;
+ };
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+ };
+
+This CCI node corresponds to a CCI component whose control registers sits
+at address 0x000000002c090000.
+CCI slave interface @0x000000002c091000 is connected to dma controller dma0.
+CCI slave interface @0x000000002c094000 is connected to CPUs {CPU0, CPU1};
+CCI slave interface @0x000000002c095000 is connected to CPUs {CPU2, CPU3};
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 343781b..4ce82d0 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -16,6 +16,9 @@ Required properties:
"arm,arm1176-pmu"
"arm,arm1136-pmu"
- interrupts : 1 combined interrupt or 1 per core.
+- cluster : a phandle to the cluster to which it belongs
+ If there are more than one cluster with same CPU type
+ then there should be separate PMU nodes per cluster.
Example:
diff --git a/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
new file mode 100644
index 0000000..3b8fbf3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
@@ -0,0 +1,19 @@
+ARM Dual Cluster System Configuration Block
+-------------------------------------------
+
+The Dual Cluster System Configuration Block (DCSCB) provides basic
+functionality for controlling clocks, resets and configuration pins in
+the Dual Cluster System implemented by the Real-Time System Model (RTSM).
+
+Required properties:
+
+- compatible : should be "arm,rtsm,dcscb"
+
+- reg : physical base address and the size of the registers window
+
+Example:
+
+ dcscb@60000000 {
+ compatible = "arm,rtsm,dcscb";
+ reg = <0x60000000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/mfd/vexpress-spc.txt b/Documentation/devicetree/bindings/mfd/vexpress-spc.txt
new file mode 100644
index 0000000..1d71dc2
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/vexpress-spc.txt
@@ -0,0 +1,35 @@
+* ARM Versatile Express Serial Power Controller device tree bindings
+
+Latest ARM development boards implement a power management interface (serial
+power controller - SPC) that is capable of managing power/voltage and
+operating point transitions, through memory mapped registers interface.
+
+On testchips like TC2 it also provides a configuration interface that can
+be used to read/write values which cannot be read/written through simple
+memory mapped reads/writes.
+
+- spc node
+
+ - compatible:
+ Usage: required
+ Value type: <stringlist>
+ Definition: must be
+ "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc"
+ - reg:
+ Usage: required
+ Value type: <prop-encode-array>
+ Definition: A standard property that specifies the base address
+ and the size of the SPC address space
+ - interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: SPC interrupt configuration. A standard property
+ that follows ePAPR interrupts specifications
+
+Example:
+
+spc: spc@7fff0000 {
+ compatible = "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc";
+ reg = <0 0x7FFF0000 0 0x1000>;
+ interrupts = <0 95 4>;
+};
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1311a48..15b24a2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1240,6 +1240,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
See comment before ip2_setup() in
drivers/char/ip2/ip2base.c.
+ irqaffinity= [SMP] Set the default irq affinity mask
+ Format:
+ <cpu number>,...,<cpu number>
+ or
+ <cpu number>-<cpu number>
+ (must be a positive range in ascending order)
+ or a mixture
+ <cpu number>,...,<cpu number>-<cpu number>
+
irqfixup [HW]
When an interrupt is not handled search all handlers
for it. Intended to get systems with badly broken
@@ -3345,6 +3354,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
that this also can be controlled per-workqueue for
workqueues visible under /sys/bus/workqueue/.
+ workqueue.power_efficient
+ Per-cpu workqueues are generally preferred because
+ they show better performance thanks to cache
+ locality; unfortunately, per-cpu workqueues tend to
+ be more power hungry than unbound workqueues.
+
+ Enabling this makes the per-cpu workqueues which
+ were observed to contribute significantly to power
+ consumption unbound, leading to measurably lower
+ power usage at the cost of small performance
+ overhead.
+
+ The default value of this parameter is determined by
+ the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
+
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 18a9f5e..1116be5 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1494,6 +1494,109 @@ config SCHED_SMT
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.
+config DISABLE_CPU_SCHED_DOMAIN_BALANCE
+ bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
+ help
+ Disables scheduler load-balancing at CPU sched domain level.
+
+config SCHED_HMP
+ bool "(EXPERIMENTAL) Heterogenous multiprocessor scheduling"
+ depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
+ help
+ Experimental scheduler optimizations for heterogeneous platforms.
+ Attempts to introspectively select task affinity to optimize power
+ and performance. Basic support for multiple (>2) cpu types is in place,
+ but it has only been tested with two types of cpus.
+ There is currently no support for migration of task groups, hence
+ !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
+ between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
+ When turned on, this option adds sys/kernel/hmp directory which
+ contains the following files:
+ up_threshold - the load average threshold used for up migration
+ (0 - 1023)
+ down_threshold - the load average threshold used for down migration
+ (0 - 1023)
+ hmp_domains - a list of cpumasks for the present HMP domains,
+ starting with the 'biggest' and ending with the
+ 'smallest'.
+ Note that both the threshold files can be written at runtime to
+ control scheduler behaviour.
+
+config SCHED_HMP_PRIO_FILTER
+ bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
+ depends on SCHED_HMP
+ help
+ Enables task priority based HMP migration filter. Any task with
+ a NICE value above the threshold will always be on low-power cpus
+ with less compute capacity.
+
+config SCHED_HMP_PRIO_FILTER_VAL
+ int "NICE priority threshold"
+ default 5
+ depends on SCHED_HMP_PRIO_FILTER
+
+config HMP_FAST_CPU_MASK
+ string "HMP scheduler fast CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the fast CPUs in the system as a list string,
+ e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_SLOW_CPU_MASK
+ string "HMP scheduler slow CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the slow CPUs in the system as a list string,
+ e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_VARIABLE_SCALE
+ bool "Allows changing the load tracking scale through sysfs"
+ depends on SCHED_HMP
+ help
+ When turned on, this option exports the load average period value
+ for the load tracking patches through sysfs.
+ The values can be modified to change the rate of load accumulation
+ used for HMP migration. 'load_avg_period_ms' is the time in ms to
+ reach a load average of 0.5 for an idle task of 0 load average
+ ratio which becomes 100% busy.
+ For example, with load_avg_period_ms = 128 and up_threshold = 512,
+ a running task with a load of 0 will be migrated to a bigger CPU after
+ 128ms, because after 128ms its load_avg_ratio is 0.5 and the real
+ up_threshold is 0.5.
+ This patch has the same behavior as changing the Y of the load
+ average computation to
+ (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
+ but removes intermediate overflows in computation.
+
+config HMP_FREQUENCY_INVARIANT_SCALE
+ bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
+ depends on SCHED_HMP && CPU_FREQ
+ help
+ Scales the current load contribution in line with the frequency
+ of the CPU that the task was executed on.
+ In this version, we use a simple linear scale derived from the
+ maximum frequency reported by CPUFreq.
+ Restricting tracked load to be scaled by the CPU's frequency
+ represents the consumption of possible compute capacity
+ (rather than consumption of actual instantaneous capacity as
+ normal) and allows the HMP migration's simple threshold
+ migration strategy to interact more predictably with CPUFreq's
+ asynchronous compute capacity changes.
+
+config SCHED_HMP_LITTLE_PACKING
+ bool "Small task packing for HMP"
+ depends on SCHED_HMP
+ default n
+ help
+ Allows the HMP Scheduler to pack small tasks into CPUs in the
+ smallest HMP domain.
+ Controlled by two sysfs files in sys/kernel/hmp.
+ packing_enable: 1 to enable, 0 to disable packing. Default 1.
+ packing_limit: runqueue load ratio where a RQ is considered
+ to be full. Default is NICE_0_LOAD * 9/8.
+
config HAVE_ARM_SCU
bool
help
@@ -1521,6 +1624,31 @@ config MCPM
for (multi-)cluster based systems, such as big.LITTLE based
systems.
+config BIG_LITTLE
+ bool "big.LITTLE support (Experimental)"
+ depends on CPU_V7 && SMP
+ select MCPM
+ help
+ This option enables support for the big.LITTLE architecture.
+
+config BL_SWITCHER
+ bool "big.LITTLE switcher support"
+ depends on BIG_LITTLE && MCPM && HOTPLUG_CPU
+ select CPU_PM
+ select ARM_CPU_SUSPEND
+ help
+ The big.LITTLE "switcher" provides the core functionality to
+ transparently handle transition between a cluster of A15's
+ and a cluster of A7's in a big.LITTLE system.
+
+config BL_SWITCHER_DUMMY_IF
+ tristate "Simple big.LITTLE switcher user interface"
+ depends on BL_SWITCHER && DEBUG_KERNEL
+ help
+ This is a simple and dummy char dev interface to control
+ the big.LITTLE switcher core code. It is meant for
+ debugging purposes only.
+
choice
prompt "Memory split"
default VMSPLIT_3G
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index f0895c5..00baf9f 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -202,7 +202,14 @@ dtb-$(CONFIG_ARCH_VERSATILE) += versatile-ab.dtb \
dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2p-ca5s.dtb \
vexpress-v2p-ca9.dtb \
vexpress-v2p-ca15-tc1.dtb \
- vexpress-v2p-ca15_a7.dtb
+ vexpress-v2p-ca15_a7.dtb \
+ rtsm_ve-cortex_a9x2.dtb \
+ rtsm_ve-cortex_a9x4.dtb \
+ rtsm_ve-cortex_a15x1.dtb \
+ rtsm_ve-cortex_a15x2.dtb \
+ rtsm_ve-cortex_a15x4.dtb \
+ rtsm_ve-v2p-ca15x1-ca7x1.dtb \
+ rtsm_ve-v2p-ca15x4-ca7x4.dtb
dtb-$(CONFIG_ARCH_VIRT) += xenvm-4.2.dtb
dtb-$(CONFIG_ARCH_VT8500) += vt8500-bv07.dtb \
wm8505-ref.dtb \
diff --git a/arch/arm/boot/dts/clcd-panels.dtsi b/arch/arm/boot/dts/clcd-panels.dtsi
new file mode 100644
index 0000000..0b0ff6e
--- /dev/null
+++ b/arch/arm/boot/dts/clcd-panels.dtsi
@@ -0,0 +1,52 @@
+/*
+ * ARM Ltd. Versatile Express
+ *
+ */
+
+/ {
+ panels {
+ panel@0 {
+ compatible = "panel";
+ mode = "VGA";
+ refresh = <60>;
+ xres = <640>;
+ yres = <480>;
+ pixclock = <39721>;
+ left_margin = <40>;
+ right_margin = <24>;
+ upper_margin = <32>;
+ lower_margin = <11>;
+ hsync_len = <96>;
+ vsync_len = <2>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+
+ panel@1 {
+ compatible = "panel";
+ mode = "XVGA";
+ refresh = <60>;
+ xres = <1024>;
+ yres = <768>;
+ pixclock = <15748>;
+ left_margin = <152>;
+ right_margin = <48>;
+ upper_margin = <23>;
+ lower_margin = <3>;
+ hsync_len = <104>;
+ vsync_len = <4>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts
new file mode 100644
index 0000000..c9eee91
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts
@@ -0,0 +1,159 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x1CT
+ *
+ * RTSM_VE_Cortex_A15x1.lisa
+ */
+
+/dts-v1/;
+
+/ {
+ model = "RTSM_VE_CortexA15x1";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a15x1", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x80000000>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0 0x2c001000 0 0x1000>,
+ <0 0x2c002000 0 0x1000>,
+ <0 0x2c004000 0 0x2000>,
+ <0 0x2c006000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts
new file mode 100644
index 0000000..853a166
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts
@@ -0,0 +1,165 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x2CT
+ *
+ * RTSM_VE_Cortex_A15x2.lisa
+ */
+
+/dts-v1/;
+
+/ {
+ model = "RTSM_VE_CortexA15x2";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a15x2", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x80000000>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0 0x2c001000 0 0x1000>,
+ <0 0x2c002000 0 0x1000>,
+ <0 0x2c004000 0 0x2000>,
+ <0 0x2c006000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts
new file mode 100644
index 0000000..c1947a3
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts
@@ -0,0 +1,177 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x4CT
+ *
+ * RTSM_VE_Cortex_A15x4.lisa
+ */
+
+/dts-v1/;
+
+/ {
+ model = "RTSM_VE_CortexA15x4";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a15x4", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ };
+
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <2>;
+ };
+
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <3>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x80000000>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0 0x2c001000 0 0x1000>,
+ <0 0x2c002000 0 0x1000>,
+ <0 0x2c004000 0 0x2000>,
+ <0 0x2c006000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts
new file mode 100644
index 0000000..fca6b2f
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts
@@ -0,0 +1,171 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA9MPx2CT
+ *
+ * RTSM_VE_Cortex_A9x2.lisa
+ */
+
+/dts-v1/;
+
+/ {
+ model = "RTSM_VE_CortexA9x2";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a9x2", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <1>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x80000000 0x80000000>;
+ };
+
+ scu@2c000000 {
+ compatible = "arm,cortex-a9-scu";
+ reg = <0x2c000000 0x58>;
+ };
+
+ timer@2c000600 {
+ compatible = "arm,cortex-a9-twd-timer";
+ reg = <0x2c000600 0x20>;
+ interrupts = <1 13 0xf04>;
+ };
+
+ watchdog@2c000620 {
+ compatible = "arm,cortex-a9-twd-wdt";
+ reg = <0x2c000620 0x20>;
+ interrupts = <1 14 0xf04>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x2c001000 0x1000>,
+ <0x2c000100 0x100>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x08000000 0x04000000>,
+ <1 0 0x14000000 0x04000000>,
+ <2 0 0x18000000 0x04000000>,
+ <3 0 0x1c000000 0x04000000>,
+ <4 0 0x0c000000 0x04000000>,
+ <5 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts
new file mode 100644
index 0000000..fd8a6ed
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts
@@ -0,0 +1,183 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA9MPx4CT
+ *
+ * RTSM_VE_Cortex_A9x4.lisa
+ */
+
+/dts-v1/;
+
+/ {
+ model = "RTSM_VE_CortexA9x4";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a9x4", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <1>;
+ };
+
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <2>;
+ };
+
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <3>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x80000000 0x80000000>;
+ };
+
+ scu@2c000000 {
+ compatible = "arm,cortex-a9-scu";
+ reg = <0x2c000000 0x58>;
+ };
+
+ timer@2c000600 {
+ compatible = "arm,cortex-a9-twd-timer";
+ reg = <0x2c000600 0x20>;
+ interrupts = <1 13 0xf04>;
+ };
+
+ watchdog@2c000620 {
+ compatible = "arm,cortex-a9-twd-wdt";
+ reg = <0x2c000620 0x20>;
+ interrupts = <1 14 0xf04>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x2c001000 0x1000>,
+ <0x2c000100 0x100>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x08000000 0x04000000>,
+ <1 0 0x14000000 0x04000000>,
+ <2 0 0x18000000 0x04000000>,
+ <3 0 0x1c000000 0x04000000>,
+ <4 0 0x0c000000 0x04000000>,
+ <5 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi
new file mode 100644
index 0000000..a2d895e
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi
@@ -0,0 +1,231 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * Motherboard component
+ *
+ * VEMotherBoard.lisa
+ */
+
+ motherboard {
+ compatible = "arm,vexpress,v2m-p1", "simple-bus";
+ arm,hbi = <0x190>;
+ arm,vexpress,site = <0>;
+ arm,v2m-memory-map = "rs1";
+ #address-cells = <2>; /* SMB chipselect number and offset */
+ #size-cells = <1>;
+ #interrupt-cells = <1>;
+ ranges;
+
+ flash@0,00000000 {
+ compatible = "arm,vexpress-flash", "cfi-flash";
+ reg = <0 0x00000000 0x04000000>,
+ <4 0x00000000 0x04000000>;
+ bank-width = <4>;
+ };
+
+ vram@2,00000000 {
+ compatible = "arm,vexpress-vram";
+ reg = <2 0x00000000 0x00800000>;
+ };
+
+ ethernet@2,02000000 {
+ compatible = "smsc,lan91c111";
+ reg = <2 0x02000000 0x10000>;
+ interrupts = <15>;
+ };
+
+ iofpga@3,00000000 {
+ compatible = "arm,amba-bus", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 3 0 0x200000>;
+
+ v2m_sysreg: sysreg@010000 {
+ compatible = "arm,vexpress-sysreg";
+ reg = <0x010000 0x1000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ v2m_sysctl: sysctl@020000 {
+ compatible = "arm,sp810", "arm,primecell";
+ reg = <0x020000 0x1000>;
+ clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&smbclk>;
+ clock-names = "refclk", "timclk", "apb_pclk";
+ #clock-cells = <1>;
+ clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3";
+ };
+
+ aaci@040000 {
+ compatible = "arm,pl041", "arm,primecell";
+ reg = <0x040000 0x1000>;
+ interrupts = <11>;
+ clocks = <&smbclk>;
+ clock-names = "apb_pclk";
+ };
+
+ mmci@050000 {
+ compatible = "arm,pl180", "arm,primecell";
+ reg = <0x050000 0x1000>;
+ interrupts = <9 10>;
+ cd-gpios = <&v2m_sysreg 0 0>;
+ wp-gpios = <&v2m_sysreg 1 0>;
+ max-frequency = <12000000>;
+ vmmc-supply = <&v2m_fixed_3v3>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "mclk", "apb_pclk";
+ };
+
+ kmi@060000 {
+ compatible = "arm,pl050", "arm,primecell";
+ reg = <0x060000 0x1000>;
+ interrupts = <12>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "KMIREFCLK", "apb_pclk";
+ };
+
+ kmi@070000 {
+ compatible = "arm,pl050", "arm,primecell";
+ reg = <0x070000 0x1000>;
+ interrupts = <13>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "KMIREFCLK", "apb_pclk";
+ };
+
+ v2m_serial0: uart@090000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x090000 0x1000>;
+ interrupts = <5>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "uartclk", "apb_pclk";
+ };
+
+ v2m_serial1: uart@0a0000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0a0000 0x1000>;
+ interrupts = <6>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "uartclk", "apb_pclk";
+ };
+
+ v2m_serial2: uart@0b0000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0b0000 0x1000>;
+ interrupts = <7>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "uartclk", "apb_pclk";
+ };
+
+ v2m_serial3: uart@0c0000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x0c0000 0x1000>;
+ interrupts = <8>;
+ clocks = <&v2m_clk24mhz>, <&smbclk>;
+ clock-names = "uartclk", "apb_pclk";
+ };
+
+ wdt@0f0000 {
+ compatible = "arm,sp805", "arm,primecell";
+ reg = <0x0f0000 0x1000>;
+ interrupts = <0>;
+ clocks = <&v2m_refclk32khz>, <&smbclk>;
+ clock-names = "wdogclk", "apb_pclk";
+ };
+
+ v2m_timer01: timer@110000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x110000 0x1000>;
+ interrupts = <2>;
+ clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&smbclk>;
+ clock-names = "timclken1", "timclken2", "apb_pclk";
+ };
+
+ v2m_timer23: timer@120000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0x120000 0x1000>;
+ interrupts = <3>;
+ clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&smbclk>;
+ clock-names = "timclken1", "timclken2", "apb_pclk";
+ };
+
+ rtc@170000 {
+ compatible = "arm,pl031", "arm,primecell";
+ reg = <0x170000 0x1000>;
+ interrupts = <4>;
+ clocks = <&smbclk>;
+ clock-names = "apb_pclk";
+ };
+
+ clcd@1f0000 {
+ compatible = "arm,pl111", "arm,primecell";
+ reg = <0x1f0000 0x1000>;
+ interrupts = <14>;
+ clocks = <&v2m_oscclk1>, <&smbclk>;
+ clock-names = "v2m:oscclk1", "apb_pclk";
+ mode = "VGA";
+ use_dma = <0>;
+ framebuffer = <0x18000000 0x00180000>;
+ };
+
+ virtio_block@0130000 {
+ compatible = "virtio,mmio";
+ reg = <0x130000 0x200>;
+ interrupts = <42>;
+ };
+
+ };
+
+ v2m_fixed_3v3: fixedregulator@0 {
+ compatible = "regulator-fixed";
+ regulator-name = "3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ v2m_clk24mhz: clk24mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <24000000>;
+ clock-output-names = "v2m:clk24mhz";
+ };
+
+ v2m_refclk1mhz: refclk1mhz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <1000000>;
+ clock-output-names = "v2m:refclk1mhz";
+ };
+
+ v2m_refclk32khz: refclk32khz {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ clock-output-names = "v2m:refclk32khz";
+ };
+
+ mcc {
+ compatible = "simple-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ v2m_oscclk1: osc@1 {
+ /* CLCD clock */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <23750000 63500000>;
+ #clock-cells = <0>;
+ clock-output-names = "v2m:oscclk1";
+ };
+
+ muxfpga@0 {
+ compatible = "arm,vexpress-muxfpga";
+ arm,vexpress-sysreg,func = <7 0>;
+ };
+
+ shutdown@0 {
+ compatible = "arm,vexpress-shutdown";
+ arm,vexpress-sysreg,func = <8 0>;
+ };
+ };
+ };
diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts
new file mode 100644
index 0000000..fe8cf5d
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts
@@ -0,0 +1,244 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x4CT
+ * ARMCortexA7x4CT
+ * RTSM_VE_Cortex_A15x1_A7x1.lisa
+ */
+
+/dts-v1/;
+
+/memreserve/ 0xff000000 0x01000000;
+
+/ {
+ model = "RTSM_VE_CortexA15x1-A7x1";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a15x1_a7x1", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ clusters {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cluster0: cluster@0 {
+ reg = <0>;
+// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core0: core@0 {
+ reg = <0>;
+ };
+
+ };
+ };
+
+ cluster1: cluster@1 {
+ reg = <1>;
+// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core1: core@0 {
+ reg = <0>;
+ };
+
+ };
+ };
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ cluster = <&cluster0>;
+ core = <&core0>;
+// clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x100>;
+ cluster = <&cluster1>;
+ core = <&core1>;
+// clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x80000000>;
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400", "arm,cci";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+ };
+
+ dcscb@60000000 {
+ compatible = "arm,rtsm,dcscb";
+ reg = <0 0x60000000 0 0x1000>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0 0x2c001000 0 0x1000>,
+ <0 0x2c002000 0 0x1000>,
+ <0 0x2c004000 0 0x2000>,
+ <0 0x2c006000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+
+ gic-cpuif@0 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <0>;
+ cpu = <&cpu0>;
+ };
+ gic-cpuif@1 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <1>;
+ cpu = <&cpu1>;
+ };
+ };
+
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts
new file mode 100644
index 0000000..f715285
--- /dev/null
+++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts
@@ -0,0 +1,358 @@
+/*
+ * ARM Ltd. Fast Models
+ *
+ * Versatile Express (VE) system model
+ * ARMCortexA15x4CT
+ * ARMCortexA7x4CT
+ * RTSM_VE_Cortex_A15x4_A7x4.lisa
+ */
+
+/dts-v1/;
+
+/memreserve/ 0xff000000 0x01000000;
+
+/ {
+ model = "RTSM_VE_CortexA15x4-A7x4";
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,rtsm_ve,cortex_a15x4_a7x4", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ clusters {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cluster0: cluster@0 {
+ reg = <0>;
+// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core0: core@0 {
+ reg = <0>;
+ };
+
+ core1: core@1 {
+ reg = <1>;
+ };
+
+ core2: core@2 {
+ reg = <2>;
+ };
+
+ core3: core@3 {
+ reg = <3>;
+ };
+
+ };
+ };
+
+ cluster1: cluster@1 {
+ reg = <1>;
+// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core4: core@0 {
+ reg = <0>;
+ };
+
+ core5: core@1 {
+ reg = <1>;
+ };
+
+ core6: core@2 {
+ reg = <2>;
+ };
+
+ core7: core@3 {
+ reg = <3>;
+ };
+
+ };
+ };
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ cluster = <&cluster0>;
+ core = <&core0>;
+// clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ cluster = <&cluster0>;
+ core = <&core1>;
+// clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu2: cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <2>;
+ cluster = <&cluster0>;
+ core = <&core2>;
+// clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu3: cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <3>;
+ cluster = <&cluster0>;
+ core = <&core3>;
+// clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu4: cpu@4 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x100>;
+ cluster = <&cluster1>;
+ core = <&core4>;
+// clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+
+ cpu5: cpu@5 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x101>;
+ cluster = <&cluster1>;
+ core = <&core5>;
+// clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+
+ cpu6: cpu@6 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x102>;
+ cluster = <&cluster1>;
+ core = <&core6>;
+// clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+
+ cpu7: cpu@7 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x103>;
+ cluster = <&cluster1>;
+ core = <&core7>;
+// clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0 0x80000000 0 0x80000000>;
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400", "arm,cci";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+ };
+
+ dcscb@60000000 {
+ compatible = "arm,rtsm,dcscb";
+ reg = <0 0x60000000 0 0x1000>;
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0 0x2c001000 0 0x1000>,
+ <0 0x2c002000 0 0x1000>,
+ <0 0x2c004000 0 0x2000>,
+ <0 0x2c006000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+
+ gic-cpuif@0 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <0>;
+ cpu = <&cpu0>;
+ };
+ gic-cpuif@1 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <1>;
+ cpu = <&cpu1>;
+ };
+ gic-cpuif@2 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <2>;
+ cpu = <&cpu2>;
+ };
+ gic-cpuif@3 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <3>;
+ cpu = <&cpu3>;
+ };
+ gic-cpuif@4 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <4>;
+ cpu = <&cpu4>;
+ };
+ gic-cpuif@5 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <5>;
+ cpu = <&cpu5>;
+ };
+ gic-cpuif@6 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <6>;
+ cpu = <&cpu6>;
+ };
+ gic-cpuif@7 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <7>;
+ cpu = <&cpu7>;
+ };
+ };
+
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ /* ACLK clock to the AXI master port on the test chip */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ freq-range = <30000000 50000000>;
+ #clock-cells = <0>;
+ clock-output-names = "extsaxiclk";
+ };
+
+ oscclk1: osc@1 {
+ /* Reference clock for the CLCD */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 1>;
+ freq-range = <10000000 80000000>;
+ #clock-cells = <0>;
+ clock-output-names = "clcdclk";
+ };
+
+ smbclk: oscclk2: osc@2 {
+ /* Reference clock for the test chip internal PLLs */
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 2>;
+ freq-range = <33000000 100000000>;
+ #clock-cells = <0>;
+ clock-output-names = "tcrefclk";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+};
+
+/include/ "clcd-panels.dtsi"
diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
index ac870fb..9584232 100644
--- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
+++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
@@ -228,6 +228,7 @@
};
clcd@1f0000 {
+ status = "disabled";
compatible = "arm,pl111", "arm,primecell";
reg = <0x1f0000 0x1000>;
interrupts = <14>;
diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi b/arch/arm/boot/dts/vexpress-v2m.dtsi
index f142036..6593398 100644
--- a/arch/arm/boot/dts/vexpress-v2m.dtsi
+++ b/arch/arm/boot/dts/vexpress-v2m.dtsi
@@ -227,6 +227,7 @@
};
clcd@1f000 {
+ status = "disabled";
compatible = "arm,pl111", "arm,primecell";
reg = <0x1f000 0x1000>;
interrupts = <14>;
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts
index 9420053..cc6a8c0 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts
@@ -9,6 +9,8 @@
/dts-v1/;
+/memreserve/ 0xbf000000 0x01000000;
+
/ {
model = "V2P-CA15";
arm,hbi = <0x237>;
@@ -57,6 +59,8 @@
interrupts = <0 85 4>;
clocks = <&oscclk5>;
clock-names = "pxlclk";
+ mode = "1024x768-16@60";
+ framebuffer = <0 0xff000000 0 0x01000000>;
};
memory-controller@2b0a0000 {
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index d2803be..f1dc620 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -9,11 +9,13 @@
/dts-v1/;
+/memreserve/ 0xff000000 0x01000000;
+
/ {
model = "V2P-CA15_CA7";
arm,hbi = <0x249>;
arm,vexpress,site = <0xf>;
- compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress";
+ compatible = "arm,vexpress,v2p-ca15_a7", "arm,vexpress", "arm,generic";
interrupt-parent = <&gic>;
#address-cells = <2>;
#size-cells = <2>;
@@ -29,44 +31,106 @@
i2c1 = &v2m_i2c_pcie;
};
- cpus {
+ clusters {
#address-cells = <1>;
#size-cells = <0>;
- cpu0: cpu@0 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
+ cluster0: cluster@0 {
reg = <0>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core0: core@0 {
+ reg = <0>;
+ };
+
+ core1: core@1 {
+ reg = <1>;
+ };
+
+ };
};
- cpu1: cpu@1 {
- device_type = "cpu";
- compatible = "arm,cortex-a15";
+ cluster1: cluster@1 {
reg = <1>;
+ cores {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ core2: core@0 {
+ reg = <0>;
+ };
+
+ core3: core@1 {
+ reg = <1>;
+ };
+
+ core4: core@2 {
+ reg = <2>;
+ };
+ };
};
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
cpu2: cpu@2 {
device_type = "cpu";
compatible = "arm,cortex-a7";
reg = <0x100>;
+ cluster = <&cluster1>;
+ core = <&core2>;
+ clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
};
cpu3: cpu@3 {
device_type = "cpu";
compatible = "arm,cortex-a7";
reg = <0x101>;
+ cluster = <&cluster1>;
+ core = <&core3>;
+ clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
};
cpu4: cpu@4 {
device_type = "cpu";
compatible = "arm,cortex-a7";
reg = <0x102>;
+ cluster = <&cluster1>;
+ core = <&core4>;
+ clock-frequency = <800000000>;
+ cci-control-port = <&cci_control2>;
+ };
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ cluster = <&cluster0>;
+ core = <&core0>;
+ clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
+ };
+
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ cluster = <&cluster0>;
+ core = <&core1>;
+ clock-frequency = <1000000000>;
+ cci-control-port = <&cci_control1>;
};
};
memory@80000000 {
device_type = "memory";
- reg = <0 0x80000000 0 0x40000000>;
+ reg = <0 0x80000000 0 0x80000000>;
};
wdt@2a490000 {
@@ -81,6 +145,8 @@
compatible = "arm,hdlcd";
reg = <0 0x2b000000 0 0x1000>;
interrupts = <0 85 4>;
+ mode = "1024x768-16@60";
+ framebuffer = <0 0xff000000 0 0x01000000>;
clocks = <&oscclk5>;
clock-names = "pxlclk";
};
@@ -102,6 +168,64 @@
<0 0x2c004000 0 0x2000>,
<0 0x2c006000 0 0x2000>;
interrupts = <1 9 0xf04>;
+
+ gic-cpuif@0 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <0>;
+ cpu = <&cpu0>;
+ };
+ gic-cpuif@1 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <1>;
+ cpu = <&cpu1>;
+ };
+ gic-cpuif@2 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <2>;
+ cpu = <&cpu2>;
+ };
+
+ gic-cpuif@3 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <3>;
+ cpu = <&cpu3>;
+ };
+
+ gic-cpuif@4 {
+ compatible = "arm,gic-cpuif";
+ cpuif-id = <4>;
+ cpu = <&cpu4>;
+ };
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+ };
+
+ cci-pmu@2c099000 {
+ compatible = "arm,cci-400-pmu";
+ reg = <0 0x2c099000 0 0x6000>;
+ interrupts = <0 101 4>,
+ <0 102 4>,
+ <0 103 4>,
+ <0 104 4>,
+ <0 105 4>;
};
memory-controller@7ffd0000 {
@@ -125,6 +249,12 @@
clock-names = "apb_pclk";
};
+ spc@7fff0000 {
+ compatible = "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc";
+ reg = <0 0x7fff0000 0 0x1000>;
+ interrupts = <0 95 4>;
+ };
+
timer {
compatible = "arm,armv7-timer";
interrupts = <1 13 0xf08>,
@@ -133,12 +263,21 @@
<1 10 0xf08>;
};
- pmu {
+ pmu_a15 {
compatible = "arm,cortex-a15-pmu";
+ cluster = <&cluster0>;
interrupts = <0 68 4>,
<0 69 4>;
};
+ pmu_a7 {
+ compatible = "arm,cortex-a7-pmu";
+ cluster = <&cluster1>;
+ interrupts = <0 128 4>,
+ <0 129 4>,
+ <0 130 4>;
+ };
+
oscclk6a: oscclk6a {
/* Reference 24MHz clock */
compatible = "fixed-clock";
@@ -147,6 +286,15 @@
clock-output-names = "oscclk6a";
};
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0x80100001>;
+ cpu_off = <0x80100002>;
+ cpu_on = <0x80100003>;
+ migrate = <0x80100004>;
+ };
+
dcc {
compatible = "arm,vexpress,config-bus";
arm,vexpress,config-bridge = <&v2m_sysreg>;
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
index c544a55..cf633ed 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts
@@ -9,6 +9,8 @@
/dts-v1/;
+/memreserve/ 0xbf000000 0x01000000;
+
/ {
model = "V2P-CA5s";
arm,hbi = <0x225>;
@@ -59,6 +61,8 @@
interrupts = <0 85 4>;
clocks = <&oscclk3>;
clock-names = "pxlclk";
+ mode = "640x480-16@60";
+ framebuffer = <0xbf000000 0x01000000>;
};
memory-controller@2a150000 {
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts
index 62d9b22..f83706b 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts
@@ -9,6 +9,8 @@
/dts-v1/;
+/include/ "clcd-panels.dtsi"
+
/ {
model = "V2P-CA9";
arm,hbi = <0x191>;
@@ -73,6 +75,8 @@
interrupts = <0 44 4>;
clocks = <&oscclk1>, <&oscclk2>;
clock-names = "clcdclk", "apb_pclk";
+ mode = "XVGA";
+ use_dma = <1>;
};
memory-controller@100e0000 {
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 48434cb..462cd58 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -14,5 +14,9 @@ obj-$(CONFIG_SHARP_SCOOP) += scoop.o
obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o
obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o
obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o
+obj-$(CONFIG_BL_SWITCHER) += bL_switcher.o
+obj-$(CONFIG_BL_SWITCHER_DUMMY_IF) += bL_switcher_dummy_if.o
+
AFLAGS_mcpm_head.o := -march=armv7-a
AFLAGS_vlock.o := -march=armv7-a
+CFLAGS_REMOVE_mcpm_entry.o = -pg
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
new file mode 100644
index 0000000..8fee70d
--- /dev/null
+++ b/arch/arm/common/bL_switcher.c
@@ -0,0 +1,864 @@
+/*
+ * arch/arm/common/bL_switcher.c -- big.LITTLE cluster switcher core driver
+ *
+ * Created by: Nicolas Pitre, March 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cpu_pm.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
+#include <linux/time.h>
+#include <linux/clockchips.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/notifier.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/moduleparam.h>
+
+#include <asm/smp_plat.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/suspend.h>
+#include <asm/mcpm.h>
+#include <asm/bL_switcher.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/power_cpu_migrate.h>
+
+
+/*
+ * Use our own MPIDR accessors as the generic ones in asm/cputype.h have
+ * __attribute_const__ and we don't want the compiler to assume any
+ * constness here as the value _does_ change along some code paths.
+ */
+
+static int read_mpidr(void)
+{
+ unsigned int id;
+ asm volatile ("mrc\tp15, 0, %0, c0, c0, 5" : "=r" (id));
+ return id & MPIDR_HWID_BITMASK;
+}
+
+/*
+ * Get a global nanosecond time stamp for tracing.
+ */
+static s64 get_ns(void)
+{
+ struct timespec ts;
+ getnstimeofday(&ts);
+ return timespec_to_ns(&ts);
+}
+
+/*
+ * bL switcher core code.
+ */
+
+static void bL_do_switch(void *_arg)
+{
+ unsigned ib_mpidr, ib_cpu, ib_cluster;
+ long volatile handshake, **handshake_ptr = _arg;
+
+ pr_debug("%s\n", __func__);
+
+ ib_mpidr = cpu_logical_map(smp_processor_id());
+ ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+ ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+ /* Advertise our handshake location */
+ if (handshake_ptr) {
+ handshake = 0;
+ *handshake_ptr = &handshake;
+ } else
+ handshake = -1;
+
+ /*
+ * Our state has been saved at this point. Let's release our
+ * inbound CPU.
+ */
+ mcpm_set_entry_vector(ib_cpu, ib_cluster, cpu_resume);
+ sev();
+
+ /*
+ * From this point, we must assume that our counterpart CPU might
+ * have taken over in its parallel world already, as if execution
+ * just returned from cpu_suspend(). It is therefore important to
+ * be very careful not to make any change the other guy is not
+ * expecting. This is why we need stack isolation.
+ *
+ * Fancy under cover tasks could be performed here. For now
+ * we have none.
+ */
+
+ /*
+ * Let's wait until our inbound is alive.
+ */
+ while (!handshake) {
+ wfe();
+ smp_mb();
+ }
+
+ /* Let's put ourself down. */
+ mcpm_cpu_power_down();
+
+ /* should never get here */
+ BUG();
+}
+
+/*
+ * Stack isolation. To ensure 'current' remains valid, we just use another
+ * piece of our thread's stack space which should be fairly lightly used.
+ * The selected area starts just above the thread_info structure located
+ * at the very bottom of the stack, aligned to a cache line, and indexed
+ * with the cluster number.
+ */
+#define STACK_SIZE 512
+extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+static int bL_switchpoint(unsigned long _arg)
+{
+ unsigned int mpidr = read_mpidr();
+ unsigned int clusterid = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ void *stack = current_thread_info() + 1;
+ stack = PTR_ALIGN(stack, L1_CACHE_BYTES);
+ stack += clusterid * STACK_SIZE + STACK_SIZE;
+ call_with_stack(bL_do_switch, (void *)_arg, stack);
+ BUG();
+}
+
+/*
+ * Generic switcher interface
+ */
+
+static unsigned int bL_gic_id[MAX_CPUS_PER_CLUSTER][MAX_NR_CLUSTERS];
+static int bL_switcher_cpu_pairing[NR_CPUS];
+
+/*
+ * bL_switch_to - Switch to a specific cluster for the current CPU
+ * @new_cluster_id: the ID of the cluster to switch to.
+ *
+ * This function must be called on the CPU to be switched.
+ * Returns 0 on success, else a negative status code.
+ */
+static int bL_switch_to(unsigned int new_cluster_id)
+{
+ unsigned int mpidr, this_cpu, that_cpu;
+ unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
+ struct completion inbound_alive;
+ struct tick_device *tdev;
+ enum clock_event_mode tdev_mode;
+ long volatile *handshake_ptr;
+ int ipi_nr, ret;
+
+ this_cpu = smp_processor_id();
+ ob_mpidr = read_mpidr();
+ ob_cpu = MPIDR_AFFINITY_LEVEL(ob_mpidr, 0);
+ ob_cluster = MPIDR_AFFINITY_LEVEL(ob_mpidr, 1);
+ BUG_ON(cpu_logical_map(this_cpu) != ob_mpidr);
+
+ if (new_cluster_id == ob_cluster)
+ return 0;
+
+ that_cpu = bL_switcher_cpu_pairing[this_cpu];
+ ib_mpidr = cpu_logical_map(that_cpu);
+ ib_cpu = MPIDR_AFFINITY_LEVEL(ib_mpidr, 0);
+ ib_cluster = MPIDR_AFFINITY_LEVEL(ib_mpidr, 1);
+
+ pr_debug("before switch: CPU %d MPIDR %#x -> %#x\n",
+ this_cpu, ob_mpidr, ib_mpidr);
+
+ this_cpu = smp_processor_id();
+
+ /* Close the gate for our entry vectors */
+ mcpm_set_entry_vector(ob_cpu, ob_cluster, NULL);
+ mcpm_set_entry_vector(ib_cpu, ib_cluster, NULL);
+
+ /* Install our "inbound alive" notifier. */
+ init_completion(&inbound_alive);
+ ipi_nr = register_ipi_completion(&inbound_alive, this_cpu);
+ ipi_nr |= ((1 << 16) << bL_gic_id[ob_cpu][ob_cluster]);
+ mcpm_set_early_poke(ib_cpu, ib_cluster, gic_get_sgir_physaddr(), ipi_nr);
+
+ /*
+ * Let's wake up the inbound CPU now in case it requires some delay
+ * to come online, but leave it gated in our entry vector code.
+ */
+ ret = mcpm_cpu_power_up(ib_cpu, ib_cluster);
+ if (ret) {
+ pr_err("%s: mcpm_cpu_power_up() returned %d\n", __func__, ret);
+ return ret;
+ }
+
+ /*
+ * Raise a SGI on the inbound CPU to make sure it doesn't stall
+ * in a possible WFI, such as in bL_power_down().
+ */
+ gic_send_sgi(bL_gic_id[ib_cpu][ib_cluster], 0);
+
+ /*
+ * Wait for the inbound to come up. This allows for other
+ * tasks to be scheduled in the mean time.
+ */
+ wait_for_completion(&inbound_alive);
+ mcpm_set_early_poke(ib_cpu, ib_cluster, 0, 0);
+
+ /*
+ * From this point we are entering the switch critical zone
+ * and can't sleep/schedule anymore.
+ */
+ local_irq_disable();
+ local_fiq_disable();
+ trace_cpu_migrate_begin(get_ns(), ob_mpidr);
+
+ /* redirect GIC's SGIs to our counterpart */
+ gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
+
+ tdev = tick_get_device(this_cpu);
+ if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
+ tdev = NULL;
+ if (tdev) {
+ tdev_mode = tdev->evtdev->mode;
+ clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+ }
+
+ ret = cpu_pm_enter();
+
+ /* we can not tolerate errors at this point */
+ if (ret)
+ panic("%s: cpu_pm_enter() returned %d\n", __func__, ret);
+
+ /*
+ * Swap the physical CPUs in the logical map for this logical CPU.
+ * This must be flushed to RAM as the resume code
+ * needs to access it while the caches are still disabled.
+ */
+ cpu_logical_map(this_cpu) = ib_mpidr;
+ cpu_logical_map(that_cpu) = ob_mpidr;
+ sync_cache_w(&cpu_logical_map(this_cpu));
+
+ /* Let's do the actual CPU switch. */
+ ret = cpu_suspend((unsigned long)&handshake_ptr, bL_switchpoint);
+ if (ret > 0)
+ panic("%s: cpu_suspend() returned %d\n", __func__, ret);
+
+ /* We are executing on the inbound CPU at this point */
+ mpidr = read_mpidr();
+ pr_debug("after switch: CPU %d MPIDR %#x\n", this_cpu, mpidr);
+ BUG_ON(mpidr != ib_mpidr);
+
+ mcpm_cpu_powered_up();
+
+ ret = cpu_pm_exit();
+
+ if (tdev) {
+ clockevents_set_mode(tdev->evtdev, tdev_mode);
+ clockevents_program_event(tdev->evtdev,
+ tdev->evtdev->next_event, 1);
+ }
+
+ trace_cpu_migrate_finish(get_ns(), ib_mpidr);
+ local_fiq_enable();
+ local_irq_enable();
+
+ *handshake_ptr = 1;
+ dsb_sev();
+
+ if (ret)
+ pr_err("%s exiting with error %d\n", __func__, ret);
+ return ret;
+}
+
+struct bL_thread {
+ spinlock_t lock;
+ struct task_struct *task;
+ wait_queue_head_t wq;
+ int wanted_cluster;
+ struct completion started;
+ bL_switch_completion_handler completer;
+ void *completer_cookie;
+};
+
+static struct bL_thread bL_threads[NR_CPUS];
+
+static int bL_switcher_thread(void *arg)
+{
+ struct bL_thread *t = arg;
+ struct sched_param param = { .sched_priority = 1 };
+ int cluster;
+ bL_switch_completion_handler completer;
+ void *completer_cookie;
+
+ sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
+ complete(&t->started);
+
+ do {
+ if (signal_pending(current))
+ flush_signals(current);
+ wait_event_interruptible(t->wq,
+ t->wanted_cluster != -1 ||
+ kthread_should_stop());
+
+ spin_lock(&t->lock);
+ cluster = t->wanted_cluster;
+ completer = t->completer;
+ completer_cookie = t->completer_cookie;
+ t->wanted_cluster = -1;
+ t->completer = NULL;
+ spin_unlock(&t->lock);
+
+ if (cluster != -1) {
+ bL_switch_to(cluster);
+
+ if (completer)
+ completer(completer_cookie);
+ }
+ } while (!kthread_should_stop());
+
+ return 0;
+}
+
+static struct task_struct * bL_switcher_thread_create(int cpu, void *arg)
+{
+ struct task_struct *task;
+
+ task = kthread_create_on_node(bL_switcher_thread, arg,
+ cpu_to_node(cpu), "kswitcher_%d", cpu);
+ if (!IS_ERR(task)) {
+ kthread_bind(task, cpu);
+ wake_up_process(task);
+ } else
+ pr_err("%s failed for CPU %d\n", __func__, cpu);
+ return task;
+}
+
+/*
+ * bL_switch_request_cb - Switch to a specific cluster for the given CPU,
+ * with completion notification via a callback
+ *
+ * @cpu: the CPU to switch
+ * @new_cluster_id: the ID of the cluster to switch to.
+ * @completer: switch completion callback. if non-NULL,
+ * @completer(@completer_cookie) will be called on completion of
+ * the switch, in non-atomic context.
+ * @completer_cookie: opaque context argument for @completer.
+ *
+ * This function causes a cluster switch on the given CPU by waking up
+ * the appropriate switcher thread. This function may or may not return
+ * before the switch has occurred.
+ *
+ * If a @completer callback function is supplied, it will be called when
+ * the switch is complete. This can be used to determine asynchronously
+ * when the switch is complete, regardless of when bL_switch_request()
+ * returns. When @completer is supplied, no new switch request is permitted
+ * for the affected CPU until after the switch is complete, and @completer
+ * has returned.
+ */
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+ bL_switch_completion_handler completer,
+ void *completer_cookie)
+{
+ struct bL_thread *t;
+
+ if (cpu >= ARRAY_SIZE(bL_threads)) {
+ pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
+ return -EINVAL;
+ }
+
+ t = &bL_threads[cpu];
+
+ if (IS_ERR(t->task))
+ return PTR_ERR(t->task);
+ if (!t->task)
+ return -ESRCH;
+
+ spin_lock(&t->lock);
+ if (t->completer) {
+ spin_unlock(&t->lock);
+ return -EBUSY;
+ }
+ t->completer = completer;
+ t->completer_cookie = completer_cookie;
+ t->wanted_cluster = new_cluster_id;
+ spin_unlock(&t->lock);
+ wake_up(&t->wq);
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(bL_switch_request_cb);
+
+/*
+ * Detach an outstanding switch request.
+ *
+ * The switcher will continue with the switch request in the background,
+ * but the completer function will not be called.
+ *
+ * This may be necessary if the completer is in a kernel module which is
+ * about to be unloaded.
+ */
+void bL_switch_request_detach(unsigned int cpu,
+ bL_switch_completion_handler completer)
+{
+ struct bL_thread *t;
+
+ if (cpu >= ARRAY_SIZE(bL_threads)) {
+ pr_err("%s: cpu %d out of bounds\n", __func__, cpu);
+ return;
+ }
+
+ t = &bL_threads[cpu];
+
+ if (IS_ERR(t->task) || !t->task)
+ return;
+
+ spin_lock(&t->lock);
+ if (t->completer == completer)
+ t->completer = NULL;
+ spin_unlock(&t->lock);
+}
+
+EXPORT_SYMBOL_GPL(bL_switch_request_detach);
+
+/*
+ * Activation and configuration code.
+ */
+
+static DEFINE_MUTEX(bL_switcher_activation_lock);
+static BLOCKING_NOTIFIER_HEAD(bL_activation_notifier);
+static unsigned int bL_switcher_active;
+static unsigned int bL_switcher_cpu_original_cluster[NR_CPUS];
+static cpumask_t bL_switcher_removed_logical_cpus;
+
+int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_register_notifier);
+
+int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&bL_activation_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_unregister_notifier);
+
+static int bL_activation_notify(unsigned long val)
+{
+ int ret;
+
+ ret = blocking_notifier_call_chain(&bL_activation_notifier, val, NULL);
+ if (ret & NOTIFY_STOP_MASK)
+ pr_err("%s: notifier chain failed with status 0x%x\n",
+ __func__, ret);
+ return notifier_to_errno(ret);
+}
+
+static void bL_switcher_restore_cpus(void)
+{
+ int i;
+
+ for_each_cpu(i, &bL_switcher_removed_logical_cpus)
+ cpu_up(i);
+}
+
+static int bL_switcher_halve_cpus(void)
+{
+ int i, j, cluster_0, gic_id, ret;
+ unsigned int cpu, cluster, mask;
+ cpumask_t available_cpus;
+
+ /* First pass to validate what we have */
+ mask = 0;
+ for_each_online_cpu(i) {
+ cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+ if (cluster >= 2) {
+ pr_err("%s: only dual cluster systems are supported\n", __func__);
+ return -EINVAL;
+ }
+ if (WARN_ON(cpu >= MAX_CPUS_PER_CLUSTER))
+ return -EINVAL;
+ mask |= (1 << cluster);
+ }
+ if (mask != 3) {
+ pr_err("%s: no CPU pairing possible\n", __func__);
+ return -EINVAL;
+ }
+
+ /*
+ * Now let's do the pairing. We match each CPU with another CPU
+ * from a different cluster. To get a uniform scheduling behavior
+ * without fiddling with CPU topology and compute capacity data,
+ * we'll use logical CPUs initially belonging to the same cluster.
+ */
+ memset(bL_switcher_cpu_pairing, -1, sizeof(bL_switcher_cpu_pairing));
+ cpumask_copy(&available_cpus, cpu_online_mask);
+ cluster_0 = -1;
+ for_each_cpu(i, &available_cpus) {
+ int match = -1;
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+ if (cluster_0 == -1)
+ cluster_0 = cluster;
+ if (cluster != cluster_0)
+ continue;
+ cpumask_clear_cpu(i, &available_cpus);
+ for_each_cpu(j, &available_cpus) {
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(j), 1);
+ /*
+ * Let's remember the last match to create "odd"
+ * pairing on purpose in order for other code not
+ * to assume any relation between physical and
+ * logical CPU numbers.
+ */
+ if (cluster != cluster_0)
+ match = j;
+ }
+ if (match != -1) {
+ bL_switcher_cpu_pairing[i] = match;
+ cpumask_clear_cpu(match, &available_cpus);
+ pr_info("CPU%d paired with CPU%d\n", i, match);
+ }
+ }
+
+ /*
+ * Now we disable the unwanted CPUs i.e. everything that has no
+ * pairing information (that includes the pairing counterparts).
+ */
+ cpumask_clear(&bL_switcher_removed_logical_cpus);
+ for_each_online_cpu(i) {
+ cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 1);
+
+ /* Let's take note of the GIC ID for this CPU */
+ gic_id = gic_get_cpu_id(i);
+ if (gic_id < 0) {
+ pr_err("%s: bad GIC ID for CPU %d\n", __func__, i);
+ bL_switcher_restore_cpus();
+ return -EINVAL;
+ }
+ bL_gic_id[cpu][cluster] = gic_id;
+ pr_info("GIC ID for CPU %u cluster %u is %u\n",
+ cpu, cluster, gic_id);
+
+ if (bL_switcher_cpu_pairing[i] != -1) {
+ bL_switcher_cpu_original_cluster[i] = cluster;
+ continue;
+ }
+
+ ret = cpu_down(i);
+ if (ret) {
+ bL_switcher_restore_cpus();
+ return ret;
+ }
+ cpumask_set_cpu(i, &bL_switcher_removed_logical_cpus);
+ }
+
+ return 0;
+}
+
+/* Determine the logical CPU a given physical CPU is grouped on. */
+int bL_switcher_get_logical_index(u32 mpidr)
+{
+ int cpu;
+
+ if (!bL_switcher_active)
+ return -EUNATCH;
+
+ mpidr &= MPIDR_HWID_BITMASK;
+ for_each_online_cpu(cpu) {
+ int pairing = bL_switcher_cpu_pairing[cpu];
+ if (pairing == -1)
+ continue;
+ if ((mpidr == cpu_logical_map(cpu)) ||
+ (mpidr == cpu_logical_map(pairing)))
+ return cpu;
+ }
+ return -EINVAL;
+}
+
+static void bL_switcher_trace_trigger_cpu(void *__always_unused info)
+{
+ trace_cpu_migrate_current(get_ns(), read_mpidr());
+}
+
+int bL_switcher_trace_trigger(void)
+{
+ int ret;
+
+ preempt_disable();
+
+ bL_switcher_trace_trigger_cpu(NULL);
+ ret = smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true);
+
+ preempt_enable();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger);
+
+static int bL_switcher_enable(void)
+{
+ int cpu, ret;
+
+ mutex_lock(&bL_switcher_activation_lock);
+ cpu_hotplug_driver_lock();
+ if (bL_switcher_active) {
+ cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
+ return 0;
+ }
+
+ pr_info("big.LITTLE switcher initializing\n");
+
+ ret = bL_activation_notify(BL_NOTIFY_PRE_ENABLE);
+ if (ret)
+ goto error;
+
+ ret = bL_switcher_halve_cpus();
+ if (ret)
+ goto error;
+
+ bL_switcher_trace_trigger();
+
+ for_each_online_cpu(cpu) {
+ struct bL_thread *t = &bL_threads[cpu];
+ spin_lock_init(&t->lock);
+ init_waitqueue_head(&t->wq);
+ init_completion(&t->started);
+ t->wanted_cluster = -1;
+ t->task = bL_switcher_thread_create(cpu, t);
+ }
+
+ bL_switcher_active = 1;
+ bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+ pr_info("big.LITTLE switcher initialized\n");
+ goto out;
+
+error:
+ pr_warning("big.LITTLE switcher initialization failed\n");
+ bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+ cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
+ return ret;
+}
+
+#ifdef CONFIG_SYSFS
+
+static void bL_switcher_disable(void)
+{
+ unsigned int cpu, cluster;
+ struct bL_thread *t;
+ struct task_struct *task;
+
+ mutex_lock(&bL_switcher_activation_lock);
+ cpu_hotplug_driver_lock();
+
+ if (!bL_switcher_active)
+ goto out;
+
+ if (bL_activation_notify(BL_NOTIFY_PRE_DISABLE) != 0) {
+ bL_activation_notify(BL_NOTIFY_POST_ENABLE);
+ goto out;
+ }
+
+ bL_switcher_active = 0;
+
+ /*
+ * To deactivate the switcher, we must shut down the switcher
+ * threads to prevent any other requests from being accepted.
+ * Then, if the final cluster for given logical CPU is not the
+ * same as the original one, we'll recreate a switcher thread
+ * just for the purpose of switching the CPU back without any
+ * possibility for interference from external requests.
+ */
+ for_each_online_cpu(cpu) {
+ t = &bL_threads[cpu];
+ task = t->task;
+ t->task = NULL;
+ if (!task || IS_ERR(task))
+ continue;
+ kthread_stop(task);
+ /* no more switch may happen on this CPU at this point */
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+ if (cluster == bL_switcher_cpu_original_cluster[cpu])
+ continue;
+ init_completion(&t->started);
+ t->wanted_cluster = bL_switcher_cpu_original_cluster[cpu];
+ task = bL_switcher_thread_create(cpu, t);
+ if (!IS_ERR(task)) {
+ wait_for_completion(&t->started);
+ kthread_stop(task);
+ cluster = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 1);
+ if (cluster == bL_switcher_cpu_original_cluster[cpu])
+ continue;
+ }
+ /* If execution gets here, we're in trouble. */
+ pr_crit("%s: unable to restore original cluster for CPU %d\n",
+ __func__, cpu);
+ pr_crit("%s: CPU %d can't be restored\n",
+ __func__, bL_switcher_cpu_pairing[cpu]);
+ cpumask_clear_cpu(bL_switcher_cpu_pairing[cpu],
+ &bL_switcher_removed_logical_cpus);
+ }
+
+ bL_switcher_restore_cpus();
+ bL_switcher_trace_trigger();
+
+ bL_activation_notify(BL_NOTIFY_POST_DISABLE);
+
+out:
+ cpu_hotplug_driver_unlock();
+ mutex_unlock(&bL_switcher_activation_lock);
+}
+
+static ssize_t bL_switcher_active_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", bL_switcher_active);
+}
+
+static ssize_t bL_switcher_active_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ int ret;
+
+ switch (buf[0]) {
+ case '0':
+ bL_switcher_disable();
+ ret = 0;
+ break;
+ case '1':
+ ret = bL_switcher_enable();
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return (ret >= 0) ? count : ret;
+}
+
+static ssize_t bL_switcher_trace_trigger_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ int ret = bL_switcher_trace_trigger();
+
+ return ret ? ret : count;
+}
+
+static struct kobj_attribute bL_switcher_active_attr =
+ __ATTR(active, 0644, bL_switcher_active_show, bL_switcher_active_store);
+
+static struct kobj_attribute bL_switcher_trace_trigger_attr =
+ __ATTR(trace_trigger, 0200, NULL, bL_switcher_trace_trigger_store);
+
+static struct attribute *bL_switcher_attrs[] = {
+ &bL_switcher_active_attr.attr,
+ &bL_switcher_trace_trigger_attr.attr,
+ NULL,
+};
+
+static struct attribute_group bL_switcher_attr_group = {
+ .attrs = bL_switcher_attrs,
+};
+
+static struct kobject *bL_switcher_kobj;
+
+static int __init bL_switcher_sysfs_init(void)
+{
+ int ret;
+
+ bL_switcher_kobj = kobject_create_and_add("bL_switcher", kernel_kobj);
+ if (!bL_switcher_kobj)
+ return -ENOMEM;
+ ret = sysfs_create_group(bL_switcher_kobj, &bL_switcher_attr_group);
+ if (ret)
+ kobject_put(bL_switcher_kobj);
+ return ret;
+}
+
+#endif /* CONFIG_SYSFS */
+
+bool bL_switcher_get_enabled(void)
+{
+ mutex_lock(&bL_switcher_activation_lock);
+
+ return bL_switcher_active;
+}
+EXPORT_SYMBOL_GPL(bL_switcher_get_enabled);
+
+void bL_switcher_put_enabled(void)
+{
+ mutex_unlock(&bL_switcher_activation_lock);
+}
+EXPORT_SYMBOL_GPL(bL_switcher_put_enabled);
+
+/*
+ * Veto any CPU hotplug operation while the switcher is active.
+ * We're just not ready to deal with that given the trickery involved.
+ */
+static int bL_switcher_hotplug_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_DOWN_PREPARE:
+ if (bL_switcher_active)
+ return NOTIFY_BAD;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block bL_switcher_hotplug_notifier =
+ { &bL_switcher_hotplug_callback, NULL, 0 };
+
+#ifdef CONFIG_SCHED_HMP
+static bool no_bL_switcher = true;
+#else
+static bool no_bL_switcher;
+#endif
+core_param(no_bL_switcher, no_bL_switcher, bool, 0644);
+
+static int __init bL_switcher_init(void)
+{
+ int ret;
+
+ if (MAX_NR_CLUSTERS != 2) {
+ pr_err("%s: only dual cluster systems are supported\n", __func__);
+ return -EINVAL;
+ }
+
+ register_cpu_notifier(&bL_switcher_hotplug_notifier);
+
+ if (!no_bL_switcher) {
+ ret = bL_switcher_enable();
+ if (ret)
+ return ret;
+ }
+
+#ifdef CONFIG_SYSFS
+ ret = bL_switcher_sysfs_init();
+ if (ret)
+ pr_err("%s: unable to create sysfs entry\n", __func__);
+#endif
+
+ return 0;
+}
+
+late_initcall(bL_switcher_init);
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
new file mode 100644
index 0000000..5e2dd19
--- /dev/null
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -0,0 +1,71 @@
+/*
+ * arch/arm/common/bL_switcher_dummy_if.c -- b.L switcher dummy interface
+ *
+ * Created by: Nicolas Pitre, November 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * Dummy interface to user space for debugging purpose only.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <asm/uaccess.h>
+#include <asm/bL_switcher.h>
+
+static ssize_t bL_switcher_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ unsigned char val[3];
+ unsigned int cpu, cluster;
+ int ret;
+
+ pr_debug("%s\n", __func__);
+
+ if (len < 3)
+ return -EINVAL;
+
+ if (copy_from_user(val, buf, 3))
+ return -EFAULT;
+
+ /* format: <cpu#>,<cluster#> */
+ if (val[0] < '0' || val[0] > '4' ||
+ val[1] != ',' ||
+ val[2] < '0' || val[2] > '1')
+ return -EINVAL;
+
+ cpu = val[0] - '0';
+ cluster = val[2] - '0';
+ ret = bL_switch_request(cpu, cluster);
+
+ return ret ? : len;
+}
+
+static const struct file_operations bL_switcher_fops = {
+ .write = bL_switcher_write,
+ .owner = THIS_MODULE,
+};
+
+static struct miscdevice bL_switcher_device = {
+ MISC_DYNAMIC_MINOR,
+ "b.L_switcher",
+ &bL_switcher_fops
+};
+
+static int __init bL_switcher_dummy_if_init(void)
+{
+ return misc_register(&bL_switcher_device);
+}
+
+static void __exit bL_switcher_dummy_if_exit(void)
+{
+ misc_deregister(&bL_switcher_device);
+}
+
+module_init(bL_switcher_dummy_if_init);
+module_exit(bL_switcher_dummy_if_exit);
diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 370236d..4a2b32f 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -27,6 +27,18 @@ void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
}
+extern unsigned long mcpm_entry_early_pokes[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER][2];
+
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+ unsigned long poke_phys_addr, unsigned long poke_val)
+{
+ unsigned long *poke = &mcpm_entry_early_pokes[cluster][cpu][0];
+ poke[0] = poke_phys_addr;
+ poke[1] = poke_val;
+ __cpuc_flush_dcache_area((void *)poke, 8);
+ outer_clean_range(__pa(poke), __pa(poke + 2));
+}
+
static const struct mcpm_platform_ops *platform_ops;
int __init mcpm_platform_register(const struct mcpm_platform_ops *ops)
diff --git a/arch/arm/common/mcpm_head.S b/arch/arm/common/mcpm_head.S
index 8178705..057e9c5 100644
--- a/arch/arm/common/mcpm_head.S
+++ b/arch/arm/common/mcpm_head.S
@@ -71,12 +71,19 @@ ENTRY(mcpm_entry_point)
* position independent way.
*/
adr r5, 3f
- ldmia r5, {r6, r7, r8, r11}
+ ldmia r5, {r0, r6, r7, r8, r11}
+ add r0, r5, r0 @ r0 = mcpm_entry_early_pokes
add r6, r5, r6 @ r6 = mcpm_entry_vectors
ldr r7, [r5, r7] @ r7 = mcpm_power_up_setup_phys
add r8, r5, r8 @ r8 = mcpm_sync
add r11, r5, r11 @ r11 = first_man_locks
+ @ Perform an early poke, if any
+ add r0, r0, r4, lsl #3
+ ldmia r0, {r0, r1}
+ teq r0, #0
+ strne r1, [r0]
+
mov r0, #MCPM_SYNC_CLUSTER_SIZE
mla r8, r0, r10, r8 @ r8 = sync cluster base
@@ -195,7 +202,8 @@ mcpm_entry_gated:
.align 2
-3: .word mcpm_entry_vectors - .
+3: .word mcpm_entry_early_pokes - .
+ .word mcpm_entry_vectors - 3b
.word mcpm_power_up_setup_phys - 3b
.word mcpm_sync - 3b
.word first_man_locks - 3b
@@ -214,6 +222,10 @@ first_man_locks:
ENTRY(mcpm_entry_vectors)
.space 4 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+ .type mcpm_entry_early_pokes, #object
+ENTRY(mcpm_entry_early_pokes)
+ .space 8 * MAX_NR_CLUSTERS * MAX_CPUS_PER_CLUSTER
+
.type mcpm_power_up_setup_phys, #object
ENTRY(mcpm_power_up_setup_phys)
.space 4 @ set by mcpm_sync_init()
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index accefe0..f693038 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -96,7 +96,7 @@ static inline void __cpuinit arch_counter_set_user_access(void)
asm volatile("mrc p15, 0, %0, c14, c1, 0" : "=r" (cntkctl));
/* disable user access to everything */
- cntkctl &= ~((3 << 8) | (7 << 0));
+ cntkctl &= ~((3 << 8) | (3 << 0));
asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl));
}
diff --git a/arch/arm/include/asm/bL_switcher.h b/arch/arm/include/asm/bL_switcher.h
new file mode 100644
index 0000000..482383b
--- /dev/null
+++ b/arch/arm/include/asm/bL_switcher.h
@@ -0,0 +1,83 @@
+/*
+ * arch/arm/include/asm/bL_switcher.h
+ *
+ * Created by: Nicolas Pitre, April 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_BL_SWITCHER_H
+#define ASM_BL_SWITCHER_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+typedef void (*bL_switch_completion_handler)(void *cookie);
+
+int bL_switch_request_cb(unsigned int cpu, unsigned int new_cluster_id,
+ bL_switch_completion_handler completer,
+ void *completer_cookie);
+static inline int bL_switch_request(unsigned int cpu, unsigned int new_cluster_id)
+{
+ return bL_switch_request_cb(cpu, new_cluster_id, NULL, NULL);
+}
+
+/*
+ * Register here to be notified about runtime enabling/disabling of
+ * the switcher.
+ *
+ * The notifier chain is called with the switcher activation lock held:
+ * the switcher will not be enabled or disabled during callbacks.
+ * Callbacks must not call bL_switcher_{get,put}_enabled().
+ */
+#define BL_NOTIFY_PRE_ENABLE 0
+#define BL_NOTIFY_POST_ENABLE 1
+#define BL_NOTIFY_PRE_DISABLE 2
+#define BL_NOTIFY_POST_DISABLE 3
+
+#ifdef CONFIG_BL_SWITCHER
+
+void bL_switch_request_detach(unsigned int cpu,
+ bL_switch_completion_handler completer);
+
+int bL_switcher_register_notifier(struct notifier_block *nb);
+int bL_switcher_unregister_notifier(struct notifier_block *nb);
+
+/*
+ * Use these functions to temporarily prevent enabling/disabling of
+ * the switcher.
+ * bL_switcher_get_enabled() returns true if the switcher is currently
+ * enabled. Each call to bL_switcher_get_enabled() must be followed
+ * by a call to bL_switcher_put_enabled(). These functions are not
+ * recursive.
+ */
+bool bL_switcher_get_enabled(void);
+void bL_switcher_put_enabled(void);
+
+int bL_switcher_trace_trigger(void);
+int bL_switcher_get_logical_index(u32 mpidr);
+
+#else
+static void bL_switch_request_detach(unsigned int cpu,
+ bL_switch_completion_handler completer) { }
+
+static inline int bL_switcher_register_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int bL_switcher_unregister_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline bool bL_switcher_get_enabled(void) { return false; }
+static inline void bL_switcher_put_enabled(void) { }
+static inline int bL_switcher_trace_trigger(void) { return 0; }
+static inline int bL_switcher_get_logical_index(u32 mpidr) { return -EUNATCH; }
+#endif /* CONFIG_BL_SWITCHER */
+
+#endif
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 17d0ae8..beb8702 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -436,4 +436,50 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size)
#define sync_cache_w(ptr) __sync_cache_range_w(ptr, sizeof *(ptr))
#define sync_cache_r(ptr) __sync_cache_range_r(ptr, sizeof *(ptr))
+/*
+ * Disabling cache access for one CPU in an ARMv7 SMP system is tricky.
+ * To do so we must:
+ *
+ * - Clear the SCTLR.C bit to prevent further cache allocations
+ * - Flush the desired level of cache
+ * - Clear the ACTLR "SMP" bit to disable local coherency
+ *
+ * ... and so without any intervening memory access in between those steps,
+ * not even to the stack.
+ *
+ * WARNING -- After this has been called:
+ *
+ * - No ldrex/strex (and similar) instructions must be used.
+ * - The CPU is obviously no longer coherent with the other CPUs.
+ * - This is unlikely to work as expected if Linux is running non-secure.
+ *
+ * Note:
+ *
+ * - This is known to apply to several ARMv7 processor implementations,
+ * however some exceptions may exist. Caveat emptor.
+ *
+ * - The clobber list is dictated by the call to v7_flush_dcache_*.
+ * fp is preserved to the stack explicitly prior disabling the cache
+ * since adding it to the clobber list is incompatible with having
+ * CONFIG_FRAME_POINTER=y. ip is saved as well if ever r12-clobbering
+ * trampoline are inserted by the linker and to keep sp 64-bit aligned.
+ */
+#define v7_exit_coherency_flush(level) \
+ asm volatile( \
+ "stmfd sp!, {fp, ip} \n\t" \
+ "mrc p15, 0, r0, c1, c0, 0 @ get SCTLR \n\t" \
+ "bic r0, r0, #"__stringify(CR_C)" \n\t" \
+ "mcr p15, 0, r0, c1, c0, 0 @ set SCTLR \n\t" \
+ "isb \n\t" \
+ "bl v7_flush_dcache_"__stringify(level)" \n\t" \
+ "clrex \n\t" \
+ "mrc p15, 0, r0, c1, c0, 1 @ get ACTLR \n\t" \
+ "bic r0, r0, #(1 << 6) @ disable local coherency \n\t" \
+ "mcr p15, 0, r0, c1, c0, 1 @ set ACTLR \n\t" \
+ "isb \n\t" \
+ "dsb \n\t" \
+ "ldmfd sp!, {fp, ip}" \
+ : : : "r0","r1","r2","r3","r4","r5","r6","r7", \
+ "r9","r10","lr","memory" )
+
#endif
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 1f3262e..cedd372 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -61,6 +61,20 @@ static inline void set_cr(unsigned int val)
isb();
}
+static inline unsigned int get_auxcr(void)
+{
+ unsigned int val;
+ asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val));
+ return val;
+}
+
+static inline void set_auxcr(unsigned int val)
+{
+ asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR"
+ : : "r" (val));
+ isb();
+}
+
#ifndef CONFIG_SMP
extern void adjust_cr(unsigned long mask, unsigned long set);
#endif
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index 2740c2a..3d7351c 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -5,7 +5,7 @@
#include <linux/threads.h>
#include <asm/irq.h>
-#define NR_IPI 6
+#define NR_IPI 7
typedef struct {
unsigned int __softirq_pending;
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 308ad7d..75bf079 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -8,6 +8,8 @@
* published by the Free Software Foundation.
*/
+#include <linux/types.h>
+
#ifndef __ASSEMBLY__
struct tag;
@@ -16,8 +18,10 @@ struct pt_regs;
struct smp_operations;
#ifdef CONFIG_SMP
#define smp_ops(ops) (&(ops))
+#define smp_init_ops(ops) (&(ops))
#else
#define smp_ops(ops) (struct smp_operations *)NULL
+#define smp_init_ops(ops) (bool (*)(void))NULL
#endif
struct machine_desc {
@@ -41,6 +45,7 @@ struct machine_desc {
unsigned char reserve_lp2 :1; /* never has lp2 */
char restart_mode; /* default restart mode */
struct smp_operations *smp; /* SMP operations */
+ bool (*smp_init)(void);
void (*fixup)(struct tag *, char **,
struct meminfo *);
void (*reserve)(void);/* reserve mem blocks */
diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h
index 0f7b762..7626a7f 100644
--- a/arch/arm/include/asm/mcpm.h
+++ b/arch/arm/include/asm/mcpm.h
@@ -42,6 +42,14 @@ extern void mcpm_entry_point(void);
void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr);
/*
+ * This sets an early poke i.e a value to be poked into some address
+ * from very early assembly code before the CPU is ungated. The
+ * address must be physical, and if 0 then nothing will happen.
+ */
+void mcpm_set_early_poke(unsigned cpu, unsigned cluster,
+ unsigned long poke_phys_addr, unsigned long poke_val);
+
+/*
* CPU/cluster power operations API for higher subsystems to use.
*/
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index f24edad..0cd7824 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -62,9 +62,19 @@ struct pmu_hw_events {
raw_spinlock_t pmu_lock;
};
+struct cpupmu_regs {
+ u32 pmc;
+ u32 pmcntenset;
+ u32 pmuseren;
+ u32 pmintenset;
+ u32 pmxevttype[8];
+ u32 pmxevtcnt[8];
+};
+
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
+ cpumask_t valid_cpus;
char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct perf_event *event);
@@ -81,6 +91,8 @@ struct arm_pmu {
int (*request_irq)(struct arm_pmu *, irq_handler_t handler);
void (*free_irq)(struct arm_pmu *);
int (*map_event)(struct perf_event *event);
+ void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *);
+ void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *);
int num_events;
atomic_t active_events;
struct mutex reserve_mutex;
diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h
index ce0dbe7..f0a8627 100644
--- a/arch/arm/include/asm/psci.h
+++ b/arch/arm/include/asm/psci.h
@@ -16,6 +16,10 @@
#define PSCI_POWER_STATE_TYPE_STANDBY 0
#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1
+#define PSCI_POWER_STATE_AFFINITY_LEVEL0 0
+#define PSCI_POWER_STATE_AFFINITY_LEVEL1 1
+#define PSCI_POWER_STATE_AFFINITY_LEVEL2 2
+#define PSCI_POWER_STATE_AFFINITY_LEVEL3 3
struct psci_power_state {
u16 id;
@@ -32,5 +36,22 @@ struct psci_operations {
};
extern struct psci_operations psci_ops;
+extern struct smp_operations psci_smp_ops;
+#ifdef CONFIG_ARM_PSCI
+void psci_init(void);
+bool psci_smp_available(void);
+#else
+static inline void psci_init(void) { }
+static inline bool psci_smp_available(void) { return false; }
+#endif
+
+#ifdef CONFIG_ARM_PSCI
+extern int __init psci_probe(void);
+#else
+static inline int psci_probe(void)
+{
+ return -ENODEV;
+}
+#endif
#endif /* __ASM_ARM_PSCI_H */
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index d3a22be..610ccf3 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -81,6 +81,8 @@ extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+extern int register_ipi_completion(struct completion *completion, int cpu);
+
struct smp_operations {
#ifdef CONFIG_SMP
/*
diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 58b8b84..983fa7c 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -26,11 +26,45 @@ extern struct cputopo_arm cpu_topology[NR_CPUS];
void init_cpu_topology(void);
void store_cpu_topology(unsigned int cpuid);
const struct cpumask *cpu_coregroup_mask(int cpu);
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask);
+
+#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
+/* Common values for CPUs */
+#ifndef SD_CPU_INIT
+#define SD_CPU_INIT (struct sched_domain) { \
+ .min_interval = 1, \
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 0, \
+ .wake_idx = 0, \
+ .forkexec_idx = 0, \
+ \
+ .flags = 0*SD_LOAD_BALANCE \
+ | 1*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_BALANCE_WAKE \
+ | 1*SD_WAKE_AFFINE \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+}
+#endif
+#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */
#else
static inline void init_cpu_topology(void) { }
static inline void store_cpu_topology(unsigned int cpuid) { }
+static inline int cluster_to_logical_mask(unsigned int socket_id,
+ cpumask_t *cluster_mask) { return -EINVAL; }
#endif
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5f3338e..dd9d90a 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -82,6 +82,9 @@ obj-$(CONFIG_DEBUG_LL) += debug.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o
-obj-$(CONFIG_ARM_PSCI) += psci.o
+ifeq ($(CONFIG_ARM_PSCI),y)
+obj-y += psci.o
+obj-$(CONFIG_SMP) += psci_smp.o
+endif
extra-y := $(head-y) vmlinux.lds
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 8bac553..2725c87 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -342,7 +342,6 @@ __turn_mmu_on_loc:
.long __turn_mmu_on_end
#if defined(CONFIG_SMP)
- __CPUINIT
ENTRY(secondary_startup)
/*
* Common entry point for secondary CPUs.
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 1fd749e..1b80311 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -1049,7 +1049,8 @@ static struct notifier_block dbg_cpu_pm_nb = {
static void __init pm_init(void)
{
- cpu_pm_register_notifier(&dbg_cpu_pm_nb);
+ if (has_ossr)
+ cpu_pm_register_notifier(&dbg_cpu_pm_nb);
}
#else
static inline void pm_init(void)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index e19edc6..3a9d627 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -12,6 +12,7 @@
*/
#define pr_fmt(fmt) "hw perfevents: " fmt
+#include <linux/cpumask.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@@ -86,6 +87,9 @@ armpmu_map_event(struct perf_event *event,
return armpmu_map_cache_event(cache_map, config);
case PERF_TYPE_RAW:
return armpmu_map_raw_event(raw_event_mask, config);
+ default:
+ if (event->attr.type >= PERF_TYPE_MAX)
+ return armpmu_map_raw_event(raw_event_mask, config);
}
return -ENOENT;
@@ -163,6 +167,8 @@ armpmu_stop(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
/*
* ARM pmu always has to update the counter, so ignore
* PERF_EF_UPDATE, see comments in armpmu_start().
@@ -179,6 +185,8 @@ static void armpmu_start(struct perf_event *event, int flags)
struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
/*
* ARM pmu always has to reprogram the period, so ignore
* PERF_EF_RELOAD, see the comment below.
@@ -206,6 +214,9 @@ armpmu_del(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return;
+
armpmu_stop(event, PERF_EF_UPDATE);
hw_events->events[idx] = NULL;
clear_bit(idx, hw_events->used_mask);
@@ -222,6 +233,10 @@ armpmu_add(struct perf_event *event, int flags)
int idx;
int err = 0;
+ /* An event following a process won't be stopped earlier */
+ if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus))
+ return 0;
+
perf_pmu_disable(event->pmu);
/* If we don't have a space for the counter then finish early. */
@@ -424,6 +439,10 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;
+ if (event->cpu != -1 &&
+ !cpumask_test_cpu(event->cpu, &armpmu->valid_cpus))
+ return -ENOENT;
+
/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 1f2740e..0b48a38 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -19,6 +19,7 @@
#define pr_fmt(fmt) "CPU PMU: " fmt
#include <linux/bitmap.h>
+#include <linux/cpu_pm.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/of.h>
@@ -31,33 +32,36 @@
#include <asm/pmu.h>
/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
+static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu);
static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs);
+
/*
* Despite the names, these two functions are CPU-specific and are used
* by the OProfile/perf code.
*/
const char *perf_pmu_name(void)
{
- if (!cpu_pmu)
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
+ if (!pmu)
return NULL;
- return cpu_pmu->name;
+ return pmu->name;
}
EXPORT_SYMBOL_GPL(perf_pmu_name);
int perf_num_counters(void)
{
- int max_events = 0;
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, 0);
- if (cpu_pmu != NULL)
- max_events = cpu_pmu->num_events;
+ if (!pmu)
+ return 0;
- return max_events;
+ return pmu->num_events;
}
EXPORT_SYMBOL_GPL(perf_num_counters);
@@ -75,11 +79,13 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
{
int i, irq, irqs;
struct platform_device *pmu_device = cpu_pmu->plat_device;
+ int cpu = -1;
irqs = min(pmu_device->num_resources, num_possible_cpus());
for (i = 0; i < irqs; ++i) {
- if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
+ cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
+ if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
continue;
irq = platform_get_irq(pmu_device, i);
if (irq >= 0)
@@ -91,6 +97,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
{
int i, err, irq, irqs;
struct platform_device *pmu_device = cpu_pmu->plat_device;
+ int cpu = -1;
if (!pmu_device)
return -ENODEV;
@@ -103,6 +110,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
for (i = 0; i < irqs; ++i) {
err = 0;
+ cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus);
irq = platform_get_irq(pmu_device, i);
if (irq < 0)
continue;
@@ -112,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
* assume that we're running on a uniprocessor machine and
* continue. Otherwise, continue without this interrupt.
*/
- if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+ if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
irq, i);
continue;
@@ -126,7 +134,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
return err;
}
- cpumask_set_cpu(i, &cpu_pmu->active_irqs);
+ cpumask_set_cpu(cpu, &cpu_pmu->active_irqs);
}
return 0;
@@ -135,7 +143,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
{
int cpu;
- for_each_possible_cpu(cpu) {
+ for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) {
struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
events->events = per_cpu(hw_events, cpu);
events->used_mask = per_cpu(used_mask, cpu);
@@ -148,7 +156,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
/* Ensure the PMU has sane values out of reset. */
if (cpu_pmu->reset)
- on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+ on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1);
}
/*
@@ -160,21 +168,46 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
static int __cpuinit cpu_pmu_notify(struct notifier_block *b,
unsigned long action, void *hcpu)
{
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu);
+
if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
return NOTIFY_DONE;
- if (cpu_pmu && cpu_pmu->reset)
- cpu_pmu->reset(cpu_pmu);
+ if (pmu && pmu->reset)
+ pmu->reset(pmu);
else
return NOTIFY_DONE;
return NOTIFY_OK;
}
+static int cpu_pmu_pm_notify(struct notifier_block *b,
+ unsigned long action, void *hcpu)
+{
+ int cpu = smp_processor_id();
+ struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu);
+ struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu);
+
+ if (!pmu)
+ return NOTIFY_DONE;
+
+ if (action == CPU_PM_ENTER && pmu->save_regs) {
+ pmu->save_regs(pmu, pmuregs);
+ } else if (action == CPU_PM_EXIT && pmu->restore_regs) {
+ pmu->restore_regs(pmu, pmuregs);
+ }
+
+ return NOTIFY_OK;
+}
+
static struct notifier_block __cpuinitdata cpu_pmu_hotplug_notifier = {
.notifier_call = cpu_pmu_notify,
};
+static struct notifier_block __cpuinitdata cpu_pmu_pm_notifier = {
+ .notifier_call = cpu_pmu_pm_notify,
+};
+
/*
* PMU platform driver and devicetree bindings.
*/
@@ -246,6 +279,9 @@ static int probe_current_pmu(struct arm_pmu *pmu)
}
}
+ /* assume PMU support all the CPUs in this case */
+ cpumask_setall(&pmu->valid_cpus);
+
put_cpu();
return ret;
}
@@ -253,15 +289,10 @@ static int probe_current_pmu(struct arm_pmu *pmu)
static int cpu_pmu_device_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id;
- int (*init_fn)(struct arm_pmu *);
struct device_node *node = pdev->dev.of_node;
struct arm_pmu *pmu;
- int ret = -ENODEV;
-
- if (cpu_pmu) {
- pr_info("attempt to register multiple PMU devices!");
- return -ENOSPC;
- }
+ int ret = 0;
+ int cpu;
pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
if (!pmu) {
@@ -270,8 +301,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
}
if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) {
- init_fn = of_id->data;
- ret = init_fn(pmu);
+ smp_call_func_t init_fn = (smp_call_func_t)of_id->data;
+ struct device_node *ncluster;
+ int cluster = -1;
+ cpumask_t sibling_mask;
+
+ ncluster = of_parse_phandle(node, "cluster", 0);
+ if (ncluster) {
+ int len;
+ const u32 *hwid;
+ hwid = of_get_property(ncluster, "reg", &len);
+ if (hwid && len == 4)
+ cluster = be32_to_cpup(hwid);
+ }
+ /* set sibling mask to all cpu mask if socket is not specified */
+ if (cluster == -1 ||
+ cluster_to_logical_mask(cluster, &sibling_mask))
+ cpumask_setall(&sibling_mask);
+
+ smp_call_function_any(&sibling_mask, init_fn, pmu, 1);
+
+ /* now set the valid_cpus after init */
+ cpumask_copy(&pmu->valid_cpus, &sibling_mask);
} else {
ret = probe_current_pmu(pmu);
}
@@ -281,10 +332,12 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
goto out_free;
}
- cpu_pmu = pmu;
- cpu_pmu->plat_device = pdev;
- cpu_pmu_init(cpu_pmu);
- ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
+ for_each_cpu_mask(cpu, pmu->valid_cpus)
+ per_cpu(cpu_pmu, cpu) = pmu;
+
+ pmu->plat_device = pdev;
+ cpu_pmu_init(pmu);
+ ret = armpmu_register(pmu, -1);
if (!ret)
return 0;
@@ -313,9 +366,17 @@ static int __init register_pmu_driver(void)
if (err)
return err;
+ err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier);
+ if (err) {
+ unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+ return err;
+ }
+
err = platform_driver_register(&cpu_pmu_driver);
- if (err)
+ if (err) {
+ cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier);
unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
+ }
return err;
}
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 039cffb..654db50 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -950,6 +950,51 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
}
#endif
+static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu,
+ struct cpupmu_regs *regs)
+{
+ unsigned int cnt;
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc));
+ if (!(regs->pmc & ARMV7_PMNC_E))
+ return;
+
+ asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset));
+ asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren));
+ asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset));
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0]));
+ for (cnt = ARMV7_IDX_COUNTER0;
+ cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+ armv7_pmnc_select_counter(cnt);
+ asm volatile("mrc p15, 0, %0, c9, c13, 1"
+ : "=r"(regs->pmxevttype[cnt]));
+ asm volatile("mrc p15, 0, %0, c9, c13, 2"
+ : "=r"(regs->pmxevtcnt[cnt]));
+ }
+ return;
+}
+
+static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu,
+ struct cpupmu_regs *regs)
+{
+ unsigned int cnt;
+ if (!(regs->pmc & ARMV7_PMNC_E))
+ return;
+
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset));
+ asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren));
+ asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset));
+ asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0]));
+ for (cnt = ARMV7_IDX_COUNTER0;
+ cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
+ armv7_pmnc_select_counter(cnt);
+ asm volatile("mcr p15, 0, %0, c9, c13, 1"
+ : : "r"(regs->pmxevttype[cnt]));
+ asm volatile("mcr p15, 0, %0, c9, c13, 2"
+ : : "r"(regs->pmxevtcnt[cnt]));
+ }
+ asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc));
+}
+
static void armv7pmu_enable_event(struct perf_event *event)
{
unsigned long flags;
@@ -1223,6 +1268,8 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->start = armv7pmu_start;
cpu_pmu->stop = armv7pmu_stop;
cpu_pmu->reset = armv7pmu_reset;
+ cpu_pmu->save_regs = armv7pmu_save_regs;
+ cpu_pmu->restore_regs = armv7pmu_restore_regs;
cpu_pmu->max_period = (1LLU << 32) - 1;
};
@@ -1240,7 +1287,7 @@ static u32 armv7_read_num_pmnc_events(void)
static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A8";
+ cpu_pmu->name = "ARMv7_Cortex_A8";
cpu_pmu->map_event = armv7_a8_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1249,7 +1296,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A9";
+ cpu_pmu->name = "ARMv7_Cortex_A9";
cpu_pmu->map_event = armv7_a9_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1258,7 +1305,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A5";
+ cpu_pmu->name = "ARMv7_Cortex_A5";
cpu_pmu->map_event = armv7_a5_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
return 0;
@@ -1267,7 +1314,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A15";
+ cpu_pmu->name = "ARMv7_Cortex_A15";
cpu_pmu->map_event = armv7_a15_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
@@ -1277,7 +1324,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu)
static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu)
{
armv7pmu_init(cpu_pmu);
- cpu_pmu->name = "ARMv7 Cortex-A7";
+ cpu_pmu->name = "ARMv7_Cortex_A7";
cpu_pmu->map_event = armv7_a7_map_event;
cpu_pmu->num_events = armv7_read_num_pmnc_events();
cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c
index 3653164..0daf4f2 100644
--- a/arch/arm/kernel/psci.c
+++ b/arch/arm/kernel/psci.c
@@ -17,6 +17,7 @@
#include <linux/init.h>
#include <linux/of.h>
+#include <linux/string.h>
#include <asm/compiler.h>
#include <asm/errno.h>
@@ -26,6 +27,11 @@
struct psci_operations psci_ops;
+/* Type of psci support. Currently can only be enabled or disabled */
+#define PSCI_SUP_DISABLED 0
+#define PSCI_SUP_ENABLED 1
+
+static unsigned int psci;
static int (*invoke_psci_fn)(u32, u32, u32, u32);
enum psci_function {
@@ -42,6 +48,7 @@ static u32 psci_function_id[PSCI_FN_MAX];
#define PSCI_RET_EOPNOTSUPP -1
#define PSCI_RET_EINVAL -2
#define PSCI_RET_EPERM -3
+#define PSCI_RET_EALREADYON -4
static int psci_to_linux_errno(int errno)
{
@@ -54,6 +61,8 @@ static int psci_to_linux_errno(int errno)
return -EINVAL;
case PSCI_RET_EPERM:
return -EPERM;
+ case PSCI_RET_EALREADYON:
+ return -EAGAIN;
};
return -EINVAL;
@@ -158,15 +167,18 @@ static const struct of_device_id psci_of_match[] __initconst = {
{},
};
-static int __init psci_init(void)
+void __init psci_init(void)
{
struct device_node *np;
const char *method;
u32 id;
+ if (psci == PSCI_SUP_DISABLED)
+ return;
+
np = of_find_matching_node(NULL, psci_of_match);
if (!np)
- return 0;
+ return;
pr_info("probing function IDs from device-tree\n");
@@ -206,6 +218,35 @@ static int __init psci_init(void)
out_put_node:
of_node_put(np);
- return 0;
+ return;
+}
+
+int __init psci_probe(void)
+{
+ struct device_node *np;
+ int ret = -ENODEV;
+
+ if (psci == PSCI_SUP_ENABLED) {
+ np = of_find_matching_node(NULL, psci_of_match);
+ if (np)
+ ret = 0;
+ }
+
+ of_node_put(np);
+ return ret;
+}
+
+static int __init early_psci(char *val)
+{
+ int ret = 0;
+
+ if (strcmp(val, "enable") == 0)
+ psci = PSCI_SUP_ENABLED;
+ else if (strcmp(val, "disable") == 0)
+ psci = PSCI_SUP_DISABLED;
+ else
+ ret = -EINVAL;
+
+ return ret;
}
-early_initcall(psci_init);
+early_param("psci", early_psci);
diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c
new file mode 100644
index 0000000..23a1142
--- /dev/null
+++ b/arch/arm/kernel/psci_smp.c
@@ -0,0 +1,84 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/smp.h>
+#include <linux/of.h>
+
+#include <asm/psci.h>
+#include <asm/smp_plat.h>
+
+/*
+ * psci_smp assumes that the following is true about PSCI:
+ *
+ * cpu_suspend Suspend the execution on a CPU
+ * @state we don't currently describe affinity levels, so just pass 0.
+ * @entry_point the first instruction to be executed on return
+ * returns 0 success, < 0 on failure
+ *
+ * cpu_off Power down a CPU
+ * @state we don't currently describe affinity levels, so just pass 0.
+ * no return on successful call
+ *
+ * cpu_on Power up a CPU
+ * @cpuid cpuid of target CPU, as from MPIDR
+ * @entry_point the first instruction to be executed on return
+ * returns 0 success, < 0 on failure
+ *
+ * migrate Migrate the context to a different CPU
+ * @cpuid cpuid of target CPU, as from MPIDR
+ * returns 0 success, < 0 on failure
+ *
+ */
+
+extern void secondary_startup(void);
+
+static int __cpuinit psci_boot_secondary(unsigned int cpu,
+ struct task_struct *idle)
+{
+ if (psci_ops.cpu_on)
+ return psci_ops.cpu_on(cpu_logical_map(cpu),
+ __pa(secondary_startup));
+ return -ENODEV;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void __ref psci_cpu_die(unsigned int cpu)
+{
+ const struct psci_power_state ps = {
+ .type = PSCI_POWER_STATE_TYPE_POWER_DOWN,
+ };
+
+ if (psci_ops.cpu_off)
+ psci_ops.cpu_off(ps);
+
+ /* We should never return */
+ panic("psci: cpu %d failed to shutdown\n", cpu);
+}
+#else
+#define psci_cpu_die NULL
+#endif
+
+bool __init psci_smp_available(void)
+{
+ /* is cpu_on available at least? */
+ return (psci_ops.cpu_on != NULL);
+}
+
+struct smp_operations __initdata psci_smp_ops = {
+ .smp_boot_secondary = psci_boot_secondary,
+ .cpu_die = psci_cpu_die,
+};
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index b4b1d39..6002fe0 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -37,6 +37,7 @@
#include <asm/cputype.h>
#include <asm/elf.h>
#include <asm/procinfo.h>
+#include <asm/psci.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
@@ -261,6 +262,19 @@ static int cpu_has_aliasing_icache(unsigned int arch)
int aliasing_icache;
unsigned int id_reg, num_sets, line_size;
+#ifdef CONFIG_BIG_LITTLE
+ /*
+ * We expect a combination of Cortex-A15 and Cortex-A7 cores.
+ * A7 = VIPT aliasing I-cache
+ * A15 = PIPT (non-aliasing) I-cache
+ * To cater for this discrepancy, let's assume aliasing I-cache
+ * all the time. This means unneeded extra work on the A15 but
+ * only ptrace is affected which is not performance critical.
+ */
+ if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc0f0)
+ return 1;
+#endif
+
/* PIPT caches never alias. */
if (icache_is_pipt())
return 0;
@@ -796,9 +810,15 @@ void __init setup_arch(char **cmdline_p)
unflatten_device_tree();
arm_dt_init_cpu_maps();
+ psci_init();
#ifdef CONFIG_SMP
if (is_smp()) {
- smp_set_ops(mdesc->smp);
+ if (!mdesc->smp_init || !mdesc->smp_init()) {
+ if (psci_smp_available())
+ smp_set_ops(&psci_smp_ops);
+ else if (mdesc->smp)
+ smp_set_ops(mdesc->smp);
+ }
smp_init_cpus();
}
#endif
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 987dcf3..b5c1e63 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -4,6 +4,7 @@
#include <asm/assembler.h>
#include <asm/glue-cache.h>
#include <asm/glue-proc.h>
+#include "entry-header.S"
.text
/*
@@ -30,9 +31,8 @@ ENTRY(__cpu_suspend)
mov r2, r5 @ virtual SP
ldr r3, =sleep_save_sp
#ifdef CONFIG_SMP
- ALT_SMP(mrc p15, 0, lr, c0, c0, 5)
- ALT_UP(mov lr, #0)
- and lr, lr, #15
+ get_thread_info r5
+ ldr lr, [r5, #TI_CPU] @ cpu logical index
add r3, r3, lr, lsl #2
#endif
bl __cpu_suspend_save
@@ -82,10 +82,13 @@ ENDPROC(cpu_resume_after_mmu)
.align
ENTRY(cpu_resume)
#ifdef CONFIG_SMP
+ mov r1, #0 @ fall-back logical index for UP
+ ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
+ ALT_UP_B(1f)
+ bic r0, #0xff000000
+ bl cpu_logical_index @ return logical index in r1
+1:
adr r0, sleep_save_sp
- ALT_SMP(mrc p15, 0, r1, c0, c0, 5)
- ALT_UP(mov r1, #0)
- and r1, r1, #15
ldr r0, [r0, r1, lsl #2] @ stack phys addr
#else
ldr r0, sleep_save_sp @ stack phys addr
@@ -102,3 +105,20 @@ sleep_save_sp:
.rept CONFIG_NR_CPUS
.long 0 @ preserve stack phys ptr here
.endr
+
+#ifdef CONFIG_SMP
+cpu_logical_index:
+ adr r3, cpu_map_ptr
+ ldr r2, [r3]
+ add r3, r3, r2 @ virt_to_phys(__cpu_logical_map)
+ mov r1, #0
+1:
+ ldr r2, [r3, r1, lsl #2]
+ cmp r2, r0
+ moveq pc, lr
+ add r1, r1, #1
+ b 1b
+
+cpu_map_ptr:
+ .long __cpu_logical_map - .
+#endif
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5919eb4..dc2843f 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -46,6 +46,9 @@
#include <asm/virt.h>
#include <asm/mach/arch.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/arm-ipi.h>
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -57,7 +60,7 @@ struct secondary_data secondary_data;
* control for which core is the next to come out of the secondary
* boot "holding pen"
*/
-volatile int __cpuinitdata pen_release = -1;
+volatile int pen_release = -1;
enum ipi_msg_type {
IPI_WAKEUP,
@@ -66,6 +69,7 @@ enum ipi_msg_type {
IPI_CALL_FUNC,
IPI_CALL_FUNC_SINGLE,
IPI_CPU_STOP,
+ IPI_COMPLETION,
};
static DECLARE_COMPLETION(cpu_running);
@@ -463,6 +467,7 @@ static const char *ipi_types[NR_IPI] = {
S(IPI_CALL_FUNC, "Function call interrupts"),
S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"),
S(IPI_CPU_STOP, "CPU stop interrupts"),
+ S(IPI_COMPLETION, "completion interrupts"),
};
void show_ipi_list(struct seq_file *p, int prec)
@@ -588,6 +593,19 @@ static void ipi_cpu_stop(unsigned int cpu)
cpu_relax();
}
+static DEFINE_PER_CPU(struct completion *, cpu_completion);
+
+int register_ipi_completion(struct completion *completion, int cpu)
+{
+ per_cpu(cpu_completion, cpu) = completion;
+ return IPI_COMPLETION;
+}
+
+static void ipi_complete(unsigned int cpu)
+{
+ complete(per_cpu(cpu_completion, cpu));
+}
+
/*
* Main handler for inter-processor interrupts
*/
@@ -604,6 +622,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
if (ipinr < NR_IPI)
__inc_irq_stat(cpu, ipi_irqs[ipinr]);
+ trace_arm_ipi_entry(ipinr);
switch (ipinr) {
case IPI_WAKEUP:
break;
@@ -638,11 +657,18 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
irq_exit();
break;
+ case IPI_COMPLETION:
+ irq_enter();
+ ipi_complete(cpu);
+ irq_exit();
+ break;
+
default:
printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n",
cpu, ipinr);
break;
}
+ trace_arm_ipi_exit(ipinr);
set_irq_regs(old_regs);
}
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index c5a5954..677da58d 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <asm/cputype.h>
+#include <asm/smp_plat.h>
#include <asm/topology.h>
/*
@@ -289,6 +290,140 @@ void store_cpu_topology(unsigned int cpuid)
cpu_topology[cpuid].socket_id, mpidr);
}
+
+#ifdef CONFIG_SCHED_HMP
+
+static const char * const little_cores[] = {
+ "arm,cortex-a7",
+ NULL,
+};
+
+static bool is_little_cpu(struct device_node *cn)
+{
+ const char * const *lc;
+ for (lc = little_cores; *lc; lc++)
+ if (of_device_is_compatible(cn, *lc))
+ return true;
+ return false;
+}
+
+void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
+ struct cpumask *slow)
+{
+ struct device_node *cn = NULL;
+ int cpu;
+
+ cpumask_clear(fast);
+ cpumask_clear(slow);
+
+ /*
+ * Use the config options if they are given. This helps testing
+ * HMP scheduling on systems without a big.LITTLE architecture.
+ */
+ if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
+ if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
+ WARN(1, "Failed to parse HMP fast cpu mask!\n");
+ if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
+ WARN(1, "Failed to parse HMP slow cpu mask!\n");
+ return;
+ }
+
+ /*
+ * Else, parse device tree for little cores.
+ */
+ while ((cn = of_find_node_by_type(cn, "cpu"))) {
+
+ const u32 *mpidr;
+ int len;
+
+ mpidr = of_get_property(cn, "reg", &len);
+ if (!mpidr || len != 4) {
+ pr_err("* %s missing reg property\n", cn->full_name);
+ continue;
+ }
+
+ cpu = get_logical_index(be32_to_cpup(mpidr));
+ if (cpu == -EINVAL) {
+ pr_err("couldn't get logical index for mpidr %x\n",
+ be32_to_cpup(mpidr));
+ break;
+ }
+
+ if (is_little_cpu(cn))
+ cpumask_set_cpu(cpu, slow);
+ else
+ cpumask_set_cpu(cpu, fast);
+ }
+
+ if (!cpumask_empty(fast) && !cpumask_empty(slow))
+ return;
+
+ /*
+ * We didn't find both big and little cores so let's call all cores
+ * fast as this will keep the system running, with all cores being
+ * treated equal.
+ */
+ cpumask_setall(fast);
+ cpumask_clear(slow);
+}
+
+struct cpumask hmp_slow_cpu_mask;
+
+void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
+{
+ struct cpumask hmp_fast_cpu_mask;
+ struct hmp_domain *domain;
+
+ arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
+
+ /*
+ * Initialize hmp_domains
+ * Must be ordered with respect to compute capacity.
+ * Fastest domain at head of list.
+ */
+ if(!cpumask_empty(&hmp_slow_cpu_mask)) {
+ domain = (struct hmp_domain *)
+ kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+ cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+ }
+ domain = (struct hmp_domain *)
+ kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+ cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+}
+#endif /* CONFIG_SCHED_HMP */
+
+
+/*
+ * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster
+ * @socket_id: cluster HW identifier
+ * @cluster_mask: the cpumask location to be initialized, modified by the
+ * function only if return value == 0
+ *
+ * Return:
+ *
+ * 0 on success
+ * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
+ */
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
+{
+ int cpu;
+
+ if (!cluster_mask)
+ return -EINVAL;
+
+ for_each_online_cpu(cpu)
+ if (socket_id == topology_physical_package_id(cpu)) {
+ cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
/*
* init_cpu_topology is called at boot when only one cpu is running
* which prevent simultaneous write access to cpu_topology array
diff --git a/arch/arm/mach-exynos/mach-exynos5-dt.c b/arch/arm/mach-exynos/mach-exynos5-dt.c
index 753b94f..d88234e 100644
--- a/arch/arm/mach-exynos/mach-exynos5-dt.c
+++ b/arch/arm/mach-exynos/mach-exynos5-dt.c
@@ -14,6 +14,7 @@
#include <linux/memblock.h>
#include <linux/io.h>
#include <linux/clocksource.h>
+#include <linux/dma-mapping.h>
#include <asm/mach/arch.h>
#include <mach/regs-pmu.h>
@@ -23,11 +24,31 @@
#include "common.h"
+static u64 dma_mask64 = DMA_BIT_MASK(64);
+
static void __init exynos5_dt_map_io(void)
{
exynos_init_io(NULL, 0);
}
+static int exynos5250_platform_notifier(struct notifier_block *nb,
+ unsigned long event, void *__dev)
+{
+ struct device *dev = __dev;
+
+ if (event != BUS_NOTIFY_ADD_DEVICE)
+ return NOTIFY_DONE;
+
+ dev->dma_mask = &dma_mask64;
+ dev->coherent_dma_mask = DMA_BIT_MASK(64);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block exynos5250_platform_nb = {
+ .notifier_call = exynos5250_platform_notifier,
+};
+
static void __init exynos5_dt_machine_init(void)
{
struct device_node *i2c_np;
@@ -52,6 +73,11 @@ static void __init exynos5_dt_machine_init(void)
}
}
+ if (config_enabled(CONFIG_ARM_LPAE) &&
+ of_machine_is_compatible("samsung,exynos5250"))
+ bus_register_notifier(&platform_bus_type,
+ &exynos5250_platform_nb);
+
of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
}
diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig
index 5907e10..dd3d597 100644
--- a/arch/arm/mach-vexpress/Kconfig
+++ b/arch/arm/mach-vexpress/Kconfig
@@ -1,5 +1,7 @@
config ARCH_VEXPRESS
bool "ARM Ltd. Versatile Express family" if ARCH_MULTI_V7
+ select ARCH_HAS_CPUFREQ
+ select ARCH_HAS_OPP
select ARCH_REQUIRE_GPIOLIB
select ARM_AMBA
select ARM_GIC
@@ -56,5 +58,23 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA
config ARCH_VEXPRESS_CA9X4
bool "Versatile Express Cortex-A9x4 tile"
+ select ARM_ERRATA_643719
+
+config ARCH_VEXPRESS_DCSCB
+ bool "Dual Cluster System Control Block (DCSCB) support"
+ depends on MCPM
+ select ARM_CCI
+ help
+ Support for the Dual Cluster System Configuration Block (DCSCB).
+ This is needed to provide CPU and cluster power management
+ on RTSM implementing big.LITTLE.
+
+config ARCH_VEXPRESS_TC2
+ bool "TC2 cluster management"
+ depends on MCPM
+ select VEXPRESS_SPC
+ select ARM_CCI
+ help
+ Support for CPU and cluster power management on TC2.
endmenu
diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile
index 42703e8..14193dc 100644
--- a/arch/arm/mach-vexpress/Makefile
+++ b/arch/arm/mach-vexpress/Makefile
@@ -6,5 +6,13 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
obj-y := v2m.o
obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o
+obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o
+CFLAGS_REMOVE_dcscb.o = -pg
+obj-$(CONFIG_ARCH_VEXPRESS_TC2) += tc2_pm.o tc2_pm_setup.o
+CFLAGS_REMOVE_tc2_pm.o = -pg
+ifeq ($(CONFIG_ARCH_VEXPRESS_TC2),y)
+obj-$(CONFIG_ARM_PSCI) += tc2_pm_psci.o
+CFLAGS_REMOVE_tc2_pm_psci.o = -pg
+endif
obj-$(CONFIG_SMP) += platsmp.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o
diff --git a/arch/arm/mach-vexpress/core.h b/arch/arm/mach-vexpress/core.h
index f134cd4..bde4374 100644
--- a/arch/arm/mach-vexpress/core.h
+++ b/arch/arm/mach-vexpress/core.h
@@ -6,6 +6,8 @@
void vexpress_dt_smp_map_io(void);
+bool vexpress_smp_init_ops(void);
+
extern struct smp_operations vexpress_smp_ops;
extern void vexpress_cpu_die(unsigned int cpu);
diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c
new file mode 100644
index 0000000..b35700f
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb.c
@@ -0,0 +1,236 @@
+/*
+ * arch/arm/mach-vexpress/dcscb.c - Dual Cluster System Configuration Block
+ *
+ * Created by: Nicolas Pitre, May 2012
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/of_address.h>
+#include <linux/vexpress.h>
+#include <linux/arm-cci.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/psci.h>
+
+
+#define RST_HOLD0 0x0
+#define RST_HOLD1 0x4
+#define SYS_SWRESET 0x8
+#define RST_STAT0 0xc
+#define RST_STAT1 0x10
+#define EAG_CFG_R 0x20
+#define EAG_CFG_W 0x24
+#define KFC_CFG_R 0x28
+#define KFC_CFG_W 0x2c
+#define DCS_CFG_R 0x30
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() while its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t dcscb_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+static void __iomem *dcscb_base;
+static int dcscb_use_count[4][2];
+static int dcscb_allcpus_mask[2];
+
+static int dcscb_power_up(unsigned int cpu, unsigned int cluster)
+{
+ unsigned int rst_hold, cpumask = (1 << cpu);
+ unsigned int all_mask = dcscb_allcpus_mask[cluster];
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ if (cpu >= 4 || cluster >= 2)
+ return -EINVAL;
+
+ /*
+ * Since this is called with IRQs enabled, and no arch_spin_lock_irq
+ * variant exists, we need to disable IRQs manually here.
+ */
+ local_irq_disable();
+ arch_spin_lock(&dcscb_lock);
+
+ dcscb_use_count[cpu][cluster]++;
+ if (dcscb_use_count[cpu][cluster] == 1) {
+ rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+ if (rst_hold & (1 << 8)) {
+ /* remove cluster reset and add individual CPU's reset */
+ rst_hold &= ~(1 << 8);
+ rst_hold |= all_mask;
+ }
+ rst_hold &= ~(cpumask | (cpumask << 4));
+ writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+ } else if (dcscb_use_count[cpu][cluster] != 2) {
+ /*
+ * The only possible values are:
+ * 0 = CPU down
+ * 1 = CPU (still) up
+ * 2 = CPU requested to be up before it had a chance
+ * to actually make itself down.
+ * Any other value is a bug.
+ */
+ BUG();
+ }
+
+ arch_spin_unlock(&dcscb_lock);
+ local_irq_enable();
+
+ return 0;
+}
+
+static void dcscb_power_down(void)
+{
+ unsigned int mpidr, cpu, cluster, rst_hold, cpumask, all_mask;
+ bool last_man = false, skip_wfi = false;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ cpumask = (1 << cpu);
+ all_mask = dcscb_allcpus_mask[cluster];
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cpu >= 4 || cluster >= 2);
+
+ __mcpm_cpu_going_down(cpu, cluster);
+
+ arch_spin_lock(&dcscb_lock);
+ BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+ dcscb_use_count[cpu][cluster]--;
+ if (dcscb_use_count[cpu][cluster] == 0) {
+ rst_hold = readl_relaxed(dcscb_base + RST_HOLD0 + cluster * 4);
+ rst_hold |= cpumask;
+ if (((rst_hold | (rst_hold >> 4)) & all_mask) == all_mask) {
+ rst_hold |= (1 << 8);
+ last_man = true;
+ }
+ writel_relaxed(rst_hold, dcscb_base + RST_HOLD0 + cluster * 4);
+ } else if (dcscb_use_count[cpu][cluster] == 1) {
+ /*
+ * A power_up request went ahead of us.
+ * Even if we do not want to shut this CPU down,
+ * the caller expects a certain state as if the WFI
+ * was aborted. So let's continue with cache cleaning.
+ */
+ skip_wfi = true;
+ } else
+ BUG();
+
+ if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+ arch_spin_unlock(&dcscb_lock);
+
+ /* Flush all cache levels for this cluster. */
+ v7_exit_coherency_flush(all);
+
+ /*
+ * This is a harmless no-op. On platforms with a real
+ * outer cache this might either be needed or not,
+ * depending on where the outer cache sits.
+ */
+ outer_flush_all();
+
+ /*
+ * Disable cluster-level coherency by masking
+ * incoming snoops and DVM messages:
+ */
+ cci_disable_port_by_cpu(mpidr);
+
+ __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+ } else {
+ arch_spin_unlock(&dcscb_lock);
+
+ /* Disable and flush the local CPU cache. */
+ v7_exit_coherency_flush(louis);
+ }
+
+ __mcpm_cpu_down(cpu, cluster);
+
+ /* Now we are prepared for power-down, do it: */
+ dsb();
+ if (!skip_wfi)
+ wfi();
+
+ /* Not dead at this point? Let our caller cope. */
+}
+
+static const struct mcpm_platform_ops dcscb_power_ops = {
+ .power_up = dcscb_power_up,
+ .power_down = dcscb_power_down,
+};
+
+static void __init dcscb_usage_count_init(void)
+{
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cpu >= 4 || cluster >= 2);
+ dcscb_use_count[cpu][cluster] = 1;
+}
+
+extern void dcscb_power_up_setup(unsigned int affinity_level);
+
+static int __init dcscb_init(void)
+{
+ struct device_node *node;
+ unsigned int cfg;
+ int ret;
+
+ ret = psci_probe();
+ if (!ret) {
+ pr_debug("psci found. Aborting native init\n");
+ return -ENODEV;
+ }
+
+ if (!cci_probed())
+ return -ENODEV;
+
+ node = of_find_compatible_node(NULL, NULL, "arm,rtsm,dcscb");
+ if (!node)
+ return -ENODEV;
+ dcscb_base = of_iomap(node, 0);
+ if (!dcscb_base)
+ return -EADDRNOTAVAIL;
+ cfg = readl_relaxed(dcscb_base + DCS_CFG_R);
+ dcscb_allcpus_mask[0] = (1 << (((cfg >> 16) >> (0 << 2)) & 0xf)) - 1;
+ dcscb_allcpus_mask[1] = (1 << (((cfg >> 16) >> (1 << 2)) & 0xf)) - 1;
+ dcscb_usage_count_init();
+
+ ret = mcpm_platform_register(&dcscb_power_ops);
+ if (!ret)
+ ret = mcpm_sync_init(dcscb_power_up_setup);
+ if (ret) {
+ iounmap(dcscb_base);
+ return ret;
+ }
+
+ pr_info("VExpress DCSCB support installed\n");
+
+ /*
+ * Future entries into the kernel can now go
+ * through the cluster entry vectors.
+ */
+ vexpress_flags_set(virt_to_phys(mcpm_entry_point));
+
+ return 0;
+}
+
+early_initcall(dcscb_init);
diff --git a/arch/arm/mach-vexpress/dcscb_setup.S b/arch/arm/mach-vexpress/dcscb_setup.S
new file mode 100644
index 0000000..4bb7fbe
--- /dev/null
+++ b/arch/arm/mach-vexpress/dcscb_setup.S
@@ -0,0 +1,38 @@
+/*
+ * arch/arm/include/asm/dcscb_setup.S
+ *
+ * Created by: Dave Martin, 2012-06-22
+ * Copyright: (C) 2012-2013 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+
+
+ENTRY(dcscb_power_up_setup)
+
+ cmp r0, #0 @ check affinity level
+ beq 2f
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ * The ACTLR SMP bit does not need to be set here, because cpu_resume()
+ * already restores that.
+ *
+ * A15/A7 may not require explicit L2 invalidation on reset, dependent
+ * on hardware integration decisions.
+ * For now, this code assumes that L2 is either already invalidated,
+ * or invalidation is not required.
+ */
+
+ b cci_enable_port_for_self
+
+2: @ Implementation-specific local CPU setup operations should go here,
+ @ if any. In this case, there is nothing to do.
+
+ bx lr
+
+ENDPROC(dcscb_power_up_setup)
diff --git a/arch/arm/mach-vexpress/include/mach/tc2.h b/arch/arm/mach-vexpress/include/mach/tc2.h
new file mode 100644
index 0000000..d3b5a22
--- /dev/null
+++ b/arch/arm/mach-vexpress/include/mach/tc2.h
@@ -0,0 +1,10 @@
+#ifndef __MACH_TC2_H
+#define __MACH_TC2_H
+
+/*
+ * cpu and cluster limits
+ */
+#define TC2_MAX_CPUS 3
+#define TC2_MAX_CLUSTERS 2
+
+#endif
diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c
index dc1ace5..993c9ae 100644
--- a/arch/arm/mach-vexpress/platsmp.c
+++ b/arch/arm/mach-vexpress/platsmp.c
@@ -12,9 +12,11 @@
#include <linux/errno.h>
#include <linux/smp.h>
#include <linux/io.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/vexpress.h>
+#include <asm/mcpm.h>
#include <asm/smp_scu.h>
#include <asm/mach/map.h>
@@ -203,3 +205,21 @@ struct smp_operations __initdata vexpress_smp_ops = {
.cpu_die = vexpress_cpu_die,
#endif
};
+
+bool __init vexpress_smp_init_ops(void)
+{
+#ifdef CONFIG_MCPM
+ /*
+ * The best way to detect a multi-cluster configuration at the moment
+ * is to look for the presence of a CCI in the system.
+ * Override the default vexpress_smp_ops if so.
+ */
+ struct device_node *node;
+ node = of_find_compatible_node(NULL, NULL, "arm,cci-400");
+ if (node && of_device_is_available(node)) {
+ mcpm_smp_set_ops();
+ return true;
+ }
+#endif
+ return false;
+}
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
new file mode 100644
index 0000000..9fc264a
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -0,0 +1,277 @@
+/*
+ * arch/arm/mach-vexpress/tc2_pm.c - TC2 power management support
+ *
+ * Created by: Nicolas Pitre, October 2012
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * Some portions of this file were originally written by Achin Gupta
+ * Copyright: (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/psci.h>
+
+#include <mach/motherboard.h>
+#include <mach/tc2.h>
+
+#include <linux/vexpress.h>
+#include <linux/arm-cci.h>
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() after its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t tc2_pm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+static int tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS];
+
+static int tc2_pm_power_up(unsigned int cpu, unsigned int cluster)
+{
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ if (cluster >= TC2_MAX_CLUSTERS ||
+ cpu >= vexpress_spc_get_nb_cpus(cluster))
+ return -EINVAL;
+
+ /*
+ * Since this is called with IRQs enabled, and no arch_spin_lock_irq
+ * variant exists, we need to disable IRQs manually here.
+ */
+ local_irq_disable();
+ arch_spin_lock(&tc2_pm_lock);
+
+ if (!tc2_pm_use_count[0][cluster] &&
+ !tc2_pm_use_count[1][cluster] &&
+ !tc2_pm_use_count[2][cluster])
+ vexpress_spc_powerdown_enable(cluster, 0);
+
+ tc2_pm_use_count[cpu][cluster]++;
+ if (tc2_pm_use_count[cpu][cluster] == 1) {
+ vexpress_spc_write_resume_reg(cluster, cpu,
+ virt_to_phys(mcpm_entry_point));
+ vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1);
+ } else if (tc2_pm_use_count[cpu][cluster] != 2) {
+ /*
+ * The only possible values are:
+ * 0 = CPU down
+ * 1 = CPU (still) up
+ * 2 = CPU requested to be up before it had a chance
+ * to actually make itself down.
+ * Any other value is a bug.
+ */
+ BUG();
+ }
+
+ arch_spin_unlock(&tc2_pm_lock);
+ local_irq_enable();
+
+ return 0;
+}
+
+static void tc2_pm_down(u64 residency)
+{
+ unsigned int mpidr, cpu, cluster;
+ bool last_man = false, skip_wfi = false;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+ cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+ __mcpm_cpu_going_down(cpu, cluster);
+
+ arch_spin_lock(&tc2_pm_lock);
+ BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+ tc2_pm_use_count[cpu][cluster]--;
+ if (tc2_pm_use_count[cpu][cluster] == 0) {
+ vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 1);
+ if (!tc2_pm_use_count[0][cluster] &&
+ !tc2_pm_use_count[1][cluster] &&
+ !tc2_pm_use_count[2][cluster] &&
+ (!residency || residency > 5000)) {
+ vexpress_spc_powerdown_enable(cluster, 1);
+ vexpress_spc_set_global_wakeup_intr(1);
+ last_man = true;
+ }
+ } else if (tc2_pm_use_count[cpu][cluster] == 1) {
+ /*
+ * A power_up request went ahead of us.
+ * Even if we do not want to shut this CPU down,
+ * the caller expects a certain state as if the WFI
+ * was aborted. So let's continue with cache cleaning.
+ */
+ skip_wfi = true;
+ } else
+ BUG();
+
+ /*
+ * If the CPU is committed to power down, make sure
+ * the power controller will be in charge of waking it
+ * up upon IRQ, ie IRQ lines are cut from GIC CPU IF
+ * to the CPU by disabling the GIC CPU IF to prevent wfi
+ * from completing execution behind power controller back
+ */
+ if (!skip_wfi)
+ gic_cpu_if_down();
+
+ if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+ arch_spin_unlock(&tc2_pm_lock);
+
+ if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+ /*
+ * On the Cortex-A15 we need to disable
+ * L2 prefetching before flushing the cache.
+ */
+ asm volatile(
+ "mcr p15, 1, %0, c15, c0, 3 \n\t"
+ "isb \n\t"
+ "dsb "
+ : : "r" (0x400) );
+ }
+
+ v7_exit_coherency_flush(all);
+
+ cci_disable_port_by_cpu(mpidr);
+
+ __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+ } else {
+ /*
+ * If last man then undo any setup done previously.
+ */
+ if (last_man) {
+ vexpress_spc_powerdown_enable(cluster, 0);
+ vexpress_spc_set_global_wakeup_intr(0);
+ }
+
+ arch_spin_unlock(&tc2_pm_lock);
+
+ v7_exit_coherency_flush(louis);
+ }
+
+ __mcpm_cpu_down(cpu, cluster);
+
+ /* Now we are prepared for power-down, do it: */
+ if (!skip_wfi)
+ wfi();
+
+ /* Not dead at this point? Let our caller cope. */
+}
+
+static void tc2_pm_power_down(void)
+{
+ tc2_pm_down(0);
+}
+
+static void tc2_pm_suspend(u64 residency)
+{
+ extern void tc2_resume(void);
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ vexpress_spc_write_resume_reg(cluster, cpu,
+ virt_to_phys(tc2_resume));
+
+ tc2_pm_down(residency);
+}
+
+static void tc2_pm_powered_up(void)
+{
+ unsigned int mpidr, cpu, cluster;
+ unsigned long flags;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+ cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+ local_irq_save(flags);
+ arch_spin_lock(&tc2_pm_lock);
+
+ if (!tc2_pm_use_count[0][cluster] &&
+ !tc2_pm_use_count[1][cluster] &&
+ !tc2_pm_use_count[2][cluster]) {
+ vexpress_spc_powerdown_enable(cluster, 0);
+ vexpress_spc_set_global_wakeup_intr(0);
+ }
+
+ if (!tc2_pm_use_count[cpu][cluster])
+ tc2_pm_use_count[cpu][cluster] = 1;
+
+ vexpress_spc_set_cpu_wakeup_irq(cpu, cluster, 0);
+ vexpress_spc_write_resume_reg(cluster, cpu, 0);
+
+ arch_spin_unlock(&tc2_pm_lock);
+ local_irq_restore(flags);
+}
+
+static const struct mcpm_platform_ops tc2_pm_power_ops = {
+ .power_up = tc2_pm_power_up,
+ .power_down = tc2_pm_power_down,
+ .suspend = tc2_pm_suspend,
+ .powered_up = tc2_pm_powered_up,
+};
+
+static void __init tc2_pm_usage_count_init(void)
+{
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+ cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+ tc2_pm_use_count[cpu][cluster] = 1;
+}
+
+extern void tc2_pm_power_up_setup(unsigned int affinity_level);
+
+static int __init tc2_pm_init(void)
+{
+ int ret;
+
+ ret = psci_probe();
+ if (!ret) {
+ pr_debug("psci found. Aborting native init\n");
+ return -ENODEV;
+ }
+
+ if (!vexpress_spc_check_loaded())
+ return -ENODEV;
+
+ tc2_pm_usage_count_init();
+
+ ret = mcpm_platform_register(&tc2_pm_power_ops);
+ if (!ret)
+ ret = mcpm_sync_init(tc2_pm_power_up_setup);
+ if (!ret)
+ pr_info("TC2 power management initialized\n");
+ return ret;
+}
+
+early_initcall(tc2_pm_init);
diff --git a/arch/arm/mach-vexpress/tc2_pm_psci.c b/arch/arm/mach-vexpress/tc2_pm_psci.c
new file mode 100644
index 0000000..c2fdc22
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm_psci.c
@@ -0,0 +1,173 @@
+/*
+ * arch/arm/mach-vexpress/tc2_pm_psci.c - TC2 PSCI support
+ *
+ * Created by: Achin Gupta, December 2012
+ * Copyright: (C) 2012 ARM Limited
+ *
+ * Some portions of this file were originally written by Nicolas Pitre
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+
+#include <asm/mcpm.h>
+#include <asm/proc-fns.h>
+#include <asm/cacheflush.h>
+#include <asm/psci.h>
+#include <asm/atomic.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+
+#include <mach/motherboard.h>
+#include <mach/tc2.h>
+
+#include <linux/vexpress.h>
+
+/*
+ * Platform specific state id understood by the firmware and used to
+ * program the power controller
+ */
+#define PSCI_POWER_STATE_ID 0
+
+static atomic_t tc2_pm_use_count[TC2_MAX_CPUS][TC2_MAX_CLUSTERS];
+
+static int tc2_pm_psci_power_up(unsigned int cpu, unsigned int cluster)
+{
+ unsigned int mpidr = (cluster << 8) | cpu;
+ int ret = 0;
+
+ BUG_ON(!psci_ops.cpu_on);
+
+ switch (atomic_inc_return(&tc2_pm_use_count[cpu][cluster])) {
+ case 1:
+ /*
+ * This is a request to power up a cpu that linux thinks has
+ * been powered down. Retries are needed if the firmware has
+ * seen the power down request as yet.
+ */
+ do
+ ret = psci_ops.cpu_on(mpidr,
+ virt_to_phys(mcpm_entry_point));
+ while (ret == -EAGAIN);
+
+ return ret;
+ case 2:
+ /* This power up request has overtaken a power down request */
+ return ret;
+ default:
+ /* Any other value is a bug */
+ BUG();
+ }
+}
+
+static void tc2_pm_psci_power_down(void)
+{
+ struct psci_power_state power_state;
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ BUG_ON(!psci_ops.cpu_off);
+
+ switch (atomic_dec_return(&tc2_pm_use_count[cpu][cluster])) {
+ case 1:
+ /*
+ * Overtaken by a power up. Flush caches, exit coherency,
+ * return & fake a reset
+ */
+ set_cr(get_cr() & ~CR_C);
+
+ flush_cache_louis();
+
+ asm volatile ("clrex");
+ set_auxcr(get_auxcr() & ~(1 << 6));
+
+ return;
+ case 0:
+ /* A normal request to possibly power down the cluster */
+ power_state.id = PSCI_POWER_STATE_ID;
+ power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN;
+ power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1;
+
+ psci_ops.cpu_off(power_state);
+
+ /* On success this function never returns */
+ default:
+ /* Any other value is a bug */
+ BUG();
+ }
+}
+
+static void tc2_pm_psci_suspend(u64 unused)
+{
+ struct psci_power_state power_state;
+
+ BUG_ON(!psci_ops.cpu_suspend);
+
+ /* On TC2 always attempt to power down the cluster */
+ power_state.id = PSCI_POWER_STATE_ID;
+ power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN;
+ power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1;
+
+ psci_ops.cpu_suspend(power_state, virt_to_phys(mcpm_entry_point));
+
+ /* On success this function never returns */
+ BUG();
+}
+
+static const struct mcpm_platform_ops tc2_pm_power_ops = {
+ .power_up = tc2_pm_psci_power_up,
+ .power_down = tc2_pm_psci_power_down,
+ .suspend = tc2_pm_psci_suspend,
+};
+
+static void __init tc2_pm_usage_count_init(void)
+{
+ unsigned int mpidr, cpu, cluster;
+
+ mpidr = read_cpuid_mpidr();
+ cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+ cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+ pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+ BUG_ON(cluster >= TC2_MAX_CLUSTERS ||
+ cpu >= vexpress_spc_get_nb_cpus(cluster));
+
+ atomic_set(&tc2_pm_use_count[cpu][cluster], 1);
+}
+
+static int __init tc2_pm_psci_init(void)
+{
+ int ret;
+
+ ret = psci_probe();
+ if (ret) {
+ pr_debug("psci not found. Aborting psci init\n");
+ return -ENODEV;
+ }
+
+ if (!vexpress_spc_check_loaded()) {
+ pr_debug("spc not found. Aborting psci init\n");
+ return -ENODEV;
+ }
+
+ tc2_pm_usage_count_init();
+
+ ret = mcpm_platform_register(&tc2_pm_power_ops);
+ if (!ret)
+ ret = mcpm_sync_init(NULL);
+ if (!ret)
+ pr_info("TC2 power management initialized\n");
+ return ret;
+}
+
+early_initcall(tc2_pm_psci_init);
diff --git a/arch/arm/mach-vexpress/tc2_pm_setup.S b/arch/arm/mach-vexpress/tc2_pm_setup.S
new file mode 100644
index 0000000..a18dafe
--- /dev/null
+++ b/arch/arm/mach-vexpress/tc2_pm_setup.S
@@ -0,0 +1,68 @@
+/*
+ * arch/arm/include/asm/tc2_pm_setup.S
+ *
+ * Created by: Nicolas Pitre, October 2012
+ ( (based on dcscb_setup.S by Dave Martin)
+ * Copyright: (C) 2012 Linaro Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+
+#include <linux/linkage.h>
+#include <asm/mcpm.h>
+
+
+#define SPC_PHYS_BASE 0x7FFF0000
+#define SPC_WAKE_INT_STAT 0xb2c
+
+#define SNOOP_CTL_A15 0x404
+#define SNOOP_CTL_A7 0x504
+
+#define A15_SNOOP_MASK (0x3 << 7)
+#define A7_SNOOP_MASK (0x1 << 13)
+
+#define A15_BX_ADDR0 0xB68
+
+
+ENTRY(tc2_resume)
+ mrc p15, 0, r0, c0, c0, 5
+ ubfx r1, r0, #0, #4 @ r1 = cpu
+ ubfx r2, r0, #8, #4 @ r2 = cluster
+ add r1, r1, r2, lsl #2 @ r1 = index of CPU in WAKE_INT_STAT
+ ldr r3, =SPC_PHYS_BASE + SPC_WAKE_INT_STAT
+ ldr r3, [r3]
+ lsr r3, r1
+ tst r3, #1
+ wfieq @ if no pending IRQ reenters wfi
+ b mcpm_entry_point
+ENDPROC(tc2_resume)
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ * The ACTLR SMP bit does not need to be set here, because cpu_resume()
+ * already restores that.
+ */
+
+ENTRY(tc2_pm_power_up_setup)
+
+ cmp r0, #0
+ beq 2f
+
+ b cci_enable_port_for_self
+
+2: @ Clear the BX addr register
+ ldr r3, =SPC_PHYS_BASE + A15_BX_ADDR0
+ mrc p15, 0, r0, c0, c0, 5 @ MPIDR
+ ubfx r1, r0, #8, #4 @ cluster
+ ubfx r0, r0, #0, #4 @ cpu
+ add r3, r3, r1, lsl #4
+ mov r1, #0
+ str r1, [r3, r0, lsl #2]
+ dsb
+
+ bx lr
+
+ENDPROC(tc2_pm_power_up_setup)
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 8802030..057f99b 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -10,6 +10,7 @@
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/irqchip.h>
+#include <linux/memblock.h>
#include <linux/of_address.h>
#include <linux/of_fdt.h>
#include <linux/of_irq.h>
@@ -373,6 +374,31 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express")
.init_machine = v2m_init,
MACHINE_END
+static void __init v2m_dt_hdlcd_init(void)
+{
+ struct device_node *node;
+ int len, na, ns;
+ const __be32 *prop;
+ phys_addr_t fb_base, fb_size;
+
+ node = of_find_compatible_node(NULL, NULL, "arm,hdlcd");
+ if (!node)
+ return;
+
+ na = of_n_addr_cells(node);
+ ns = of_n_size_cells(node);
+
+ prop = of_get_property(node, "framebuffer", &len);
+ if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop)))
+ return;
+
+ fb_base = of_read_number(prop, na);
+ fb_size = of_read_number(prop + na, ns);
+
+ if (WARN_ON(memblock_remove(fb_base, fb_size)))
+ return;
+};
+
static struct map_desc v2m_rs1_io_desc __initdata = {
.virtual = V2M_PERIPH,
.pfn = __phys_to_pfn(0x1c000000),
@@ -423,6 +449,8 @@ void __init v2m_dt_init_early(void)
pr_warning("vexpress: DT HBI (%x) is not matching "
"hardware (%x)!\n", dt_hbi, hbi);
}
+
+ v2m_dt_hdlcd_init();
}
static void __init v2m_dt_timer_init(void)
@@ -456,6 +484,7 @@ static const char * const v2m_dt_match[] __initconst = {
DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
.dt_compat = v2m_dt_match,
.smp = smp_ops(vexpress_smp_ops),
+ .smp_init = smp_init_ops(vexpress_smp_init_ops),
.map_io = v2m_dt_map_io,
.init_early = v2m_dt_init_early,
.init_irq = irqchip_init,
diff --git a/arch/arm/mach-virt/Makefile b/arch/arm/mach-virt/Makefile
index 042afc1..7ddbfa6 100644
--- a/arch/arm/mach-virt/Makefile
+++ b/arch/arm/mach-virt/Makefile
@@ -3,4 +3,3 @@
#
obj-y := virt.o
-obj-$(CONFIG_SMP) += platsmp.o
diff --git a/arch/arm/mach-virt/platsmp.c b/arch/arm/mach-virt/platsmp.c
deleted file mode 100644
index f4143f5..0000000
--- a/arch/arm/mach-virt/platsmp.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Dummy Virtual Machine - does what it says on the tin.
- *
- * Copyright (C) 2012 ARM Ltd
- * Author: Will Deacon <will.deacon@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/of.h>
-
-#include <asm/psci.h>
-#include <asm/smp_plat.h>
-
-extern void secondary_startup(void);
-
-static void __init virt_smp_init_cpus(void)
-{
-}
-
-static void __init virt_smp_prepare_cpus(unsigned int max_cpus)
-{
-}
-
-static int __cpuinit virt_boot_secondary(unsigned int cpu,
- struct task_struct *idle)
-{
- if (psci_ops.cpu_on)
- return psci_ops.cpu_on(cpu_logical_map(cpu),
- __pa(secondary_startup));
- return -ENODEV;
-}
-
-struct smp_operations __initdata virt_smp_ops = {
- .smp_init_cpus = virt_smp_init_cpus,
- .smp_prepare_cpus = virt_smp_prepare_cpus,
- .smp_boot_secondary = virt_boot_secondary,
-};
diff --git a/arch/arm/mach-virt/virt.c b/arch/arm/mach-virt/virt.c
index 061f283..a67d2dd 100644
--- a/arch/arm/mach-virt/virt.c
+++ b/arch/arm/mach-virt/virt.c
@@ -36,11 +36,8 @@ static const char *virt_dt_match[] = {
NULL
};
-extern struct smp_operations virt_smp_ops;
-
DT_MACHINE_START(VIRT, "Dummy Virtual Machine")
.init_irq = irqchip_init,
.init_machine = virt_init,
- .smp = smp_ops(virt_smp_ops),
.dt_compat = virt_dt_match,
MACHINE_END
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 515b000..a84e053 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -146,18 +146,18 @@ flush_levels:
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 @ extract max number of the index size
loop1:
- mov r9, r4 @ create working copy of max way size
+ mov r9, r7 @ create working copy of max index
loop2:
- ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11
- THUMB( lsl r6, r9, r5 )
+ ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11
+ THUMB( lsl r6, r4, r5 )
THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
- ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11
- THUMB( lsl r6, r7, r2 )
+ ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11
+ THUMB( lsl r6, r9, r2 )
THUMB( orr r11, r11, r6 ) @ factor index number into r11
mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way
- subs r9, r9, #1 @ decrement the way
+ subs r9, r9, #1 @ decrement the index
bge loop2
- subs r7, r7, #1 @ decrement the index
+ subs r4, r4, #1 @ decrement the way
bge loop1
skip:
add r10, r10, #2 @ increment cache number
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 5dbf13f..e207aa5 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -446,8 +446,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
if (pud_none(*pud_k))
goto bad_area;
- if (!pud_present(*pud))
+ if (!pud_present(*pud)) {
set_pud(pud, *pud_k);
+ /*
+ * There is a small window during free_pgtables() where the
+ * user *pud entry is 0 but the TLB has not been invalidated
+ * and we get a level 2 (pmd) translation fault caused by the
+ * intermediate TLB caching of the old level 1 (pud) entry.
+ */
+ flush_tlb_kernel_page(addr);
+ }
pmd = pmd_offset(pud, addr);
pmd_k = pmd_offset(pud_k, addr);
@@ -470,8 +478,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr,
#endif
if (pmd_none(pmd_k[index]))
goto bad_area;
+ if (!pmd_present(pmd[index]))
+ copy_pmd(pmd, pmd_k);
- copy_pmd(pmd, pmd_k);
return 0;
bad_area:
diff --git a/arch/arm/plat-versatile/headsmp.S b/arch/arm/plat-versatile/headsmp.S
index b178d44..2677bc3 100644
--- a/arch/arm/plat-versatile/headsmp.S
+++ b/arch/arm/plat-versatile/headsmp.S
@@ -11,8 +11,6 @@
#include <linux/linkage.h>
#include <linux/init.h>
- __INIT
-
/*
* Realview/Versatile Express specific entry point for secondary CPUs.
* This provides a "holding pen" into which all secondary cores are held
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 56b3f6d..a95dc90 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -144,12 +144,139 @@ config SMP
If you don't know what to do here, say N.
+config ARM_CPU_TOPOLOGY
+ bool "Support CPU topology definition"
+ depends on SMP
+ default y
+ help
+ Support CPU topology definition, based on configuration
+ provided by the firmware.
+
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on ARM_CPU_TOPOLOGY
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
+config SCHED_SMT
+ bool "SMT scheduler support"
+ depends on ARM_CPU_TOPOLOGY
+ help
+ Improves the CPU scheduler's decision making when dealing with
+ MultiThreading at a cost of slightly increased overhead in some
+ places. If unsure say N here.
+
+config DISABLE_CPU_SCHED_DOMAIN_BALANCE
+ bool "(EXPERIMENTAL) Disable CPU level scheduler load-balancing"
+ help
+ Disables scheduler load-balancing at CPU sched domain level.
+
+config SCHED_HMP
+ bool "(EXPERIMENTAL) Heterogenous multiprocessor scheduling"
+ depends on DISABLE_CPU_SCHED_DOMAIN_BALANCE && SCHED_MC && FAIR_GROUP_SCHED && !SCHED_AUTOGROUP
+ help
+ Experimental scheduler optimizations for heterogeneous platforms.
+ Attempts to introspectively select task affinity to optimize power
+ and performance. Basic support for multiple (>2) cpu types is in place,
+ but it has only been tested with two types of cpus.
+ There is currently no support for migration of task groups, hence
+ !SCHED_AUTOGROUP. Furthermore, normal load-balancing must be disabled
+ between cpus of different type (DISABLE_CPU_SCHED_DOMAIN_BALANCE).
+
+config SCHED_HMP_PRIO_FILTER
+ bool "(EXPERIMENTAL) Filter HMP migrations by task priority"
+ depends on SCHED_HMP
+ help
+ Enables task priority based HMP migration filter. Any task with
+ a NICE value above the threshold will always be on low-power cpus
+ with less compute capacity.
+
+config SCHED_HMP_PRIO_FILTER_VAL
+ int "NICE priority threshold"
+ default 5
+ depends on SCHED_HMP_PRIO_FILTER
+
+config HMP_FAST_CPU_MASK
+ string "HMP scheduler fast CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the fast CPUs in the system as a list string,
+ e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_SLOW_CPU_MASK
+ string "HMP scheduler slow CPU mask"
+ depends on SCHED_HMP
+ help
+ Leave empty to use device tree information.
+ Specify the cpuids of the slow CPUs in the system as a list string,
+ e.g. cpuid 0+1 should be specified as 0-1.
+
+config HMP_VARIABLE_SCALE
+ bool "Allows changing the load tracking scale through sysfs"
+ depends on SCHED_HMP
+ help
+ When turned on, this option exports the thresholds and load average
+ period value for the load tracking patches through sysfs.
+ The values can be modified to change the rate of load accumulation
+ and the thresholds used for HMP migration.
+ The load_avg_period_ms is the time in ms to reach a load average of
+ 0.5 for an idle task of 0 load average ratio that start a busy loop.
+ The up_threshold and down_threshold is the value to go to a faster
+ CPU or to go back to a slower cpu.
+ The {up,down}_threshold are devided by 1024 before being compared
+ to the load average.
+ For examples, with load_avg_period_ms = 128 and up_threshold = 512,
+ a running task with a load of 0 will be migrated to a bigger CPU after
+ 128ms, because after 128ms its load_avg_ratio is 0.5 and the real
+ up_threshold is 0.5.
+ This patch has the same behavior as changing the Y of the load
+ average computation to
+ (1002/1024)^(LOAD_AVG_PERIOD/load_avg_period_ms)
+ but it remove intermadiate overflows in computation.
+
+config HMP_FREQUENCY_INVARIANT_SCALE
+ bool "(EXPERIMENTAL) Frequency-Invariant Tracked Load for HMP"
+ depends on HMP_VARIABLE_SCALE && CPU_FREQ
+ help
+ Scales the current load contribution in line with the frequency
+ of the CPU that the task was executed on.
+ In this version, we use a simple linear scale derived from the
+ maximum frequency reported by CPUFreq.
+ Restricting tracked load to be scaled by the CPU's frequency
+ represents the consumption of possible compute capacity
+ (rather than consumption of actual instantaneous capacity as
+ normal) and allows the HMP migration's simple threshold
+ migration strategy to interact more predictably with CPUFreq's
+ asynchronous compute capacity changes.
+
+config SCHED_HMP_LITTLE_PACKING
+ bool "Small task packing for HMP"
+ depends on SCHED_HMP
+ default n
+ help
+ Allows the HMP Scheduler to pack small tasks into CPUs in the
+ smallest HMP domain.
+ Controlled by two sysfs files in sys/kernel/hmp.
+ packing_enable: 1 to enable, 0 to disable packing. Default 1.
+ packing_limit: runqueue load ratio where a RQ is considered
+ to be full. Default is NICE_0_LOAD * 9/8.
+
config NR_CPUS
int "Maximum number of CPUs (2-32)"
range 2 32
depends on SMP
default "4"
+config HOTPLUG_CPU
+ bool "Support for hot-pluggable CPUs"
+ depends on SMP
+ help
+ Say Y here to experiment with turning CPUs off and on. CPUs
+ can be controlled through /sys/devices/system/cpu.
+
source kernel/Kconfig.preempt
config HZ
@@ -229,6 +356,14 @@ config SYSVIPC_COMPAT
endmenu
+menu "Power management options"
+
+source "kernel/power/Kconfig"
+
+source "drivers/cpufreq/Kconfig"
+
+endmenu
+
source "net/Kconfig"
source "drivers/Kconfig"
diff --git a/arch/arm64/boot/dts/Makefile b/arch/arm64/boot/dts/Makefile
index 68457e9..24be429 100644
--- a/arch/arm64/boot/dts/Makefile
+++ b/arch/arm64/boot/dts/Makefile
@@ -1,4 +1,5 @@
-dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb
+dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb foundation-v8.dtb \
+ fvp-base-gicv2-psci.dtb
targets += dtbs
targets += $(dtb-y)
diff --git a/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
new file mode 100644
index 0000000..79ddd46
--- /dev/null
+++ b/arch/arm64/boot/dts/fvp-base-gicv2-psci.dts
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2013, ARM Limited. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/memreserve/ 0x80000000 0x00010000;
+
+/ {
+};
+
+/ {
+ model = "FVP Base";
+ compatible = "arm,vfp-base", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ serial1 = &v2m_serial1;
+ serial2 = &v2m_serial2;
+ serial3 = &v2m_serial3;
+ };
+
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0xc4000001>;
+ cpu_off = <0x84000002>;
+ cpu_on = <0xc4000003>;
+ };
+
+ cpus {
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ big0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ big1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ big2: cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x2>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ big3: cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57", "arm,armv8";
+ reg = <0x0 0x3>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little0: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little1: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x101>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little2: cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x102>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+ little3: cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53", "arm,armv8";
+ reg = <0x0 0x103>;
+ enable-method = "psci";
+ clock-frequency = <1000000>;
+ };
+
+ cpu-map {
+ cluster0 {
+ core0 {
+ cpu = <&big0>;
+ };
+ core1 {
+ cpu = <&big1>;
+ };
+ core2 {
+ cpu = <&big2>;
+ };
+ core3 {
+ cpu = <&big3>;
+ };
+ };
+ cluster1 {
+ core0 {
+ cpu = <&little0>;
+ };
+ core1 {
+ cpu = <&little1>;
+ };
+ core2 {
+ cpu = <&little2>;
+ };
+ core3 {
+ cpu = <&little3>;
+ };
+ };
+ };
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x00000000 0x80000000 0 0x80000000>,
+ <0x00000008 0x80000000 0 0x80000000>;
+ };
+
+ gic: interrupt-controller@2f000000 {
+ compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x0 0x2f000000 0 0x10000>,
+ <0x0 0x2c000000 0 0x2000>,
+ <0x0 0x2c010000 0 0x2000>,
+ <0x0 0x2c02F000 0 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <1 13 0xff01>,
+ <1 14 0xff01>,
+ <1 11 0xff01>,
+ <1 10 0xff01>;
+ clock-frequency = <100000000>;
+ };
+
+ timer@2a810000 {
+ compatible = "arm,armv7-timer-mem";
+ reg = <0x0 0x2a810000 0x0 0x10000>;
+ clock-frequency = <100000000>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+ frame@2a820000 {
+ frame-number = <0>;
+ interrupts = <0 25 4>;
+ reg = <0x0 0x2a820000 0x0 0x10000>;
+ };
+ };
+
+ pmu {
+ compatible = "arm,armv8-pmuv3";
+ interrupts = <0 60 4>,
+ <0 61 4>,
+ <0 62 4>,
+ <0 63 4>;
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0 0x08000000 0x04000000>,
+ <1 0 0 0x14000000 0x04000000>,
+ <2 0 0 0x18000000 0x04000000>,
+ <3 0 0 0x1c000000 0x04000000>,
+ <4 0 0 0x0c000000 0x04000000>,
+ <5 0 0 0x10000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <0 0 0 &gic 0 0 4>,
+ <0 0 1 &gic 0 1 4>,
+ <0 0 2 &gic 0 2 4>,
+ <0 0 3 &gic 0 3 4>,
+ <0 0 4 &gic 0 4 4>,
+ <0 0 5 &gic 0 5 4>,
+ <0 0 6 &gic 0 6 4>,
+ <0 0 7 &gic 0 7 4>,
+ <0 0 8 &gic 0 8 4>,
+ <0 0 9 &gic 0 9 4>,
+ <0 0 10 &gic 0 10 4>,
+ <0 0 11 &gic 0 11 4>,
+ <0 0 12 &gic 0 12 4>,
+ <0 0 13 &gic 0 13 4>,
+ <0 0 14 &gic 0 14 4>,
+ <0 0 15 &gic 0 15 4>,
+ <0 0 16 &gic 0 16 4>,
+ <0 0 17 &gic 0 17 4>,
+ <0 0 18 &gic 0 18 4>,
+ <0 0 19 &gic 0 19 4>,
+ <0 0 20 &gic 0 20 4>,
+ <0 0 21 &gic 0 21 4>,
+ <0 0 22 &gic 0 22 4>,
+ <0 0 23 &gic 0 23 4>,
+ <0 0 24 &gic 0 24 4>,
+ <0 0 25 &gic 0 25 4>,
+ <0 0 26 &gic 0 26 4>,
+ <0 0 27 &gic 0 27 4>,
+ <0 0 28 &gic 0 28 4>,
+ <0 0 29 &gic 0 29 4>,
+ <0 0 30 &gic 0 30 4>,
+ <0 0 31 &gic 0 31 4>,
+ <0 0 32 &gic 0 32 4>,
+ <0 0 33 &gic 0 33 4>,
+ <0 0 34 &gic 0 34 4>,
+ <0 0 35 &gic 0 35 4>,
+ <0 0 36 &gic 0 36 4>,
+ <0 0 37 &gic 0 37 4>,
+ <0 0 38 &gic 0 38 4>,
+ <0 0 39 &gic 0 39 4>,
+ <0 0 40 &gic 0 40 4>,
+ <0 0 41 &gic 0 41 4>,
+ <0 0 42 &gic 0 42 4>;
+
+ /include/ "rtsm_ve-motherboard.dtsi"
+ };
+
+ panels {
+ panel@0 {
+ compatible = "panel";
+ mode = "XVGA";
+ refresh = <60>;
+ xres = <1024>;
+ yres = <768>;
+ pixclock = <15748>;
+ left_margin = <152>;
+ right_margin = <48>;
+ upper_margin = <23>;
+ lower_margin = <3>;
+ hsync_len = <104>;
+ vsync_len = <4>;
+ sync = <0>;
+ vmode = "FB_VMODE_NONINTERLACED";
+ tim2 = "TIM2_BCD", "TIM2_IPC";
+ cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)";
+ caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888";
+ bpp = <16>;
+ };
+ };
+};
diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
index b45e5f3..b683d47 100644
--- a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
+++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
@@ -182,6 +182,15 @@
interrupts = <14>;
clocks = <&v2m_oscclk1>, <&v2m_clk24mhz>;
clock-names = "clcdclk", "apb_pclk";
+ mode = "XVGA";
+ use_dma = <0>;
+ framebuffer = <0x18000000 0x00180000>;
+ };
+
+ virtio_block@0130000 {
+ compatible = "virtio,mmio";
+ reg = <0x130000 0x200>;
+ interrupts = <42>;
};
};
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 8a8ce0e..68d7c93 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -158,17 +158,23 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
return ret;
}
-#define cmpxchg(ptr,o,n) \
- ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \
- (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
-
-#define cmpxchg_local(ptr,o,n) \
- ((__typeof__(*(ptr)))__cmpxchg((ptr), \
- (unsigned long)(o), \
- (unsigned long)(n), \
- sizeof(*(ptr))))
+#define cmpxchg(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
+ sizeof(*(ptr))); \
+ __ret; \
+})
+
+#define cmpxchg_local(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ __ret; \
+})
#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n))
#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
new file mode 100644
index 0000000..c4cdb5e
--- /dev/null
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CPU_OPS_H
+#define __ASM_CPU_OPS_H
+
+#include <linux/init.h>
+#include <linux/threads.h>
+
+struct device_node;
+
+/**
+ * struct cpu_operations - Callback operations for hotplugging CPUs.
+ *
+ * @name: Name of the property as appears in a devicetree cpu node's
+ * enable-method property.
+ * @cpu_init: Reads any data necessary for a specific enable-method from the
+ * devicetree, for a given cpu node and proposed logical id.
+ * @cpu_prepare: Early one-time preparation step for a cpu. If there is a
+ * mechanism for doing so, tests whether it is possible to boot
+ * the given CPU.
+ * @cpu_boot: Boots a cpu into the kernel.
+ * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary
+ * synchronisation. Called from the cpu being booted.
+ * @cpu_disable: Prepares a cpu to die. May fail for some mechanism-specific
+ * reason, which will cause the hot unplug to be aborted. Called
+ * from the cpu to be killed.
+ * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the
+ * cpu being killed.
+ */
+struct cpu_operations {
+ const char *name;
+ int (*cpu_init)(struct device_node *, unsigned int);
+ int (*cpu_prepare)(unsigned int);
+ int (*cpu_boot)(unsigned int);
+ void (*cpu_postboot)(void);
+#ifdef CONFIG_HOTPLUG_CPU
+ int (*cpu_disable)(unsigned int cpu);
+ void (*cpu_die)(unsigned int cpu);
+#endif
+};
+
+extern const struct cpu_operations *cpu_ops[NR_CPUS];
+extern int __init cpu_read_ops(struct device_node *dn, int cpu);
+extern void __init cpu_read_bootcpu_ops(void);
+
+#endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 0332fc0..e1f7ecd 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -4,6 +4,7 @@
#include <asm-generic/irq.h>
extern void (*handle_arch_irq)(struct pt_regs *);
+extern void migrate_irqs(void);
extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
#endif
diff --git a/arch/arm64/include/asm/pgtable-3level-types.h b/arch/arm64/include/asm/pgtable-3level-types.h
index 4489615..4e94424 100644
--- a/arch/arm64/include/asm/pgtable-3level-types.h
+++ b/arch/arm64/include/asm/pgtable-3level-types.h
@@ -16,6 +16,8 @@
#ifndef __ASM_PGTABLE_3LEVEL_TYPES_H
#define __ASM_PGTABLE_3LEVEL_TYPES_H
+#include <asm/types.h>
+
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pgdval_t;
diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h
index 0604237..e5312ea 100644
--- a/arch/arm64/include/asm/psci.h
+++ b/arch/arm64/include/asm/psci.h
@@ -14,25 +14,6 @@
#ifndef __ASM_PSCI_H
#define __ASM_PSCI_H
-#define PSCI_POWER_STATE_TYPE_STANDBY 0
-#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1
-
-struct psci_power_state {
- u16 id;
- u8 type;
- u8 affinity_level;
-};
-
-struct psci_operations {
- int (*cpu_suspend)(struct psci_power_state state,
- unsigned long entry_point);
- int (*cpu_off)(struct psci_power_state state);
- int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
- int (*migrate)(unsigned long cpuid);
-};
-
-extern struct psci_operations psci_ops;
-
int psci_init(void);
#endif /* __ASM_PSCI_H */
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 4b8023c..a498f2c 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -60,21 +60,14 @@ struct secondary_data {
void *stack;
};
extern struct secondary_data secondary_data;
-extern void secondary_holding_pen(void);
-extern volatile unsigned long secondary_holding_pen_release;
+extern void secondary_entry(void);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
-struct device_node;
+extern int __cpu_disable(void);
-struct smp_enable_ops {
- const char *name;
- int (*init_cpu)(struct device_node *, int);
- int (*prepare_cpu)(int);
-};
-
-extern const struct smp_enable_ops smp_spin_table_ops;
-extern const struct smp_enable_ops smp_psci_ops;
+extern void __cpu_die(unsigned int cpu);
+extern void cpu_die(void);
#endif /* ifndef __ASM_SMP_H */
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
new file mode 100644
index 0000000..983fa7c
--- /dev/null
+++ b/arch/arm64/include/asm/topology.h
@@ -0,0 +1,73 @@
+#ifndef _ASM_ARM_TOPOLOGY_H
+#define _ASM_ARM_TOPOLOGY_H
+
+#ifdef CONFIG_ARM_CPU_TOPOLOGY
+
+#include <linux/cpumask.h>
+
+struct cputopo_arm {
+ int thread_id;
+ int core_id;
+ int socket_id;
+ cpumask_t thread_sibling;
+ cpumask_t core_sibling;
+};
+
+extern struct cputopo_arm cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
+#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling)
+#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling)
+
+#define mc_capable() (cpu_topology[0].socket_id != -1)
+#define smt_capable() (cpu_topology[0].thread_id != -1)
+
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask);
+
+#ifdef CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE
+/* Common values for CPUs */
+#ifndef SD_CPU_INIT
+#define SD_CPU_INIT (struct sched_domain) { \
+ .min_interval = 1, \
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 0, \
+ .wake_idx = 0, \
+ .forkexec_idx = 0, \
+ \
+ .flags = 0*SD_LOAD_BALANCE \
+ | 1*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_BALANCE_WAKE \
+ | 1*SD_WAKE_AFFINE \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+}
+#endif
+#endif /* CONFIG_DISABLE_CPU_SCHED_DOMAIN_BALANCE */
+
+#else
+
+static inline void init_cpu_topology(void) { }
+static inline void store_cpu_topology(unsigned int cpuid) { }
+static inline int cluster_to_logical_mask(unsigned int socket_id,
+ cpumask_t *cluster_mask) { return -EINVAL; }
+
+#endif
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7b4b564..2d145e3 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -9,15 +9,16 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o
+ hyp-stub.o psci.o cpu_ops.o
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o
arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
-arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o
+arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+arm64-obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
new file mode 100644
index 0000000..04efea8
--- /dev/null
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -0,0 +1,99 @@
+/*
+ * CPU kernel entry/exit control
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/cpu_ops.h>
+#include <asm/smp_plat.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/string.h>
+
+extern const struct cpu_operations smp_spin_table_ops;
+extern const struct cpu_operations cpu_psci_ops;
+
+const struct cpu_operations *cpu_ops[NR_CPUS];
+
+static const struct cpu_operations *supported_cpu_ops[] __initconst = {
+#ifdef CONFIG_SMP
+ &smp_spin_table_ops,
+ &cpu_psci_ops,
+#endif
+ NULL,
+};
+
+static const struct cpu_operations * __init cpu_get_ops(const char *name)
+{
+ const struct cpu_operations **ops = supported_cpu_ops;
+
+ while (*ops) {
+ if (!strcmp(name, (*ops)->name))
+ return *ops;
+
+ ops++;
+ }
+
+ return NULL;
+}
+
+/*
+ * Read a cpu's enable method from the device tree and record it in cpu_ops.
+ */
+int __init cpu_read_ops(struct device_node *dn, int cpu)
+{
+ const char *enable_method = of_get_property(dn, "enable-method", NULL);
+ if (!enable_method) {
+ /*
+ * The boot CPU may not have an enable method (e.g. when
+ * spin-table is used for secondaries). Don't warn spuriously.
+ */
+ if (cpu != 0)
+ pr_err("%s: missing enable-method property\n",
+ dn->full_name);
+ return -ENOENT;
+ }
+
+ cpu_ops[cpu] = cpu_get_ops(enable_method);
+ if (!cpu_ops[cpu]) {
+ pr_warn("%s: unsupported enable-method property: %s\n",
+ dn->full_name, enable_method);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+void __init cpu_read_bootcpu_ops(void)
+{
+ struct device_node *dn = NULL;
+ u64 mpidr = cpu_logical_map(0);
+
+ while ((dn = of_find_node_by_type(dn, "cpu"))) {
+ u64 hwid;
+ const __be32 *prop;
+
+ prop = of_get_property(dn, "reg", NULL);
+ if (!prop)
+ continue;
+
+ hwid = of_read_number(prop, of_n_addr_cells(dn));
+ if (hwid == mpidr) {
+ cpu_read_ops(dn, 0);
+ of_node_put(dn);
+ return;
+ }
+ }
+}
diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c
index 63cfc4a..fd3993c 100644
--- a/arch/arm64/kernel/cputable.c
+++ b/arch/arm64/kernel/cputable.c
@@ -22,7 +22,7 @@
extern unsigned long __cpu_setup(void);
-struct cpu_info __initdata cpu_table[] = {
+struct cpu_info cpu_table[] = {
{
.cpu_id_val = 0x000f0000,
.cpu_id_mask = 0x000f0000,
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 53dcae4..3532ca6 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -217,7 +217,6 @@ ENTRY(__boot_cpu_mode)
.quad PAGE_OFFSET
#ifdef CONFIG_SMP
- .pushsection .smp.pen.text, "ax"
.align 3
1: .quad .
.quad secondary_holding_pen_release
@@ -242,7 +241,16 @@ pen: ldr x4, [x3]
wfe
b pen
ENDPROC(secondary_holding_pen)
- .popsection
+
+ /*
+ * Secondary entry point that jumps straight into the kernel. Only to
+ * be used where CPUs are brought online dynamically by the kernel.
+ */
+ENTRY(secondary_entry)
+ bl __calc_phys_offset // x2=phys offset
+ bl el2_setup // Drop to EL1
+ b secondary_startup
+ENDPROC(secondary_entry)
ENTRY(secondary_startup)
/*
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index ecb3354..473e5db 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -81,3 +81,64 @@ void __init init_IRQ(void)
if (!handle_arch_irq)
panic("No interrupt controller found.");
}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static bool migrate_one_irq(struct irq_desc *desc)
+{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+ const struct cpumask *affinity = d->affinity;
+ struct irq_chip *c;
+ bool ret = false;
+
+ /*
+ * If this is a per-CPU interrupt, or the affinity does not
+ * include this CPU, then we have nothing to do.
+ */
+ if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
+ return false;
+
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+ affinity = cpu_online_mask;
+ ret = true;
+ }
+
+ c = irq_data_get_irq_chip(d);
+ if (!c->irq_set_affinity)
+ pr_debug("IRQ%u: unable to set affinity\n", d->irq);
+ else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret)
+ cpumask_copy(d->affinity, affinity);
+
+ return ret;
+}
+
+/*
+ * The current CPU has been marked offline. Migrate IRQs off this CPU.
+ * If the affinity settings do not allow other CPUs, force them onto any
+ * available CPU.
+ *
+ * Note: we must iterate over all IRQs, whether they have an attached
+ * action structure or not, as we need to get chained interrupts too.
+ */
+void migrate_irqs(void)
+{
+ unsigned int i;
+ struct irq_desc *desc;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ for_each_irq_desc(i, desc) {
+ bool affinity_broken;
+
+ raw_spin_lock(&desc->lock);
+ affinity_broken = migrate_one_irq(desc);
+ raw_spin_unlock(&desc->lock);
+
+ if (affinity_broken)
+ pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
+ i, smp_processor_id());
+ }
+
+ local_irq_restore(flags);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 46f02c3..4caf198 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -102,6 +102,13 @@ void arch_cpu_idle(void)
local_irq_enable();
}
+#ifdef CONFIG_HOTPLUG_CPU
+void arch_cpu_idle_dead(void)
+{
+ cpu_die();
+}
+#endif
+
void machine_shutdown(void)
{
#ifdef CONFIG_SMP
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index 14f73c4..4f97db3 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -17,12 +17,32 @@
#include <linux/init.h>
#include <linux/of.h>
+#include <linux/smp.h>
#include <asm/compiler.h>
+#include <asm/cpu_ops.h>
#include <asm/errno.h>
#include <asm/psci.h>
+#include <asm/smp_plat.h>
-struct psci_operations psci_ops;
+#define PSCI_POWER_STATE_TYPE_STANDBY 0
+#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1
+
+struct psci_power_state {
+ u16 id;
+ u8 type;
+ u8 affinity_level;
+};
+
+struct psci_operations {
+ int (*cpu_suspend)(struct psci_power_state state,
+ unsigned long entry_point);
+ int (*cpu_off)(struct psci_power_state state);
+ int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
+ int (*migrate)(unsigned long cpuid);
+};
+
+static struct psci_operations psci_ops;
static int (*invoke_psci_fn)(u64, u64, u64, u64);
@@ -209,3 +229,68 @@ out_put_node:
of_node_put(np);
return err;
}
+
+#ifdef CONFIG_SMP
+
+static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu)
+{
+ return 0;
+}
+
+static int __init cpu_psci_cpu_prepare(unsigned int cpu)
+{
+ if (!psci_ops.cpu_on) {
+ pr_err("no cpu_on method, not booting CPU%d\n", cpu);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static int cpu_psci_cpu_boot(unsigned int cpu)
+{
+ int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
+ if (err)
+ pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err);
+
+ return err;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int cpu_psci_cpu_disable(unsigned int cpu)
+{
+ /* Fail early if we don't have CPU_OFF support */
+ if (!psci_ops.cpu_off)
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static void cpu_psci_cpu_die(unsigned int cpu)
+{
+ int ret;
+ /*
+ * There are no known implementations of PSCI actually using the
+ * power state field, pass a sensible default for now.
+ */
+ struct psci_power_state state = {
+ .type = PSCI_POWER_STATE_TYPE_POWER_DOWN,
+ };
+
+ ret = psci_ops.cpu_off(state);
+
+ pr_crit("psci: unable to power off CPU%u (%d)\n", cpu, ret);
+}
+#endif
+
+const struct cpu_operations cpu_psci_ops = {
+ .name = "psci",
+ .cpu_init = cpu_psci_cpu_init,
+ .cpu_prepare = cpu_psci_cpu_prepare,
+ .cpu_boot = cpu_psci_cpu_boot,
+#ifdef CONFIG_HOTPLUG_CPU
+ .cpu_disable = cpu_psci_cpu_disable,
+ .cpu_die = cpu_psci_cpu_die,
+#endif
+};
+
+#endif
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index add6ea6..85afdae 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -45,6 +45,7 @@
#include <asm/cputype.h>
#include <asm/elf.h>
#include <asm/cputable.h>
+#include <asm/cpu_ops.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/smp_plat.h>
@@ -97,6 +98,11 @@ void __init early_print(const char *str, ...)
printk("%s", buf);
}
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+ return phys_id == cpu_logical_map(cpu);
+}
+
static void __init setup_processor(void)
{
struct cpu_info *cpu_info;
@@ -269,6 +275,7 @@ void __init setup_arch(char **cmdline_p)
psci_init();
cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
+ cpu_read_bootcpu_ops();
#ifdef CONFIG_SMP
smp_init_cpus();
#endif
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 9c93e12..35085d6 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -39,6 +39,7 @@
#include <asm/atomic.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
+#include <asm/cpu_ops.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -48,13 +49,15 @@
#include <asm/tlbflush.h>
#include <asm/ptrace.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/arm-ipi.h>
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
* where to place its SVC stack
*/
struct secondary_data secondary_data;
-volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
enum ipi_msg_type {
IPI_RESCHEDULE,
@@ -63,61 +66,16 @@ enum ipi_msg_type {
IPI_CPU_STOP,
};
-static DEFINE_RAW_SPINLOCK(boot_lock);
-
-/*
- * Write secondary_holding_pen_release in a way that is guaranteed to be
- * visible to all observers, irrespective of whether they're taking part
- * in coherency or not. This is necessary for the hotplug code to work
- * reliably.
- */
-static void __cpuinit write_pen_release(u64 val)
-{
- void *start = (void *)&secondary_holding_pen_release;
- unsigned long size = sizeof(secondary_holding_pen_release);
-
- secondary_holding_pen_release = val;
- __flush_dcache_area(start, size);
-}
-
/*
* Boot a secondary CPU, and assign it the specified idle task.
* This also gives us the initial stack to use for this CPU.
*/
static int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
{
- unsigned long timeout;
+ if (cpu_ops[cpu]->cpu_boot)
+ return cpu_ops[cpu]->cpu_boot(cpu);
- /*
- * Set synchronisation state between this boot processor
- * and the secondary one
- */
- raw_spin_lock(&boot_lock);
-
- /*
- * Update the pen release flag.
- */
- write_pen_release(cpu_logical_map(cpu));
-
- /*
- * Send an event, causing the secondaries to read pen_release.
- */
- sev();
-
- timeout = jiffies + (1 * HZ);
- while (time_before(jiffies, timeout)) {
- if (secondary_holding_pen_release == INVALID_HWID)
- break;
- udelay(10);
- }
-
- /*
- * Now the secondary core is starting up let it run its
- * calibrations, then wait for it to finish
- */
- raw_spin_unlock(&boot_lock);
-
- return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0;
+ return -EOPNOTSUPP;
}
static DECLARE_COMPLETION(cpu_running);
@@ -158,6 +116,11 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
return ret;
}
+static void __cpuinit smp_store_cpu_info(unsigned int cpuid)
+{
+ store_cpu_topology(cpuid);
+}
+
/*
* This is the secondary CPU boot entry. We're using this CPUs
* idle thread stack, but a set of temporary page tables.
@@ -187,17 +150,15 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
preempt_disable();
trace_hardirqs_off();
- /*
- * Let the primary processor know we're out of the
- * pen, then head off into the C entry point
- */
- write_pen_release(INVALID_HWID);
+ if (cpu_ops[cpu]->cpu_postboot)
+ cpu_ops[cpu]->cpu_postboot();
+
+ smp_store_cpu_info(cpu);
/*
- * Synchronise with the boot thread.
+ * Enable GIC and timers.
*/
- raw_spin_lock(&boot_lock);
- raw_spin_unlock(&boot_lock);
+ notify_cpu_starting(cpu);
/*
* OK, now it's safe to let the boot CPU continue. Wait for
@@ -207,11 +168,6 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
set_cpu_online(cpu, true);
complete(&cpu_running);
- /*
- * Enable GIC and timers.
- */
- notify_cpu_starting(cpu);
-
local_irq_enable();
local_fiq_enable();
@@ -221,43 +177,117 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
cpu_startup_entry(CPUHP_ONLINE);
}
-void __init smp_cpus_done(unsigned int max_cpus)
+#ifdef CONFIG_HOTPLUG_CPU
+static int op_cpu_disable(unsigned int cpu)
{
- unsigned long bogosum = loops_per_jiffy * num_online_cpus();
+ /*
+ * If we don't have a cpu_die method, abort before we reach the point
+ * of no return. CPU0 may not have an cpu_ops, so test for it.
+ */
+ if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die)
+ return -EOPNOTSUPP;
- pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
- num_online_cpus(), bogosum / (500000/HZ),
- (bogosum / (5000/HZ)) % 100);
+ /*
+ * We may need to abort a hot unplug for some other mechanism-specific
+ * reason.
+ */
+ if (cpu_ops[cpu]->cpu_disable)
+ return cpu_ops[cpu]->cpu_disable(cpu);
+
+ return 0;
}
-void __init smp_prepare_boot_cpu(void)
+/*
+ * __cpu_disable runs on the processor to be shutdown.
+ */
+int __cpu_disable(void)
{
-}
+ unsigned int cpu = smp_processor_id();
+ int ret;
-static void (*smp_cross_call)(const struct cpumask *, unsigned int);
+ ret = op_cpu_disable(cpu);
+ if (ret)
+ return ret;
-static const struct smp_enable_ops *enable_ops[] __initconst = {
- &smp_spin_table_ops,
- &smp_psci_ops,
- NULL,
-};
+ /*
+ * Take this CPU offline. Once we clear this, we can't return,
+ * and we must not schedule until we're ready to give up the cpu.
+ */
+ set_cpu_online(cpu, false);
-static const struct smp_enable_ops *smp_enable_ops[NR_CPUS];
+ /*
+ * OK - migrate IRQs away from this CPU
+ */
+ migrate_irqs();
-static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name)
-{
- const struct smp_enable_ops **ops = enable_ops;
+ /*
+ * Remove this CPU from the vm mask set of all processes.
+ */
+ clear_tasks_mm_cpumask(cpu);
+
+ return 0;
+}
- while (*ops) {
- if (!strcmp(name, (*ops)->name))
- return *ops;
+static DECLARE_COMPLETION(cpu_died);
- ops++;
+/*
+ * called on the thread which is asking for a CPU to be shutdown -
+ * waits until shutdown has completed, or it is timed out.
+ */
+void __cpu_die(unsigned int cpu)
+{
+ if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
+ pr_crit("CPU%u: cpu didn't die\n", cpu);
+ return;
}
+ pr_notice("CPU%u: shutdown\n", cpu);
+}
+
+/*
+ * Called from the idle thread for the CPU which has been shutdown.
+ *
+ * Note that we disable IRQs here, but do not re-enable them
+ * before returning to the caller. This is also the behaviour
+ * of the other hotplug-cpu capable cores, so presumably coming
+ * out of idle fixes this.
+ */
+void cpu_die(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ idle_task_exit();
+
+ local_irq_disable();
+
+ /* Tell __cpu_die() that this CPU is now safe to dispose of */
+ complete(&cpu_died);
+
+ /*
+ * Actually shutdown the CPU. This must never fail. The specific hotplug
+ * mechanism must perform all required cache maintenance to ensure that
+ * no dirty lines are lost in the process of shutting down the CPU.
+ */
+ cpu_ops[cpu]->cpu_die(cpu);
+
+ BUG();
+}
+#endif
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+ unsigned long bogosum = loops_per_jiffy * num_online_cpus();
- return NULL;
+ pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ num_online_cpus(), bogosum / (500000/HZ),
+ (bogosum / (5000/HZ)) % 100);
}
+void __init smp_prepare_boot_cpu(void)
+{
+}
+
+static void (*smp_cross_call)(const struct cpumask *, unsigned int);
+
/*
* Enumerate the possible CPU set from the device tree and build the
* cpu logical map array containing MPIDR values related to logical
@@ -265,9 +295,8 @@ static const struct smp_enable_ops * __init smp_get_enable_ops(const char *name)
*/
void __init smp_init_cpus(void)
{
- const char *enable_method;
struct device_node *dn = NULL;
- int i, cpu = 1;
+ unsigned int i, cpu = 1;
bool bootcpu_valid = false;
while ((dn = of_find_node_by_type(dn, "cpu"))) {
@@ -336,25 +365,10 @@ void __init smp_init_cpus(void)
if (cpu >= NR_CPUS)
goto next;
- /*
- * We currently support only the "spin-table" enable-method.
- */
- enable_method = of_get_property(dn, "enable-method", NULL);
- if (!enable_method) {
- pr_err("%s: missing enable-method property\n",
- dn->full_name);
+ if (cpu_read_ops(dn, cpu) != 0)
goto next;
- }
-
- smp_enable_ops[cpu] = smp_get_enable_ops(enable_method);
- if (!smp_enable_ops[cpu]) {
- pr_err("%s: invalid enable-method property: %s\n",
- dn->full_name, enable_method);
- goto next;
- }
-
- if (smp_enable_ops[cpu]->init_cpu(dn, cpu))
+ if (cpu_ops[cpu]->cpu_init(dn, cpu))
goto next;
pr_debug("cpu logical map 0x%llx\n", hwid);
@@ -384,8 +398,13 @@ next:
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- int cpu, err;
- unsigned int ncores = num_possible_cpus();
+ int err;
+ unsigned int cpu, ncores = num_possible_cpus();
+
+ init_cpu_topology();
+
+ smp_store_cpu_info(smp_processor_id());
+
/*
* are we trying to boot more cores than exist?
@@ -412,10 +431,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
if (cpu == smp_processor_id())
continue;
- if (!smp_enable_ops[cpu])
+ if (!cpu_ops[cpu])
continue;
- err = smp_enable_ops[cpu]->prepare_cpu(cpu);
+ err = cpu_ops[cpu]->cpu_prepare(cpu);
if (err)
continue;
diff --git a/arch/arm64/kernel/smp_psci.c b/arch/arm64/kernel/smp_psci.c
deleted file mode 100644
index 0c53330..0000000
--- a/arch/arm64/kernel/smp_psci.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * PSCI SMP initialisation
- *
- * Copyright (C) 2013 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/init.h>
-#include <linux/of.h>
-#include <linux/smp.h>
-
-#include <asm/psci.h>
-#include <asm/smp_plat.h>
-
-static int __init smp_psci_init_cpu(struct device_node *dn, int cpu)
-{
- return 0;
-}
-
-static int __init smp_psci_prepare_cpu(int cpu)
-{
- int err;
-
- if (!psci_ops.cpu_on) {
- pr_err("psci: no cpu_on method, not booting CPU%d\n", cpu);
- return -ENODEV;
- }
-
- err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_holding_pen));
- if (err) {
- pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err);
- return err;
- }
-
- return 0;
-}
-
-const struct smp_enable_ops smp_psci_ops __initconst = {
- .name = "psci",
- .init_cpu = smp_psci_init_cpu,
- .prepare_cpu = smp_psci_prepare_cpu,
-};
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 7c35fa6..27f0836 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -16,15 +16,39 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/delay.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/smp.h>
#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+
+extern void secondary_holding_pen(void);
+volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
static phys_addr_t cpu_release_addr[NR_CPUS];
+static DEFINE_RAW_SPINLOCK(boot_lock);
-static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu)
+/*
+ * Write secondary_holding_pen_release in a way that is guaranteed to be
+ * visible to all observers, irrespective of whether they're taking part
+ * in coherency or not. This is necessary for the hotplug code to work
+ * reliably.
+ */
+static void write_pen_release(u64 val)
+{
+ void *start = (void *)&secondary_holding_pen_release;
+ unsigned long size = sizeof(secondary_holding_pen_release);
+
+ secondary_holding_pen_release = val;
+ __flush_dcache_area(start, size);
+}
+
+
+static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu)
{
/*
* Determine the address from which the CPU is polling.
@@ -40,7 +64,7 @@ static int __init smp_spin_table_init_cpu(struct device_node *dn, int cpu)
return 0;
}
-static int __init smp_spin_table_prepare_cpu(int cpu)
+static int smp_spin_table_cpu_prepare(unsigned int cpu)
{
void **release_addr;
@@ -59,8 +83,60 @@ static int __init smp_spin_table_prepare_cpu(int cpu)
return 0;
}
-const struct smp_enable_ops smp_spin_table_ops __initconst = {
+static int smp_spin_table_cpu_boot(unsigned int cpu)
+{
+ unsigned long timeout;
+
+ /*
+ * Set synchronisation state between this boot processor
+ * and the secondary one
+ */
+ raw_spin_lock(&boot_lock);
+
+ /*
+ * Update the pen release flag.
+ */
+ write_pen_release(cpu_logical_map(cpu));
+
+ /*
+ * Send an event, causing the secondaries to read pen_release.
+ */
+ sev();
+
+ timeout = jiffies + (1 * HZ);
+ while (time_before(jiffies, timeout)) {
+ if (secondary_holding_pen_release == INVALID_HWID)
+ break;
+ udelay(10);
+ }
+
+ /*
+ * Now the secondary core is starting up let it run its
+ * calibrations, then wait for it to finish
+ */
+ raw_spin_unlock(&boot_lock);
+
+ return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0;
+}
+
+void smp_spin_table_cpu_postboot(void)
+{
+ /*
+ * Let the primary processor know we're out of the pen.
+ */
+ write_pen_release(INVALID_HWID);
+
+ /*
+ * Synchronise with the boot thread.
+ */
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
+}
+
+const struct cpu_operations smp_spin_table_ops = {
.name = "spin-table",
- .init_cpu = smp_spin_table_init_cpu,
- .prepare_cpu = smp_spin_table_prepare_cpu,
+ .cpu_init = smp_spin_table_cpu_init,
+ .cpu_prepare = smp_spin_table_cpu_prepare,
+ .cpu_boot = smp_spin_table_cpu_boot,
+ .cpu_postboot = smp_spin_table_cpu_postboot,
};
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
new file mode 100644
index 0000000..971064a
--- /dev/null
+++ b/arch/arm64/kernel/topology.c
@@ -0,0 +1,537 @@
+/*
+ * arch/arm64/kernel/topology.c
+ *
+ * Copyright (C) 2011,2013 Linaro Limited.
+ * Written by: Vincent Guittot
+ *
+ * based on arch/sh/kernel/topology.c
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+#include <asm/topology.h>
+
+/*
+ * cpu power scale management
+ */
+
+/*
+ * cpu power table
+ * This per cpu data structure describes the relative capacity of each core.
+ * On a heteregenous system, cores don't have the same computation capacity
+ * and we reflect that difference in the cpu_power field so the scheduler can
+ * take this difference into account during load balance. A per cpu structure
+ * is preferred because each CPU updates its own cpu_power field during the
+ * load balance except for idle cores. One idle core is selected to run the
+ * rebalance_domains for all idle cores and the cpu_power can be updated
+ * during this sequence.
+ */
+static DEFINE_PER_CPU(unsigned long, cpu_scale);
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+static void set_power_scale(unsigned int cpu, unsigned long power)
+{
+ per_cpu(cpu_scale, cpu) = power;
+}
+
+#ifdef CONFIG_OF
+struct cpu_efficiency {
+ const char *compatible;
+ unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processors
+ * The efficiency value must fit in 20bit and the final
+ * cpu_scale value must be in the range
+ * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table,
+ * use the default SCHED_POWER_SCALE value for cpu_scale.
+ */
+static const struct cpu_efficiency table_efficiency[] = {
+ { "arm,cortex-a57", 3891 },
+ { "arm,cortex-a53", 2048 },
+ { NULL, },
+};
+
+static unsigned long *__cpu_capacity;
+#define cpu_capacity(cpu) __cpu_capacity[cpu]
+
+static unsigned long middle_capacity = 1;
+static int cluster_id;
+
+static int __init get_cpu_for_node(struct device_node *node)
+{
+ struct device_node *cpu_node;
+ int cpu;
+
+ cpu_node = of_parse_phandle(node, "cpu", 0);
+ if (!cpu_node) {
+ pr_crit("%s: Unable to parse CPU phandle\n", node->full_name);
+ return -1;
+ }
+
+ for_each_possible_cpu(cpu) {
+ if (of_get_cpu_node(cpu, NULL) == cpu_node)
+ return cpu;
+ }
+
+ pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name);
+ return -1;
+}
+
+static void __init parse_core(struct device_node *core, int core_id)
+{
+ char name[10];
+ bool leaf = true;
+ int i, cpu;
+ struct device_node *t;
+
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "thread%d", i);
+ t = of_get_child_by_name(core, name);
+ if (t) {
+ leaf = false;
+ cpu = get_cpu_for_node(t);
+ if (cpu) {
+ pr_info("CPU%d: socket %d core %d thread %d\n",
+ cpu, cluster_id, core_id, i);
+ cpu_topology[cpu].socket_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ cpu_topology[cpu].thread_id = i;
+ } else {
+ pr_err("%s: Can't get CPU for thread\n",
+ t->full_name);
+ }
+ }
+ i++;
+ } while (t);
+
+ cpu = get_cpu_for_node(core);
+ if (cpu >= 0) {
+ if (!leaf) {
+ pr_err("%s: Core has both threads and CPU\n",
+ core->full_name);
+ return;
+ }
+
+ pr_info("CPU%d: socket %d core %d\n",
+ cpu, cluster_id, core_id);
+ cpu_topology[cpu].socket_id = cluster_id;
+ cpu_topology[cpu].core_id = core_id;
+ } else if (leaf) {
+ pr_err("%s: Can't get CPU for leaf core\n", core->full_name);
+ }
+}
+
+static void __init parse_cluster(struct device_node *cluster)
+{
+ char name[10];
+ bool leaf = true;
+ bool has_cores = false;
+ struct device_node *c;
+ int core_id = 0;
+ int i;
+
+ /*
+ * First check for child clusters; we currently ignore any
+ * information about the nesting of clusters and present the
+ * scheduler with a flat list of them.
+ */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "cluster%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ parse_cluster(c);
+ leaf = false;
+ }
+ i++;
+ } while (c);
+
+ /* Now check for cores */
+ i = 0;
+ do {
+ snprintf(name, sizeof(name), "core%d", i);
+ c = of_get_child_by_name(cluster, name);
+ if (c) {
+ has_cores = true;
+
+ if (leaf)
+ parse_core(c, core_id++);
+ else
+ pr_err("%s: Non-leaf cluster with core %s\n",
+ cluster->full_name, name);
+ }
+ i++;
+ } while (c);
+
+ if (leaf && !has_cores)
+ pr_warn("%s: empty cluster\n", cluster->full_name);
+
+ if (leaf)
+ cluster_id++;
+}
+
+/*
+ * Iterate all CPUs' descriptor in DT and compute the efficiency
+ * (as per table_efficiency). Also calculate a middle efficiency
+ * as close as possible to (max{eff_i} - min{eff_i}) / 2
+ * This is later used to scale the cpu_power field such that an
+ * 'average' CPU is of middle power. Also see the comments near
+ * table_efficiency[] and update_cpu_power().
+ */
+static void __init parse_dt_topology(void)
+{
+ const struct cpu_efficiency *cpu_eff;
+ struct device_node *cn = NULL;
+ unsigned long min_capacity = (unsigned long)(-1);
+ unsigned long max_capacity = 0;
+ unsigned long capacity = 0;
+ int alloc_size, cpu;
+
+ alloc_size = nr_cpu_ids * sizeof(*__cpu_capacity);
+ __cpu_capacity = kzalloc(alloc_size, GFP_NOWAIT);
+
+ cn = of_find_node_by_path("/cpus");
+ if (!cn) {
+ pr_err("No CPU information found in DT\n");
+ return;
+ }
+
+ /*
+ * If topology is provided as a cpu-map it is essentially a
+ * root cluster.
+ */
+ cn = of_find_node_by_name(cn, "cpu-map");
+ if (!cn)
+ return;
+ parse_cluster(cn);
+
+ for_each_possible_cpu(cpu) {
+ const u32 *rate;
+ int len;
+
+ /* Too early to use cpu->of_node */
+ cn = of_get_cpu_node(cpu, NULL);
+ if (!cn) {
+ pr_err("Missing device node for CPU %d\n", cpu);
+ continue;
+ }
+
+ /* check if the cpu is marked as "disabled", if so ignore */
+ if (!of_device_is_available(cn))
+ continue;
+
+ for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
+ if (of_device_is_compatible(cn, cpu_eff->compatible))
+ break;
+
+ if (cpu_eff->compatible == NULL) {
+ pr_warn("%s: Unknown CPU type\n", cn->full_name);
+ continue;
+ }
+
+ rate = of_get_property(cn, "clock-frequency", &len);
+ if (!rate || len != 4) {
+ pr_err("%s: Missing clock-frequency property\n",
+ cn->full_name);
+ continue;
+ }
+
+ capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
+
+ /* Save min capacity of the system */
+ if (capacity < min_capacity)
+ min_capacity = capacity;
+
+ /* Save max capacity of the system */
+ if (capacity > max_capacity)
+ max_capacity = capacity;
+
+ cpu_capacity(cpu) = capacity;
+ }
+
+ /* If min and max capacities are equal we bypass the update of the
+ * cpu_scale because all CPUs have the same capacity. Otherwise, we
+ * compute a middle_capacity factor that will ensure that the capacity
+ * of an 'average' CPU of the system will be as close as possible to
+ * SCHED_POWER_SCALE, which is the default value, but with the
+ * constraint explained near table_efficiency[].
+ */
+ if (min_capacity == max_capacity)
+ return;
+ else if (4 * max_capacity < (3 * (max_capacity + min_capacity)))
+ middle_capacity = (min_capacity + max_capacity)
+ >> (SCHED_POWER_SHIFT+1);
+ else
+ middle_capacity = ((max_capacity / 3)
+ >> (SCHED_POWER_SHIFT-1)) + 1;
+
+}
+
+/*
+ * Look for a customed capacity of a CPU in the cpu_topo_data table during the
+ * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
+ * function returns directly for SMP system.
+ */
+static void update_cpu_power(unsigned int cpu)
+{
+ if (!cpu_capacity(cpu))
+ return;
+
+ set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+
+ pr_info("CPU%u: update cpu_power %lu\n",
+ cpu, arch_scale_freq_power(NULL, cpu));
+}
+
+#else
+static inline void parse_dt_topology(void) {}
+static inline void update_cpu_power(unsigned int cpuid) {}
+#endif
+
+/*
+ * cpu topology table
+ */
+struct cputopo_arm cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return &cpu_topology[cpu].core_sibling;
+}
+
+static void update_siblings_masks(unsigned int cpuid)
+{
+ struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+ int cpu;
+
+ /* update core and thread sibling masks */
+ for_each_possible_cpu(cpu) {
+ cpu_topo = &cpu_topology[cpu];
+
+ if (cpuid_topo->socket_id != cpu_topo->socket_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+ if (cpuid_topo->core_id != cpu_topo->core_id)
+ continue;
+
+ cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+ if (cpu != cpuid)
+ cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+ }
+ smp_wmb();
+}
+
+void store_cpu_topology(unsigned int cpuid)
+{
+ struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
+
+ /* DT should have been parsed by the time we get here */
+ if (cpuid_topo->core_id == -1)
+ pr_info("CPU%u: No topology information configured\n", cpuid);
+ else
+ update_siblings_masks(cpuid);
+
+ update_cpu_power(cpuid);
+}
+
+#ifdef CONFIG_SCHED_HMP
+
+/*
+ * Retrieve logical cpu index corresponding to a given MPIDR[23:0]
+ * - mpidr: MPIDR[23:0] to be used for the look-up
+ *
+ * Returns the cpu logical index or -EINVAL on look-up error
+ */
+static inline int get_logical_index(u32 mpidr)
+{
+ int cpu;
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+ if (cpu_logical_map(cpu) == mpidr)
+ return cpu;
+ return -EINVAL;
+}
+
+static const char * const little_cores[] = {
+ "arm,cortex-a53",
+ NULL,
+};
+
+static bool is_little_cpu(struct device_node *cn)
+{
+ const char * const *lc;
+ for (lc = little_cores; *lc; lc++)
+ if (of_device_is_compatible(cn, *lc))
+ return true;
+ return false;
+}
+
+void __init arch_get_fast_and_slow_cpus(struct cpumask *fast,
+ struct cpumask *slow)
+{
+ struct device_node *cn = NULL;
+ int cpu;
+
+ cpumask_clear(fast);
+ cpumask_clear(slow);
+
+ /*
+ * Use the config options if they are given. This helps testing
+ * HMP scheduling on systems without a big.LITTLE architecture.
+ */
+ if (strlen(CONFIG_HMP_FAST_CPU_MASK) && strlen(CONFIG_HMP_SLOW_CPU_MASK)) {
+ if (cpulist_parse(CONFIG_HMP_FAST_CPU_MASK, fast))
+ WARN(1, "Failed to parse HMP fast cpu mask!\n");
+ if (cpulist_parse(CONFIG_HMP_SLOW_CPU_MASK, slow))
+ WARN(1, "Failed to parse HMP slow cpu mask!\n");
+ return;
+ }
+
+ /*
+ * Else, parse device tree for little cores.
+ */
+ while ((cn = of_find_node_by_type(cn, "cpu"))) {
+
+ const u32 *mpidr;
+ int len;
+
+ mpidr = of_get_property(cn, "reg", &len);
+ if (!mpidr || len != 8) {
+ pr_err("%s missing reg property\n", cn->full_name);
+ continue;
+ }
+
+ cpu = get_logical_index(be32_to_cpup(mpidr+1));
+ if (cpu == -EINVAL) {
+ pr_err("couldn't get logical index for mpidr %x\n",
+ be32_to_cpup(mpidr+1));
+ break;
+ }
+
+ if (is_little_cpu(cn))
+ cpumask_set_cpu(cpu, slow);
+ else
+ cpumask_set_cpu(cpu, fast);
+ }
+
+ if (!cpumask_empty(fast) && !cpumask_empty(slow))
+ return;
+
+ /*
+ * We didn't find both big and little cores so let's call all cores
+ * fast as this will keep the system running, with all cores being
+ * treated equal.
+ */
+ cpumask_setall(fast);
+ cpumask_clear(slow);
+}
+
+struct cpumask hmp_slow_cpu_mask;
+
+void __init arch_get_hmp_domains(struct list_head *hmp_domains_list)
+{
+ struct cpumask hmp_fast_cpu_mask;
+ struct hmp_domain *domain;
+
+ arch_get_fast_and_slow_cpus(&hmp_fast_cpu_mask, &hmp_slow_cpu_mask);
+
+ /*
+ * Initialize hmp_domains
+ * Must be ordered with respect to compute capacity.
+ * Fastest domain at head of list.
+ */
+ if(!cpumask_empty(&hmp_slow_cpu_mask)) {
+ domain = (struct hmp_domain *)
+ kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+ cpumask_copy(&domain->possible_cpus, &hmp_slow_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+ }
+ domain = (struct hmp_domain *)
+ kmalloc(sizeof(struct hmp_domain), GFP_KERNEL);
+ cpumask_copy(&domain->possible_cpus, &hmp_fast_cpu_mask);
+ cpumask_and(&domain->cpus, cpu_online_mask, &domain->possible_cpus);
+ list_add(&domain->hmp_domains, hmp_domains_list);
+}
+#endif /* CONFIG_SCHED_HMP */
+
+/*
+ * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster
+ * @socket_id: cluster HW identifier
+ * @cluster_mask: the cpumask location to be initialized, modified by the
+ * function only if return value == 0
+ *
+ * Return:
+ *
+ * 0 on success
+ * -EINVAL if cluster_mask is NULL or there is no record matching socket_id
+ */
+int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask)
+{
+ int cpu;
+
+ if (!cluster_mask)
+ return -EINVAL;
+
+ for_each_online_cpu(cpu) {
+ if (socket_id == topology_physical_package_id(cpu)) {
+ cpumask_copy(cluster_mask, topology_core_cpumask(cpu));
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * init_cpu_topology is called at boot when only one cpu is running
+ * which prevent simultaneous write access to cpu_topology array
+ */
+void __init init_cpu_topology(void)
+{
+ unsigned int cpu;
+
+ /* init core mask and power*/
+ for_each_possible_cpu(cpu) {
+ struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
+
+ cpu_topo->thread_id = -1;
+ cpu_topo->core_id = -1;
+ cpu_topo->socket_id = -1;
+ cpumask_clear(&cpu_topo->core_sibling);
+ cpumask_clear(&cpu_topo->thread_sibling);
+
+ set_power_scale(cpu, SCHED_POWER_SCALE);
+ }
+ smp_wmb();
+
+ parse_dt_topology();
+}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 3fae2be..2c8a95b 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -41,7 +41,6 @@ SECTIONS
}
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
- *(.smp.pen.text)
__exception_text_start = .;
*(.exception.text)
__exception_text_end = .;
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index bc2da15..ac204e0 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -43,9 +43,6 @@ void of_parse_dma_window(struct device_node *dn, const void *dma_window_prop,
extern void kdump_move_device_tree(void);
-/* CPU OF node matching */
-struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
-
/* cache lookup */
struct device_node *of_find_next_cache_node(struct device_node *np);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 8b6f7a9..00610a3 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -827,49 +827,10 @@ static int __init prom_reconfig_setup(void)
__initcall(prom_reconfig_setup);
#endif
-/* Find the device node for a given logical cpu number, also returns the cpu
- * local thread number (index in ibm,interrupt-server#s) if relevant and
- * asked for (non NULL)
- */
-struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
- int hardid;
- struct device_node *np;
-
- hardid = get_hard_smp_processor_id(cpu);
-
- for_each_node_by_type(np, "cpu") {
- const u32 *intserv;
- unsigned int plen, t;
-
- /* Check for ibm,ppc-interrupt-server#s. If it doesn't exist
- * fallback to "reg" property and assume no threads
- */
- intserv = of_get_property(np, "ibm,ppc-interrupt-server#s",
- &plen);
- if (intserv == NULL) {
- const u32 *reg = of_get_property(np, "reg", NULL);
- if (reg == NULL)
- continue;
- if (*reg == hardid) {
- if (thread)
- *thread = 0;
- return np;
- }
- } else {
- plen /= sizeof(u32);
- for (t = 0; t < plen; t++) {
- if (hardid == intserv[t]) {
- if (thread)
- *thread = t;
- return np;
- }
- }
- }
- }
- return NULL;
+ return (int)phys_id == get_hard_smp_processor_id(cpu);
}
-EXPORT_SYMBOL(of_get_cpu_node);
#if defined(CONFIG_DEBUG_FS) && defined(DEBUG)
static struct debugfs_blob_wrapper flat_dt_blob;
diff --git a/block/blk-core.c b/block/blk-core.c
index 2c66dab..3d37cde 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3191,7 +3191,8 @@ int __init blk_dev_init(void)
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd",
- WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+ WQ_MEM_RECLAIM | WQ_HIGHPRI |
+ WQ_POWER_EFFICIENT, 0);
if (!kblockd_workqueue)
panic("Failed to create kblockd\n");
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 9c4bb82..4464c82 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -144,7 +144,8 @@ void put_io_context(struct io_context *ioc)
if (atomic_long_dec_and_test(&ioc->refcount)) {
spin_lock_irqsave(&ioc->lock, flags);
if (!hlist_empty(&ioc->icq_list))
- schedule_work(&ioc->release_work);
+ queue_work(system_power_efficient_wq,
+ &ioc->release_work);
else
free_ioc = true;
spin_unlock_irqrestore(&ioc->lock, flags);
diff --git a/block/genhd.c b/block/genhd.c
index cdeb527..dadf42b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1489,9 +1489,11 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
intv = disk_events_poll_jiffies(disk);
set_timer_slack(&ev->dwork.timer, intv / 4);
if (check_now)
- queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
+ queue_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, 0);
else if (intv)
- queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
+ queue_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, intv);
out_unlock:
spin_unlock_irqrestore(&ev->lock, flags);
}
@@ -1534,7 +1536,8 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
spin_lock_irq(&ev->lock);
ev->clearing |= mask;
if (!ev->block)
- mod_delayed_work(system_freezable_wq, &ev->dwork, 0);
+ mod_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, 0);
spin_unlock_irq(&ev->lock);
}
@@ -1627,7 +1630,8 @@ static void disk_check_events(struct disk_events *ev,
intv = disk_events_poll_jiffies(disk);
if (!ev->block && intv)
- queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
+ queue_delayed_work(system_freezable_power_efficient_wq,
+ &ev->dwork, intv);
spin_unlock_irq(&ev->lock);
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 9953a42..d27feb5 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -166,4 +166,6 @@ source "drivers/ipack/Kconfig"
source "drivers/reset/Kconfig"
+source "drivers/gator/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 130abc1..092a62e 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -152,3 +152,5 @@ obj-$(CONFIG_IIO) += iio/
obj-$(CONFIG_VME_BUS) += vme/
obj-$(CONFIG_IPACK_BUS) += ipack/
obj-$(CONFIG_NTB) += ntb/
+
+obj-$(CONFIG_GATOR) += gator/
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index b05ecab..5286e2d 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -26,4 +26,11 @@ config OMAP_INTERCONNECT
help
Driver to enable OMAP interconnect error handling driver.
+
+config ARM_CCI
+ bool "ARM CCI driver support"
+ depends on ARM
+ help
+ Driver supporting the CCI cache coherent interconnect for ARM
+ platforms.
endmenu
diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
index 3c7b53c..670cea4 100644
--- a/drivers/bus/Makefile
+++ b/drivers/bus/Makefile
@@ -7,3 +7,5 @@ obj-$(CONFIG_OMAP_OCP2SCP) += omap-ocp2scp.o
# Interconnect bus driver for OMAP SoCs.
obj-$(CONFIG_OMAP_INTERCONNECT) += omap_l3_smx.o omap_l3_noc.o
+# CCI cache coherent interconnect for ARM platforms
+obj-$(CONFIG_ARM_CCI) += arm-cci.o
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
new file mode 100644
index 0000000..2d1387b
--- /dev/null
+++ b/drivers/bus/arm-cci.c
@@ -0,0 +1,945 @@
+/*
+ * CCI cache coherent interconnect driver
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/arm-cci.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+
+#include <asm/cacheflush.h>
+#include <asm/irq_regs.h>
+#include <asm/pmu.h>
+#include <asm/smp_plat.h>
+
+#define DRIVER_NAME "CCI"
+
+#define CCI_PORT_CTRL 0x0
+#define CCI_CTRL_STATUS 0xc
+
+#define CCI_ENABLE_SNOOP_REQ 0x1
+#define CCI_ENABLE_DVM_REQ 0x2
+#define CCI_ENABLE_REQ (CCI_ENABLE_SNOOP_REQ | CCI_ENABLE_DVM_REQ)
+
+struct cci_nb_ports {
+ unsigned int nb_ace;
+ unsigned int nb_ace_lite;
+};
+
+enum cci_ace_port_type {
+ ACE_INVALID_PORT = 0x0,
+ ACE_PORT,
+ ACE_LITE_PORT,
+};
+
+struct cci_ace_port {
+ void __iomem *base;
+ unsigned long phys;
+ enum cci_ace_port_type type;
+ struct device_node *dn;
+};
+
+static struct cci_ace_port *ports;
+static unsigned int nb_cci_ports;
+
+static void __iomem *cci_ctrl_base;
+static unsigned long cci_ctrl_phys;
+
+#ifdef CONFIG_HW_PERF_EVENTS
+
+static void __iomem *cci_pmu_base;
+
+#define CCI400_PMCR 0x0100
+
+#define CCI400_PMU_CYCLE_CNTR_BASE 0x0000
+#define CCI400_PMU_CNTR_BASE(idx) (CCI400_PMU_CYCLE_CNTR_BASE + (idx) * 0x1000)
+
+#define CCI400_PMCR_CEN 0x00000001
+#define CCI400_PMCR_RST 0x00000002
+#define CCI400_PMCR_CCR 0x00000004
+#define CCI400_PMCR_CCD 0x00000008
+#define CCI400_PMCR_EX 0x00000010
+#define CCI400_PMCR_DP 0x00000020
+#define CCI400_PMCR_NCNT_MASK 0x0000F800
+#define CCI400_PMCR_NCNT_SHIFT 11
+
+#define CCI400_PMU_EVT_SEL 0x000
+#define CCI400_PMU_CNTR 0x004
+#define CCI400_PMU_CNTR_CTRL 0x008
+#define CCI400_PMU_OVERFLOW 0x00C
+
+#define CCI400_PMU_OVERFLOW_FLAG 1
+
+enum cci400_perf_events {
+ CCI400_PMU_CYCLES = 0xFF
+};
+
+#define CCI400_PMU_EVENT_MASK 0xff
+#define CCI400_PMU_EVENT_SOURCE(event) ((event >> 5) & 0x7)
+#define CCI400_PMU_EVENT_CODE(event) (event & 0x1f)
+
+#define CCI400_PMU_EVENT_SOURCE_S0 0
+#define CCI400_PMU_EVENT_SOURCE_S4 4
+#define CCI400_PMU_EVENT_SOURCE_M0 5
+#define CCI400_PMU_EVENT_SOURCE_M2 7
+
+#define CCI400_PMU_EVENT_SLAVE_MIN 0x0
+#define CCI400_PMU_EVENT_SLAVE_MAX 0x13
+
+#define CCI400_PMU_EVENT_MASTER_MIN 0x14
+#define CCI400_PMU_EVENT_MASTER_MAX 0x1A
+
+#define CCI400_PMU_MAX_HW_EVENTS 5 /* CCI PMU has 4 counters + 1 cycle counter */
+
+#define CCI400_PMU_CYCLE_COUNTER_IDX 0
+#define CCI400_PMU_COUNTER0_IDX 1
+#define CCI400_PMU_COUNTER_LAST(cci_pmu) (CCI400_PMU_CYCLE_COUNTER_IDX + cci_pmu->num_events - 1)
+
+
+static struct perf_event *events[CCI400_PMU_MAX_HW_EVENTS];
+static unsigned long used_mask[BITS_TO_LONGS(CCI400_PMU_MAX_HW_EVENTS)];
+static struct pmu_hw_events cci_hw_events = {
+ .events = events,
+ .used_mask = used_mask,
+};
+
+static int cci_pmu_validate_hw_event(u8 hw_event)
+{
+ u8 ev_source = CCI400_PMU_EVENT_SOURCE(hw_event);
+ u8 ev_code = CCI400_PMU_EVENT_CODE(hw_event);
+
+ if (ev_source <= CCI400_PMU_EVENT_SOURCE_S4 &&
+ ev_code <= CCI400_PMU_EVENT_SLAVE_MAX)
+ return hw_event;
+ else if (CCI400_PMU_EVENT_SOURCE_M0 <= ev_source &&
+ ev_source <= CCI400_PMU_EVENT_SOURCE_M2 &&
+ CCI400_PMU_EVENT_MASTER_MIN <= ev_code &&
+ ev_code <= CCI400_PMU_EVENT_MASTER_MAX)
+ return hw_event;
+
+ return -EINVAL;
+}
+
+static inline int cci_pmu_counter_is_valid(struct arm_pmu *cci_pmu, int idx)
+{
+ return CCI400_PMU_CYCLE_COUNTER_IDX <= idx &&
+ idx <= CCI400_PMU_COUNTER_LAST(cci_pmu);
+}
+
+static inline u32 cci_pmu_read_register(int idx, unsigned int offset)
+{
+ return readl_relaxed(cci_pmu_base + CCI400_PMU_CNTR_BASE(idx) + offset);
+}
+
+static inline void cci_pmu_write_register(u32 value, int idx, unsigned int offset)
+{
+ return writel_relaxed(value, cci_pmu_base + CCI400_PMU_CNTR_BASE(idx) + offset);
+}
+
+static inline void cci_pmu_disable_counter(int idx)
+{
+ cci_pmu_write_register(0, idx, CCI400_PMU_CNTR_CTRL);
+}
+
+static inline void cci_pmu_enable_counter(int idx)
+{
+ cci_pmu_write_register(1, idx, CCI400_PMU_CNTR_CTRL);
+}
+
+static inline void cci_pmu_select_event(int idx, unsigned long event)
+{
+ event &= CCI400_PMU_EVENT_MASK;
+ cci_pmu_write_register(event, idx, CCI400_PMU_EVT_SEL);
+}
+
+static u32 cci_pmu_get_max_counters(void)
+{
+ u32 n_cnts = (readl_relaxed(cci_ctrl_base + CCI400_PMCR) &
+ CCI400_PMCR_NCNT_MASK) >> CCI400_PMCR_NCNT_SHIFT;
+
+ /* add 1 for cycle counter */
+ return n_cnts + 1;
+}
+
+static struct pmu_hw_events *cci_pmu_get_hw_events(void)
+{
+ return &cci_hw_events;
+}
+
+static int cci_pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event)
+{
+ struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hw_event = &event->hw;
+ unsigned long cci_event = hw_event->config_base & CCI400_PMU_EVENT_MASK;
+ int idx;
+
+ if (cci_event == CCI400_PMU_CYCLES) {
+ if (test_and_set_bit(CCI400_PMU_CYCLE_COUNTER_IDX, hw->used_mask))
+ return -EAGAIN;
+
+ return CCI400_PMU_CYCLE_COUNTER_IDX;
+ }
+
+ for (idx = CCI400_PMU_COUNTER0_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); ++idx) {
+ if (!test_and_set_bit(idx, hw->used_mask))
+ return idx;
+ }
+
+ /* No counters available */
+ return -EAGAIN;
+}
+
+static int cci_pmu_map_event(struct perf_event *event)
+{
+ int mapping;
+ u8 config = event->attr.config & CCI400_PMU_EVENT_MASK;
+
+ if (event->attr.type < PERF_TYPE_MAX)
+ return -ENOENT;
+
+ /* 0xff is used to represent CCI Cycles */
+ if (config == 0xff)
+ mapping = config;
+ else
+ mapping = cci_pmu_validate_hw_event(config);
+
+ return mapping;
+}
+
+static int cci_pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
+{
+ int irq, err, i = 0;
+ struct platform_device *pmu_device = cci_pmu->plat_device;
+
+ if (unlikely(!pmu_device))
+ return -ENODEV;
+
+ /* CCI exports 6 interrupts - 1 nERRORIRQ + 5 nEVNTCNTOVERFLOW (PMU)
+ nERRORIRQ will be handled by secure firmware on TC2. So we
+ assume that all CCI interrupts listed in the linux device
+ tree are PMU interrupts.
+
+ The following code should then be able to handle different routing
+ of the CCI PMU interrupts.
+ */
+ while ((irq = platform_get_irq(pmu_device, i)) > 0) {
+ err = request_irq(irq, handler, 0, "arm-cci-pmu", cci_pmu);
+ if (err) {
+ dev_err(&pmu_device->dev, "unable to request IRQ%d for ARM CCI PMU counters\n",
+ irq);
+ return err;
+ }
+ i++;
+ }
+
+ return 0;
+}
+
+static irqreturn_t cci_pmu_handle_irq(int irq_num, void *dev)
+{
+ struct arm_pmu *cci_pmu = (struct arm_pmu *)dev;
+ struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct perf_sample_data data;
+ struct pt_regs *regs;
+ int idx;
+
+ regs = get_irq_regs();
+
+ /* Iterate over counters and update the corresponding perf events.
+ This should work regardless of whether we have per-counter overflow
+ interrupt or a combined overflow interrupt. */
+ for (idx = CCI400_PMU_CYCLE_COUNTER_IDX; idx <= CCI400_PMU_COUNTER_LAST(cci_pmu); idx++) {
+ struct perf_event *event = events->events[idx];
+ struct hw_perf_event *hw_counter;
+
+ if (!event)
+ continue;
+
+ hw_counter = &event->hw;
+
+ /* Did this counter overflow? */
+ if (!(cci_pmu_read_register(idx, CCI400_PMU_OVERFLOW) & CCI400_PMU_OVERFLOW_FLAG))
+ continue;
+ cci_pmu_write_register(CCI400_PMU_OVERFLOW_FLAG, idx, CCI400_PMU_OVERFLOW);
+
+ armpmu_event_update(event);
+ perf_sample_data_init(&data, 0, hw_counter->last_period);
+ if (!armpmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cci_pmu->disable(event);
+ }
+
+ irq_work_run();
+ return IRQ_HANDLED;
+}
+
+static void cci_pmu_free_irq(struct arm_pmu *cci_pmu)
+{
+ int irq, i = 0;
+ struct platform_device *pmu_device = cci_pmu->plat_device;
+
+ while ((irq = platform_get_irq(pmu_device, i)) > 0) {
+ free_irq(irq, cci_pmu);
+ i++;
+ }
+}
+
+static void cci_pmu_enable_event(struct perf_event *event)
+{
+ unsigned long flags;
+ struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+ struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct hw_perf_event *hw_counter = &event->hw;
+ int idx = hw_counter->idx;
+
+ if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) {
+ dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+ return;
+ }
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /* Configure the event to count, unless you are counting cycles */
+ if (idx != CCI400_PMU_CYCLE_COUNTER_IDX)
+ cci_pmu_select_event(idx, hw_counter->config_base);
+
+ cci_pmu_enable_counter(idx);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void cci_pmu_disable_event(struct perf_event *event)
+{
+ unsigned long flags;
+ struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+ struct pmu_hw_events *events = cci_pmu->get_hw_events();
+ struct hw_perf_event *hw_counter = &event->hw;
+ int idx = hw_counter->idx;
+
+ if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) {
+ dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+ return;
+ }
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ cci_pmu_disable_counter(idx);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void cci_pmu_start(struct arm_pmu *cci_pmu)
+{
+ u32 val;
+ unsigned long flags;
+ struct pmu_hw_events *events = cci_pmu->get_hw_events();
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /* Enable all the PMU counters. */
+ val = readl(cci_ctrl_base + CCI400_PMCR) | CCI400_PMCR_CEN;
+ writel(val, cci_ctrl_base + CCI400_PMCR);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void cci_pmu_stop(struct arm_pmu *cci_pmu)
+{
+ u32 val;
+ unsigned long flags;
+ struct pmu_hw_events *events = cci_pmu->get_hw_events();
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /* Disable all the PMU counters. */
+ val = readl(cci_ctrl_base + CCI400_PMCR) & ~CCI400_PMCR_CEN;
+ writel(val, cci_ctrl_base + CCI400_PMCR);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static u32 cci_pmu_read_counter(struct perf_event *event)
+{
+ struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hw_counter = &event->hw;
+ int idx = hw_counter->idx;
+ u32 value;
+
+ if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx))) {
+ dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+ return 0;
+ }
+ value = cci_pmu_read_register(idx, CCI400_PMU_CNTR);
+
+ return value;
+}
+
+static void cci_pmu_write_counter(struct perf_event *event, u32 value)
+{
+ struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
+ struct hw_perf_event *hw_counter = &event->hw;
+ int idx = hw_counter->idx;
+
+ if (unlikely(!cci_pmu_counter_is_valid(cci_pmu, idx)))
+ dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
+ else
+ cci_pmu_write_register(value, idx, CCI400_PMU_CNTR);
+}
+
+static struct arm_pmu cci_pmu = {
+ .name = DRIVER_NAME,
+ .max_period = (1LLU << 32) - 1,
+ .get_hw_events = cci_pmu_get_hw_events,
+ .get_event_idx = cci_pmu_get_event_idx,
+ .map_event = cci_pmu_map_event,
+ .request_irq = cci_pmu_request_irq,
+ .handle_irq = cci_pmu_handle_irq,
+ .free_irq = cci_pmu_free_irq,
+ .enable = cci_pmu_enable_event,
+ .disable = cci_pmu_disable_event,
+ .start = cci_pmu_start,
+ .stop = cci_pmu_stop,
+ .read_counter = cci_pmu_read_counter,
+ .write_counter = cci_pmu_write_counter,
+};
+
+static int cci_pmu_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ cci_pmu_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(cci_pmu_base))
+ return PTR_ERR(cci_pmu_base);
+
+ cci_pmu.plat_device = pdev;
+ cci_pmu.num_events = cci_pmu_get_max_counters();
+ raw_spin_lock_init(&cci_hw_events.pmu_lock);
+ cpumask_setall(&cci_pmu.valid_cpus);
+
+ return armpmu_register(&cci_pmu, -1);
+}
+
+static const struct of_device_id arm_cci_pmu_matches[] = {
+ {.compatible = "arm,cci-400-pmu"},
+ {},
+};
+
+static struct platform_driver cci_pmu_platform_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .of_match_table = arm_cci_pmu_matches,
+ },
+ .probe = cci_pmu_probe,
+};
+
+static int __init cci_pmu_init(void)
+{
+ if (platform_driver_register(&cci_pmu_platform_driver))
+ WARN(1, "unable to register CCI platform driver\n");
+ return 0;
+}
+
+#else
+
+static int __init cci_pmu_init(void)
+{
+ return 0;
+}
+
+#endif /* CONFIG_HW_PERF_EVENTS */
+
+struct cpu_port {
+ u64 mpidr;
+ u32 port;
+};
+
+/*
+ * Use the port MSB as valid flag, shift can be made dynamic
+ * by computing number of bits required for port indexes.
+ * Code disabling CCI cpu ports runs with D-cache invalidated
+ * and SCTLR bit clear so data accesses must be kept to a minimum
+ * to improve performance; for now shift is left static to
+ * avoid one more data access while disabling the CCI port.
+ */
+#define PORT_VALID_SHIFT 31
+#define PORT_VALID (0x1 << PORT_VALID_SHIFT)
+
+static inline void init_cpu_port(struct cpu_port *port, u32 index, u64 mpidr)
+{
+ port->port = PORT_VALID | index;
+ port->mpidr = mpidr;
+}
+
+static inline bool cpu_port_is_valid(struct cpu_port *port)
+{
+ return !!(port->port & PORT_VALID);
+}
+
+static inline bool cpu_port_match(struct cpu_port *port, u64 mpidr)
+{
+ return port->mpidr == (mpidr & MPIDR_HWID_BITMASK);
+}
+
+static struct cpu_port cpu_port[NR_CPUS];
+
+/**
+ * __cci_ace_get_port - Function to retrieve the port index connected to
+ * a cpu or device.
+ *
+ * @dn: device node of the device to look-up
+ * @type: port type
+ *
+ * Return value:
+ * - CCI port index if success
+ * - -ENODEV if failure
+ */
+static int __cci_ace_get_port(struct device_node *dn, int type)
+{
+ int i;
+ bool ace_match;
+ struct device_node *cci_portn;
+
+ cci_portn = of_parse_phandle(dn, "cci-control-port", 0);
+ for (i = 0; i < nb_cci_ports; i++) {
+ ace_match = ports[i].type == type;
+ if (ace_match && cci_portn == ports[i].dn)
+ return i;
+ }
+ return -ENODEV;
+}
+
+int cci_ace_get_port(struct device_node *dn)
+{
+ return __cci_ace_get_port(dn, ACE_LITE_PORT);
+}
+EXPORT_SYMBOL_GPL(cci_ace_get_port);
+
+static void __init cci_ace_init_ports(void)
+{
+ int port, ac, cpu;
+ u64 hwid;
+ const u32 *cell;
+ struct device_node *cpun, *cpus;
+
+ cpus = of_find_node_by_path("/cpus");
+ if (WARN(!cpus, "Missing cpus node, bailing out\n"))
+ return;
+
+ if (WARN_ON(of_property_read_u32(cpus, "#address-cells", &ac)))
+ ac = of_n_addr_cells(cpus);
+
+ /*
+ * Port index look-up speeds up the function disabling ports by CPU,
+ * since the logical to port index mapping is done once and does
+ * not change after system boot.
+ * The stashed index array is initialized for all possible CPUs
+ * at probe time.
+ */
+ for_each_child_of_node(cpus, cpun) {
+ if (of_node_cmp(cpun->type, "cpu"))
+ continue;
+ cell = of_get_property(cpun, "reg", NULL);
+ if (WARN(!cell, "%s: missing reg property\n", cpun->full_name))
+ continue;
+
+ hwid = of_read_number(cell, ac);
+ cpu = get_logical_index(hwid & MPIDR_HWID_BITMASK);
+
+ if (cpu < 0 || !cpu_possible(cpu))
+ continue;
+ port = __cci_ace_get_port(cpun, ACE_PORT);
+ if (port < 0)
+ continue;
+
+ init_cpu_port(&cpu_port[cpu], port, cpu_logical_map(cpu));
+ }
+
+ for_each_possible_cpu(cpu) {
+ WARN(!cpu_port_is_valid(&cpu_port[cpu]),
+ "CPU %u does not have an associated CCI port\n",
+ cpu);
+ }
+}
+/*
+ * Functions to enable/disable a CCI interconnect slave port
+ *
+ * They are called by low-level power management code to disable slave
+ * interfaces snoops and DVM broadcast.
+ * Since they may execute with cache data allocation disabled and
+ * after the caches have been cleaned and invalidated the functions provide
+ * no explicit locking since they may run with D-cache disabled, so normal
+ * cacheable kernel locks based on ldrex/strex may not work.
+ * Locking has to be provided by BSP implementations to ensure proper
+ * operations.
+ */
+
+/**
+ * cci_port_control() - function to control a CCI port
+ *
+ * @port: index of the port to setup
+ * @enable: if true enables the port, if false disables it
+ */
+static void notrace cci_port_control(unsigned int port, bool enable)
+{
+ void __iomem *base = ports[port].base;
+
+ writel_relaxed(enable ? CCI_ENABLE_REQ : 0, base + CCI_PORT_CTRL);
+ /*
+ * This function is called from power down procedures
+ * and must not execute any instruction that might
+ * cause the processor to be put in a quiescent state
+ * (eg wfi). Hence, cpu_relax() can not be added to this
+ * read loop to optimize power, since it might hide possibly
+ * disruptive operations.
+ */
+ while (readl_relaxed(cci_ctrl_base + CCI_CTRL_STATUS) & 0x1)
+ ;
+}
+
+/**
+ * cci_disable_port_by_cpu() - function to disable a CCI port by CPU
+ * reference
+ *
+ * @mpidr: mpidr of the CPU whose CCI port should be disabled
+ *
+ * Disabling a CCI port for a CPU implies disabling the CCI port
+ * controlling that CPU cluster. Code disabling CPU CCI ports
+ * must make sure that the CPU running the code is the last active CPU
+ * in the cluster ie all other CPUs are quiescent in a low power state.
+ *
+ * Return:
+ * 0 on success
+ * -ENODEV on port look-up failure
+ */
+int notrace cci_disable_port_by_cpu(u64 mpidr)
+{
+ int cpu;
+ bool is_valid;
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+ is_valid = cpu_port_is_valid(&cpu_port[cpu]);
+ if (is_valid && cpu_port_match(&cpu_port[cpu], mpidr)) {
+ cci_port_control(cpu_port[cpu].port, false);
+ return 0;
+ }
+ }
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(cci_disable_port_by_cpu);
+
+/**
+ * cci_enable_port_for_self() - enable a CCI port for calling CPU
+ *
+ * Enabling a CCI port for the calling CPU implies enabling the CCI
+ * port controlling that CPU's cluster. Caller must make sure that the
+ * CPU running the code is the first active CPU in the cluster and all
+ * other CPUs are quiescent in a low power state or waiting for this CPU
+ * to complete the CCI initialization.
+ *
+ * Because this is called when the MMU is still off and with no stack,
+ * the code must be position independent and ideally rely on callee
+ * clobbered registers only. To achieve this we must code this function
+ * entirely in assembler.
+ *
+ * On success this returns with the proper CCI port enabled. In case of
+ * any failure this never returns as the inability to enable the CCI is
+ * fatal and there is no possible recovery at this stage.
+ */
+asmlinkage void __naked cci_enable_port_for_self(void)
+{
+ asm volatile ("\n"
+
+" mrc p15, 0, r0, c0, c0, 5 @ get MPIDR value \n"
+" and r0, r0, #"__stringify(MPIDR_HWID_BITMASK)" \n"
+" adr r1, 5f \n"
+" ldr r2, [r1] \n"
+" add r1, r1, r2 @ &cpu_port \n"
+" add ip, r1, %[sizeof_cpu_port] \n"
+
+ /* Loop over the cpu_port array looking for a matching MPIDR */
+"1: ldr r2, [r1, %[offsetof_cpu_port_mpidr_lsb]] \n"
+" cmp r2, r0 @ compare MPIDR \n"
+" bne 2f \n"
+
+ /* Found a match, now test port validity */
+" ldr r3, [r1, %[offsetof_cpu_port_port]] \n"
+" tst r3, #"__stringify(PORT_VALID)" \n"
+" bne 3f \n"
+
+ /* no match, loop with the next cpu_port entry */
+"2: add r1, r1, %[sizeof_struct_cpu_port] \n"
+" cmp r1, ip @ done? \n"
+" blo 1b \n"
+
+ /* CCI port not found -- cheaply try to stall this CPU */
+"cci_port_not_found: \n"
+" wfi \n"
+" wfe \n"
+" b cci_port_not_found \n"
+
+ /* Use matched port index to look up the corresponding ports entry */
+"3: bic r3, r3, #"__stringify(PORT_VALID)" \n"
+" adr r0, 6f \n"
+" ldmia r0, {r1, r2} \n"
+" sub r1, r1, r0 @ virt - phys \n"
+" ldr r0, [r0, r2] @ *(&ports) \n"
+" mov r2, %[sizeof_struct_ace_port] \n"
+" mla r0, r2, r3, r0 @ &ports[index] \n"
+" sub r0, r0, r1 @ virt_to_phys() \n"
+
+ /* Enable the CCI port */
+" ldr r0, [r0, %[offsetof_port_phys]] \n"
+" mov r3, #"__stringify(CCI_ENABLE_REQ)" \n"
+" str r3, [r0, #"__stringify(CCI_PORT_CTRL)"] \n"
+
+ /* poll the status reg for completion */
+" adr r1, 7f \n"
+" ldr r0, [r1] \n"
+" ldr r0, [r0, r1] @ cci_ctrl_base \n"
+"4: ldr r1, [r0, #"__stringify(CCI_CTRL_STATUS)"] \n"
+" tst r1, #1 \n"
+" bne 4b \n"
+
+" mov r0, #0 \n"
+" bx lr \n"
+
+" .align 2 \n"
+"5: .word cpu_port - . \n"
+"6: .word . \n"
+" .word ports - 6b \n"
+"7: .word cci_ctrl_phys - . \n"
+ : :
+ [sizeof_cpu_port] "i" (sizeof(cpu_port)),
+#ifndef __ARMEB__
+ [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)),
+#else
+ [offsetof_cpu_port_mpidr_lsb] "i" (offsetof(struct cpu_port, mpidr)+4),
+#endif
+ [offsetof_cpu_port_port] "i" (offsetof(struct cpu_port, port)),
+ [sizeof_struct_cpu_port] "i" (sizeof(struct cpu_port)),
+ [sizeof_struct_ace_port] "i" (sizeof(struct cci_ace_port)),
+ [offsetof_port_phys] "i" (offsetof(struct cci_ace_port, phys)) );
+
+ unreachable();
+}
+
+/**
+ * __cci_control_port_by_device() - function to control a CCI port by device
+ * reference
+ *
+ * @dn: device node pointer of the device whose CCI port should be
+ * controlled
+ * @enable: if true enables the port, if false disables it
+ *
+ * Return:
+ * 0 on success
+ * -ENODEV on port look-up failure
+ */
+int notrace __cci_control_port_by_device(struct device_node *dn, bool enable)
+{
+ int port;
+
+ if (!dn)
+ return -ENODEV;
+
+ port = __cci_ace_get_port(dn, ACE_LITE_PORT);
+ if (WARN_ONCE(port < 0, "node %s ACE lite port look-up failure\n",
+ dn->full_name))
+ return -ENODEV;
+ cci_port_control(port, enable);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__cci_control_port_by_device);
+
+/**
+ * __cci_control_port_by_index() - function to control a CCI port by port index
+ *
+ * @port: port index previously retrieved with cci_ace_get_port()
+ * @enable: if true enables the port, if false disables it
+ *
+ * Return:
+ * 0 on success
+ * -ENODEV on port index out of range
+ * -EPERM if operation carried out on an ACE PORT
+ */
+int notrace __cci_control_port_by_index(u32 port, bool enable)
+{
+ if (port >= nb_cci_ports || ports[port].type == ACE_INVALID_PORT)
+ return -ENODEV;
+ /*
+ * CCI control for ports connected to CPUS is extremely fragile
+ * and must be made to go through a specific and controlled
+ * interface (ie cci_disable_port_by_cpu(); control by general purpose
+ * indexing is therefore disabled for ACE ports.
+ */
+ if (ports[port].type == ACE_PORT)
+ return -EPERM;
+
+ cci_port_control(port, enable);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__cci_control_port_by_index);
+
+static const struct cci_nb_ports cci400_ports = {
+ .nb_ace = 2,
+ .nb_ace_lite = 3
+};
+
+static const struct of_device_id arm_cci_matches[] = {
+ {.compatible = "arm,cci-400", .data = &cci400_ports },
+ {},
+};
+
+static const struct of_device_id arm_cci_ctrl_if_matches[] = {
+ {.compatible = "arm,cci-400-ctrl-if", },
+ {},
+};
+
+static int __init cci_probe(void)
+{
+ struct cci_nb_ports const *cci_config;
+ int ret, i, nb_ace = 0, nb_ace_lite = 0;
+ struct device_node *np, *cp;
+ struct resource res;
+ const char *match_str;
+ bool is_ace;
+
+ np = of_find_matching_node(NULL, arm_cci_matches);
+ if (!np)
+ return -ENODEV;
+
+ cci_config = of_match_node(arm_cci_matches, np)->data;
+ if (!cci_config)
+ return -ENODEV;
+
+ nb_cci_ports = cci_config->nb_ace + cci_config->nb_ace_lite;
+
+ ports = kcalloc(sizeof(*ports), nb_cci_ports, GFP_KERNEL);
+ if (!ports)
+ return -ENOMEM;
+
+ ret = of_address_to_resource(np, 0, &res);
+ if (!ret) {
+ cci_ctrl_base = ioremap(res.start, resource_size(&res));
+ cci_ctrl_phys = res.start;
+ }
+ if (ret || !cci_ctrl_base) {
+ WARN(1, "unable to ioremap CCI ctrl\n");
+ ret = -ENXIO;
+ goto memalloc_err;
+ }
+
+ for_each_child_of_node(np, cp) {
+ if (!of_match_node(arm_cci_ctrl_if_matches, cp))
+ continue;
+
+ i = nb_ace + nb_ace_lite;
+
+ if (i >= nb_cci_ports)
+ break;
+
+ if (of_property_read_string(cp, "interface-type",
+ &match_str)) {
+ WARN(1, "node %s missing interface-type property\n",
+ cp->full_name);
+ continue;
+ }
+ is_ace = strcmp(match_str, "ace") == 0;
+ if (!is_ace && strcmp(match_str, "ace-lite")) {
+ WARN(1, "node %s containing invalid interface-type property, skipping it\n",
+ cp->full_name);
+ continue;
+ }
+
+ ret = of_address_to_resource(cp, 0, &res);
+ if (!ret) {
+ ports[i].base = ioremap(res.start, resource_size(&res));
+ ports[i].phys = res.start;
+ }
+ if (ret || !ports[i].base) {
+ WARN(1, "unable to ioremap CCI port %d\n", i);
+ continue;
+ }
+
+ if (is_ace) {
+ if (WARN_ON(nb_ace >= cci_config->nb_ace))
+ continue;
+ ports[i].type = ACE_PORT;
+ ++nb_ace;
+ } else {
+ if (WARN_ON(nb_ace_lite >= cci_config->nb_ace_lite))
+ continue;
+ ports[i].type = ACE_LITE_PORT;
+ ++nb_ace_lite;
+ }
+ ports[i].dn = cp;
+ }
+
+ /* initialize a stashed array of ACE ports to speed-up look-up */
+ cci_ace_init_ports();
+
+ /*
+ * Multi-cluster systems may need this data when non-coherent, during
+ * cluster power-up/power-down. Make sure it reaches main memory.
+ */
+ sync_cache_w(&cci_ctrl_base);
+ sync_cache_w(&cci_ctrl_phys);
+ sync_cache_w(&ports);
+ sync_cache_w(&cpu_port);
+ __sync_cache_range_w(ports, sizeof(*ports) * nb_cci_ports);
+ pr_info("ARM CCI driver probed\n");
+ return 0;
+
+memalloc_err:
+
+ kfree(ports);
+ return ret;
+}
+
+static int cci_init_status = -EAGAIN;
+static DEFINE_MUTEX(cci_probing);
+
+static int __init cci_init(void)
+{
+ if (cci_init_status != -EAGAIN)
+ return cci_init_status;
+
+ mutex_lock(&cci_probing);
+ if (cci_init_status == -EAGAIN)
+ cci_init_status = cci_probe();
+ mutex_unlock(&cci_probing);
+ return cci_init_status;
+}
+
+/*
+ * To sort out early init calls ordering a helper function is provided to
+ * check if the CCI driver has beed initialized. Function check if the driver
+ * has been initialized, if not it calls the init function that probes
+ * the driver and updates the return value.
+ */
+bool __init cci_probed(void)
+{
+ return cci_init() == 0;
+}
+EXPORT_SYMBOL_GPL(cci_probed);
+
+early_initcall(cci_init);
+core_initcall(cci_pmu_init);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ARM CCI support");
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 0357ac4..d0d9b21 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -42,7 +42,7 @@ config COMMON_CLK_WM831X
config COMMON_CLK_VERSATILE
bool "Clock driver for ARM Reference designs"
- depends on ARCH_INTEGRATOR || ARCH_REALVIEW || ARCH_VEXPRESS
+ depends on ARCH_INTEGRATOR || ARCH_REALVIEW || ARCH_VEXPRESS || ARM64
---help---
Supports clocking on ARM Reference designs:
- Integrator/AP and Integrator/CP
diff --git a/drivers/clk/versatile/Makefile b/drivers/clk/versatile/Makefile
index c16ca78..6e76bf8 100644
--- a/drivers/clk/versatile/Makefile
+++ b/drivers/clk/versatile/Makefile
@@ -4,4 +4,4 @@ obj-$(CONFIG_ARCH_INTEGRATOR) += clk-integrator.o
obj-$(CONFIG_INTEGRATOR_IMPD1) += clk-impd1.o
obj-$(CONFIG_ARCH_REALVIEW) += clk-realview.o
obj-$(CONFIG_ARCH_VEXPRESS) += clk-vexpress.o clk-sp810.o
-obj-$(CONFIG_VEXPRESS_CONFIG) += clk-vexpress-osc.o
+obj-$(CONFIG_VEXPRESS_CONFIG) += clk-vexpress-osc.o clk-vexpress-spc.o
diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c
index 256c8be..2dc8b41 100644
--- a/drivers/clk/versatile/clk-vexpress-osc.c
+++ b/drivers/clk/versatile/clk-vexpress-osc.c
@@ -107,7 +107,7 @@ void __init vexpress_osc_of_setup(struct device_node *node)
osc->func = vexpress_config_func_get_by_node(node);
if (!osc->func) {
pr_err("Failed to obtain config func for node '%s'!\n",
- node->name);
+ node->full_name);
goto error;
}
@@ -119,7 +119,7 @@ void __init vexpress_osc_of_setup(struct device_node *node)
of_property_read_string(node, "clock-output-names", &init.name);
if (!init.name)
- init.name = node->name;
+ init.name = node->full_name;
init.ops = &vexpress_osc_ops;
init.flags = CLK_IS_ROOT;
diff --git a/drivers/clk/versatile/clk-vexpress-spc.c b/drivers/clk/versatile/clk-vexpress-spc.c
new file mode 100644
index 0000000..bb566e2
--- /dev/null
+++ b/drivers/clk/versatile/clk-vexpress-spc.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2012 ARM Limited
+ * Copyright (C) 2012 Linaro
+ *
+ * Author: Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+/* SPC clock programming interface for Vexpress cpus */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vexpress.h>
+
+struct clk_spc {
+ struct clk_hw hw;
+ spinlock_t *lock;
+ int cluster;
+};
+
+#define to_clk_spc(spc) container_of(spc, struct clk_spc, hw)
+
+static unsigned long spc_recalc_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+ struct clk_spc *spc = to_clk_spc(hw);
+ u32 freq;
+
+ if (vexpress_spc_get_performance(spc->cluster, &freq)) {
+ return -EIO;
+ pr_err("%s: Failed", __func__);
+ }
+
+ return freq * 1000;
+}
+
+static long spc_round_rate(struct clk_hw *hw, unsigned long drate,
+ unsigned long *parent_rate)
+{
+ return drate;
+}
+
+static int spc_set_rate(struct clk_hw *hw, unsigned long rate,
+ unsigned long parent_rate)
+{
+ struct clk_spc *spc = to_clk_spc(hw);
+
+ return vexpress_spc_set_performance(spc->cluster, rate / 1000);
+}
+
+static struct clk_ops clk_spc_ops = {
+ .recalc_rate = spc_recalc_rate,
+ .round_rate = spc_round_rate,
+ .set_rate = spc_set_rate,
+};
+
+struct clk *vexpress_clk_register_spc(const char *name, int cluster_id)
+{
+ struct clk_init_data init;
+ struct clk_spc *spc;
+ struct clk *clk;
+
+ if (!name) {
+ pr_err("Invalid name passed");
+ return ERR_PTR(-EINVAL);
+ }
+
+ spc = kzalloc(sizeof(*spc), GFP_KERNEL);
+ if (!spc) {
+ pr_err("could not allocate spc clk\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ spc->hw.init = &init;
+ spc->cluster = cluster_id;
+
+ init.name = name;
+ init.ops = &clk_spc_ops;
+ init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE;
+ init.num_parents = 0;
+
+ clk = clk_register(NULL, &spc->hw);
+ if (!IS_ERR_OR_NULL(clk))
+ return clk;
+
+ pr_err("clk register failed\n");
+ kfree(spc);
+
+ return NULL;
+}
+
+#if defined(CONFIG_OF)
+void __init vexpress_clk_of_register_spc(void)
+{
+ char name[14] = "cpu-cluster.X";
+ struct device_node *node = NULL;
+ struct clk *clk;
+ const u32 *val;
+ int cluster_id = 0, len;
+
+ if (!of_find_compatible_node(NULL, NULL, "arm,vexpress-spc")) {
+ pr_debug("%s: No SPC found, Exiting!!\n", __func__);
+ return;
+ }
+
+ while ((node = of_find_node_by_name(node, "cluster"))) {
+ val = of_get_property(node, "reg", &len);
+ if (val && len == 4)
+ cluster_id = be32_to_cpup(val);
+
+ name[12] = cluster_id + '0';
+ clk = vexpress_clk_register_spc(name, cluster_id);
+ if (IS_ERR(clk))
+ return;
+
+ pr_debug("Registered clock '%s'\n", name);
+ clk_register_clkdev(clk, NULL, name);
+ }
+}
+CLK_OF_DECLARE(spc, "arm,vexpress-spc", vexpress_clk_of_register_spc);
+#endif
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 534fcb8..a9c1324 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -201,7 +201,7 @@ source "drivers/cpufreq/Kconfig.x86"
endmenu
menu "ARM CPU frequency scaling drivers"
-depends on ARM
+depends on ARM || ARM64
source "drivers/cpufreq/Kconfig.arm"
endmenu
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 6e57543..8327444 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -4,7 +4,7 @@
config ARM_BIG_LITTLE_CPUFREQ
tristate "Generic ARM big LITTLE CPUfreq driver"
- depends on ARM_CPU_TOPOLOGY && PM_OPP && HAVE_CLK
+ depends on ARM_CPU_TOPOLOGY && PM_OPP && HAVE_CLK && BIG_LITTLE
help
This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
@@ -15,6 +15,14 @@ config ARM_DT_BL_CPUFREQ
This enables probing via DT for Generic CPUfreq driver for ARM
big.LITTLE platform. This gets frequency tables from DT.
+config ARM_VEXPRESS_BL_CPUFREQ
+ tristate "ARM Vexpress big LITTLE CPUfreq driver"
+ select ARM_BIG_LITTLE_CPUFREQ
+ depends on VEXPRESS_SPC
+ help
+ This enables the CPUfreq driver for ARM Vexpress big.LITTLE platform.
+ If in doubt, say N.
+
config ARM_EXYNOS_CPUFREQ
bool "SAMSUNG EXYNOS SoCs"
depends on ARCH_EXYNOS
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 315b923..1db9b49 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o
obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o
# big LITTLE per platform glues. Keep DT_BL_CPUFREQ as the last entry in all big
# LITTLE drivers, so that it is probed last.
+obj-$(CONFIG_ARM_VEXPRESS_BL_CPUFREQ) += vexpress_big_little.o
obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o
obj-$(CONFIG_ARCH_DAVINCI_DA850) += davinci-cpufreq.o
diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 5d7f53f..076f25a 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -24,27 +24,148 @@
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/export.h>
+#include <linux/mutex.h>
#include <linux/of_platform.h>
#include <linux/opp.h>
#include <linux/slab.h>
#include <linux/topology.h>
#include <linux/types.h>
+#include <asm/bL_switcher.h>
#include "arm_big_little.h"
-/* Currently we support only two clusters */
-#define MAX_CLUSTERS 2
+#ifdef CONFIG_BL_SWITCHER
+bool bL_switching_enabled;
+#endif
+
+#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq)
+#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
static struct cpufreq_arm_bL_ops *arm_bL_ops;
static struct clk *clk[MAX_CLUSTERS];
-static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS];
-static atomic_t cluster_usage[MAX_CLUSTERS] = {ATOMIC_INIT(0), ATOMIC_INIT(0)};
+static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
+static atomic_t cluster_usage[MAX_CLUSTERS + 1] = {ATOMIC_INIT(0),
+ ATOMIC_INIT(0)};
+
+static unsigned int clk_big_min; /* (Big) clock frequencies */
+static unsigned int clk_little_max; /* Maximum clock frequency (Little) */
+
+static DEFINE_PER_CPU(unsigned int, physical_cluster);
+static DEFINE_PER_CPU(unsigned int, cpu_last_req_freq);
+
+static struct mutex cluster_lock[MAX_CLUSTERS];
+
+static unsigned int find_cluster_maxfreq(int cluster)
+{
+ int j;
+ u32 max_freq = 0, cpu_freq;
+
+ for_each_online_cpu(j) {
+ cpu_freq = per_cpu(cpu_last_req_freq, j);
+
+ if ((cluster == per_cpu(physical_cluster, j)) &&
+ (max_freq < cpu_freq))
+ max_freq = cpu_freq;
+ }
-static unsigned int bL_cpufreq_get(unsigned int cpu)
+ pr_debug("%s: cluster: %d, max freq: %d\n", __func__, cluster,
+ max_freq);
+
+ return max_freq;
+}
+
+static unsigned int clk_get_cpu_rate(unsigned int cpu)
{
- u32 cur_cluster = cpu_to_cluster(cpu);
+ u32 cur_cluster = per_cpu(physical_cluster, cpu);
+ u32 rate = clk_get_rate(clk[cur_cluster]) / 1000;
+
+ /* For switcher we use virtual A15 clock rates */
+ if (is_bL_switching_enabled())
+ rate = VIRT_FREQ(cur_cluster, rate);
+
+ pr_debug("%s: cpu: %d, cluster: %d, freq: %u\n", __func__, cpu,
+ cur_cluster, rate);
- return clk_get_rate(clk[cur_cluster]) / 1000;
+ return rate;
+}
+
+static unsigned int bL_cpufreq_get_rate(unsigned int cpu)
+{
+ if (is_bL_switching_enabled()) {
+ pr_debug("%s: freq: %d\n", __func__, per_cpu(cpu_last_req_freq,
+ cpu));
+
+ return per_cpu(cpu_last_req_freq, cpu);
+ } else {
+ return clk_get_cpu_rate(cpu);
+ }
+}
+
+static unsigned int
+bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
+{
+ u32 new_rate, prev_rate;
+ int ret;
+ bool bLs = is_bL_switching_enabled();
+
+ mutex_lock(&cluster_lock[new_cluster]);
+
+ if (bLs) {
+ prev_rate = per_cpu(cpu_last_req_freq, cpu);
+ per_cpu(cpu_last_req_freq, cpu) = rate;
+ per_cpu(physical_cluster, cpu) = new_cluster;
+
+ new_rate = find_cluster_maxfreq(new_cluster);
+ new_rate = ACTUAL_FREQ(new_cluster, new_rate);
+ } else {
+ new_rate = rate;
+ }
+
+ pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d, freq: %d\n",
+ __func__, cpu, old_cluster, new_cluster, new_rate);
+
+ ret = clk_set_rate(clk[new_cluster], new_rate * 1000);
+ if (WARN_ON(ret)) {
+ pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret,
+ new_cluster);
+ if (bLs) {
+ per_cpu(cpu_last_req_freq, cpu) = prev_rate;
+ per_cpu(physical_cluster, cpu) = old_cluster;
+ }
+
+ mutex_unlock(&cluster_lock[new_cluster]);
+
+ return ret;
+ }
+
+ mutex_unlock(&cluster_lock[new_cluster]);
+
+ /* Recalc freq for old cluster when switching clusters */
+ if (old_cluster != new_cluster) {
+ pr_debug("%s: cpu: %d, old cluster: %d, new cluster: %d\n",
+ __func__, cpu, old_cluster, new_cluster);
+
+ /* Switch cluster */
+ bL_switch_request(cpu, new_cluster);
+
+ mutex_lock(&cluster_lock[old_cluster]);
+
+ /* Set freq of old cluster if there are cpus left on it */
+ new_rate = find_cluster_maxfreq(old_cluster);
+ new_rate = ACTUAL_FREQ(old_cluster, new_rate);
+
+ if (new_rate) {
+ pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n",
+ __func__, old_cluster, new_rate);
+
+ if (clk_set_rate(clk[old_cluster], new_rate * 1000))
+ pr_err("%s: clk_set_rate failed: %d, old cluster: %d\n",
+ __func__, ret, old_cluster);
+ }
+ mutex_unlock(&cluster_lock[old_cluster]);
+ }
+
+ return 0;
}
/* Validate policy frequency range */
@@ -60,12 +181,14 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
unsigned int target_freq, unsigned int relation)
{
struct cpufreq_freqs freqs;
- u32 cpu = policy->cpu, freq_tab_idx, cur_cluster;
+ u32 cpu = policy->cpu, freq_tab_idx, cur_cluster, new_cluster,
+ actual_cluster;
int ret = 0;
- cur_cluster = cpu_to_cluster(policy->cpu);
+ cur_cluster = cpu_to_cluster(cpu);
+ new_cluster = actual_cluster = per_cpu(physical_cluster, cpu);
- freqs.old = bL_cpufreq_get(policy->cpu);
+ freqs.old = bL_cpufreq_get_rate(cpu);
/* Determine valid target frequency using freq_table */
cpufreq_frequency_table_target(policy, freq_table[cur_cluster],
@@ -79,13 +202,21 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
if (freqs.old == freqs.new)
return 0;
+ if (is_bL_switching_enabled()) {
+ if ((actual_cluster == A15_CLUSTER) &&
+ (freqs.new < clk_big_min)) {
+ new_cluster = A7_CLUSTER;
+ } else if ((actual_cluster == A7_CLUSTER) &&
+ (freqs.new > clk_little_max)) {
+ new_cluster = A15_CLUSTER;
+ }
+ }
+
cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
- ret = clk_set_rate(clk[cur_cluster], freqs.new * 1000);
- if (ret) {
- pr_err("clk_set_rate failed: %d\n", ret);
+ ret = bL_cpufreq_set_rate(cpu, actual_cluster, new_cluster, freqs.new);
+ if (ret)
return ret;
- }
policy->cur = freqs.new;
@@ -94,7 +225,73 @@ static int bL_cpufreq_set_target(struct cpufreq_policy *policy,
return ret;
}
-static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
+static inline u32 get_table_count(struct cpufreq_frequency_table *table)
+{
+ int count;
+
+ for (count = 0; table[count].frequency != CPUFREQ_TABLE_END; count++)
+ ;
+
+ return count;
+}
+
+/* get the minimum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_min(struct cpufreq_frequency_table *table)
+{
+ int i;
+ uint32_t min_freq = ~0;
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++)
+ if (table[i].frequency < min_freq)
+ min_freq = table[i].frequency;
+ return min_freq;
+}
+
+/* get the maximum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_max(struct cpufreq_frequency_table *table)
+{
+ int i;
+ uint32_t max_freq = 0;
+ for (i = 0; (table[i].frequency != CPUFREQ_TABLE_END); i++)
+ if (table[i].frequency > max_freq)
+ max_freq = table[i].frequency;
+ return max_freq;
+}
+
+static int merge_cluster_tables(void)
+{
+ int i, j, k = 0, count = 1;
+ struct cpufreq_frequency_table *table;
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ count += get_table_count(freq_table[i]);
+
+ table = kzalloc(sizeof(*table) * count, GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ freq_table[MAX_CLUSTERS] = table;
+
+ /* Add in reverse order to get freqs in increasing order */
+ for (i = MAX_CLUSTERS - 1; i >= 0; i--) {
+ for (j = 0; freq_table[i][j].frequency != CPUFREQ_TABLE_END;
+ j++) {
+ table[k].frequency = VIRT_FREQ(i,
+ freq_table[i][j].frequency);
+ pr_debug("%s: index: %d, freq: %d\n", __func__, k,
+ table[k].frequency);
+ k++;
+ }
+ }
+
+ table[k].index = k;
+ table[k].frequency = CPUFREQ_TABLE_END;
+
+ pr_debug("%s: End, table: %p, count: %d\n", __func__, table, k);
+
+ return 0;
+}
+
+static void _put_cluster_clk_and_freq_table(struct device *cpu_dev)
{
u32 cluster = cpu_to_cluster(cpu_dev->id);
@@ -105,10 +302,35 @@ static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
}
}
-static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+static void put_cluster_clk_and_freq_table(struct device *cpu_dev)
{
u32 cluster = cpu_to_cluster(cpu_dev->id);
- char name[14] = "cpu-cluster.";
+ int i;
+
+ if (cluster < MAX_CLUSTERS)
+ return _put_cluster_clk_and_freq_table(cpu_dev);
+
+ if (atomic_dec_return(&cluster_usage[MAX_CLUSTERS]))
+ return;
+
+ for (i = 0; i < MAX_CLUSTERS; i++) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, i);
+ return;
+ }
+
+ _put_cluster_clk_and_freq_table(cdev);
+ }
+
+ /* free virtual table */
+ kfree(freq_table[MAX_CLUSTERS]);
+}
+
+static int _get_cluster_clk_and_freq_table(struct device *cpu_dev)
+{
+ u32 cluster = cpu_to_cluster(cpu_dev->id);
+ char name[14] = "cpu-cluster.X";
int ret;
if (atomic_inc_return(&cluster_usage[cluster]) != 1)
@@ -149,6 +371,62 @@ atomic_dec:
return ret;
}
+static int get_cluster_clk_and_freq_table(struct device *cpu_dev)
+{
+ u32 cluster = cpu_to_cluster(cpu_dev->id);
+ int i, ret;
+
+ if (cluster < MAX_CLUSTERS)
+ return _get_cluster_clk_and_freq_table(cpu_dev);
+
+ if (atomic_inc_return(&cluster_usage[MAX_CLUSTERS]) != 1)
+ return 0;
+
+ /*
+ * Get data for all clusters and fill virtual cluster with a merge of
+ * both
+ */
+ for (i = 0; i < MAX_CLUSTERS; i++) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, i);
+ return -ENODEV;
+ }
+
+ ret = _get_cluster_clk_and_freq_table(cdev);
+ if (ret)
+ goto put_clusters;
+ }
+
+ ret = merge_cluster_tables();
+ if (ret)
+ goto put_clusters;
+
+ /* Assuming 2 cluster, set clk_big_min and clk_little_max */
+ clk_big_min = get_table_min(freq_table[0]);
+ clk_little_max = VIRT_FREQ(1, get_table_max(freq_table[1]));
+
+ pr_debug("%s: cluster: %d, clk_big_min: %d, clk_little_max: %d\n",
+ __func__, cluster, clk_big_min, clk_little_max);
+
+ return 0;
+
+put_clusters:
+ while (i--) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n", __func__, i);
+ return -ENODEV;
+ }
+
+ _put_cluster_clk_and_freq_table(cdev);
+ }
+
+ atomic_dec(&cluster_usage[MAX_CLUSTERS]);
+
+ return ret;
+}
+
/* Per-CPU initialization */
static int bL_cpufreq_init(struct cpufreq_policy *policy)
{
@@ -177,37 +455,30 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
cpufreq_frequency_table_get_attr(freq_table[cur_cluster], policy->cpu);
+ if (cur_cluster < MAX_CLUSTERS) {
+ cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+
+ per_cpu(physical_cluster, policy->cpu) = cur_cluster;
+ } else {
+ /* Assumption: during init, we are always running on A15 */
+ per_cpu(physical_cluster, policy->cpu) = A15_CLUSTER;
+ }
+
if (arm_bL_ops->get_transition_latency)
policy->cpuinfo.transition_latency =
arm_bL_ops->get_transition_latency(cpu_dev);
else
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
- policy->cur = bL_cpufreq_get(policy->cpu);
+ policy->cur = clk_get_cpu_rate(policy->cpu);
- cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+ if (is_bL_switching_enabled())
+ per_cpu(cpu_last_req_freq, policy->cpu) = policy->cur;
dev_info(cpu_dev, "%s: CPU %d initialized\n", __func__, policy->cpu);
return 0;
}
-static int bL_cpufreq_exit(struct cpufreq_policy *policy)
-{
- struct device *cpu_dev;
-
- cpu_dev = get_cpu_device(policy->cpu);
- if (!cpu_dev) {
- pr_err("%s: failed to get cpu%d device\n", __func__,
- policy->cpu);
- return -ENODEV;
- }
-
- put_cluster_clk_and_freq_table(cpu_dev);
- dev_dbg(cpu_dev, "%s: Exited, cpu: %d\n", __func__, policy->cpu);
-
- return 0;
-}
-
/* Export freq_table to sysfs */
static struct freq_attr *bL_cpufreq_attr[] = {
&cpufreq_freq_attr_scaling_available_freqs,
@@ -219,16 +490,47 @@ static struct cpufreq_driver bL_cpufreq_driver = {
.flags = CPUFREQ_STICKY,
.verify = bL_cpufreq_verify_policy,
.target = bL_cpufreq_set_target,
- .get = bL_cpufreq_get,
+ .get = bL_cpufreq_get_rate,
.init = bL_cpufreq_init,
- .exit = bL_cpufreq_exit,
.have_governor_per_policy = true,
.attr = bL_cpufreq_attr,
};
+static int bL_cpufreq_switcher_notifier(struct notifier_block *nfb,
+ unsigned long action, void *_arg)
+{
+ pr_debug("%s: action: %ld\n", __func__, action);
+
+ switch (action) {
+ case BL_NOTIFY_PRE_ENABLE:
+ case BL_NOTIFY_PRE_DISABLE:
+ cpufreq_unregister_driver(&bL_cpufreq_driver);
+ break;
+
+ case BL_NOTIFY_POST_ENABLE:
+ set_switching_enabled(true);
+ cpufreq_register_driver(&bL_cpufreq_driver);
+ break;
+
+ case BL_NOTIFY_POST_DISABLE:
+ set_switching_enabled(false);
+ cpufreq_register_driver(&bL_cpufreq_driver);
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block bL_switcher_notifier = {
+ .notifier_call = bL_cpufreq_switcher_notifier,
+};
+
int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops)
{
- int ret;
+ int ret, i;
if (arm_bL_ops) {
pr_debug("%s: Already registered: %s, exiting\n", __func__,
@@ -243,16 +545,29 @@ int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops)
arm_bL_ops = ops;
+ ret = bL_switcher_get_enabled();
+ set_switching_enabled(ret);
+
+ for (i = 0; i < MAX_CLUSTERS; i++)
+ mutex_init(&cluster_lock[i]);
+
ret = cpufreq_register_driver(&bL_cpufreq_driver);
if (ret) {
pr_info("%s: Failed registering platform driver: %s, err: %d\n",
__func__, ops->name, ret);
arm_bL_ops = NULL;
} else {
- pr_info("%s: Registered platform driver: %s\n", __func__,
- ops->name);
+ ret = bL_switcher_register_notifier(&bL_switcher_notifier);
+ if (ret) {
+ cpufreq_unregister_driver(&bL_cpufreq_driver);
+ arm_bL_ops = NULL;
+ } else {
+ pr_info("%s: Registered platform driver: %s\n",
+ __func__, ops->name);
+ }
}
+ bL_switcher_put_enabled();
return ret;
}
EXPORT_SYMBOL_GPL(bL_cpufreq_register);
@@ -265,9 +580,31 @@ void bL_cpufreq_unregister(struct cpufreq_arm_bL_ops *ops)
return;
}
+ bL_switcher_get_enabled();
+ bL_switcher_unregister_notifier(&bL_switcher_notifier);
cpufreq_unregister_driver(&bL_cpufreq_driver);
+ bL_switcher_put_enabled();
pr_info("%s: Un-registered platform driver: %s\n", __func__,
arm_bL_ops->name);
+
+ /* For saving table get/put on every cpu in/out */
+ if (is_bL_switching_enabled()) {
+ put_cluster_clk_and_freq_table(get_cpu_device(0));
+ } else {
+ int i;
+
+ for (i = 0; i < MAX_CLUSTERS; i++) {
+ struct device *cdev = get_cpu_device(i);
+ if (!cdev) {
+ pr_err("%s: failed to get cpu%d device\n",
+ __func__, i);
+ return;
+ }
+
+ put_cluster_clk_and_freq_table(cdev);
+ }
+ }
+
arm_bL_ops = NULL;
}
EXPORT_SYMBOL_GPL(bL_cpufreq_unregister);
diff --git a/drivers/cpufreq/arm_big_little.h b/drivers/cpufreq/arm_big_little.h
index 79b2ce1..4f5a03d 100644
--- a/drivers/cpufreq/arm_big_little.h
+++ b/drivers/cpufreq/arm_big_little.h
@@ -23,6 +23,20 @@
#include <linux/device.h>
#include <linux/types.h>
+/* Currently we support only two clusters */
+#define A15_CLUSTER 0
+#define A7_CLUSTER 1
+#define MAX_CLUSTERS 2
+
+#ifdef CONFIG_BL_SWITCHER
+extern bool bL_switching_enabled;
+#define is_bL_switching_enabled() bL_switching_enabled
+#define set_switching_enabled(x) (bL_switching_enabled = (x))
+#else
+#define is_bL_switching_enabled() false
+#define set_switching_enabled(x) do { } while (0)
+#endif
+
struct cpufreq_arm_bL_ops {
char name[CPUFREQ_NAME_LEN];
int (*get_transition_latency)(struct device *cpu_dev);
@@ -36,7 +50,8 @@ struct cpufreq_arm_bL_ops {
static inline int cpu_to_cluster(int cpu)
{
- return topology_physical_package_id(cpu);
+ return is_bL_switching_enabled() ? MAX_CLUSTERS:
+ topology_physical_package_id(cpu);
}
int bL_cpufreq_register(struct cpufreq_arm_bL_ops *ops);
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index bfd6273..66733f1 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -21,6 +21,9 @@
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <asm/cputime.h>
+#ifdef CONFIG_BL_SWITCHER
+#include <asm/bL_switcher.h>
+#endif
static spinlock_t cpufreq_stats_lock;
@@ -378,7 +381,7 @@ static struct notifier_block notifier_trans_block = {
.notifier_call = cpufreq_stat_notifier_trans
};
-static int __init cpufreq_stats_init(void)
+static int cpufreq_stats_setup(void)
{
int ret;
unsigned int cpu;
@@ -406,7 +409,8 @@ static int __init cpufreq_stats_init(void)
return 0;
}
-static void __exit cpufreq_stats_exit(void)
+
+static void cpufreq_stats_cleanup(void)
{
unsigned int cpu;
@@ -421,6 +425,54 @@ static void __exit cpufreq_stats_exit(void)
}
}
+#ifdef CONFIG_BL_SWITCHER
+static int cpufreq_stats_switcher_notifier(struct notifier_block *nfb,
+ unsigned long action, void *_arg)
+{
+ switch (action) {
+ case BL_NOTIFY_PRE_ENABLE:
+ case BL_NOTIFY_PRE_DISABLE:
+ cpufreq_stats_cleanup();
+ break;
+
+ case BL_NOTIFY_POST_ENABLE:
+ case BL_NOTIFY_POST_DISABLE:
+ cpufreq_stats_setup();
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block switcher_notifier = {
+ .notifier_call = cpufreq_stats_switcher_notifier,
+};
+#endif
+
+static int __init cpufreq_stats_init(void)
+{
+ int ret;
+ spin_lock_init(&cpufreq_stats_lock);
+
+ ret = cpufreq_stats_setup();
+#ifdef CONFIG_BL_SWITCHER
+ if (!ret)
+ bL_switcher_register_notifier(&switcher_notifier);
+#endif
+ return ret;
+}
+
+static void __exit cpufreq_stats_exit(void)
+{
+#ifdef CONFIG_BL_SWITCHER
+ bL_switcher_unregister_notifier(&switcher_notifier);
+#endif
+ cpufreq_stats_cleanup();
+}
+
MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>");
MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats "
"through sysfs filesystem");
diff --git a/drivers/cpufreq/vexpress_big_little.c b/drivers/cpufreq/vexpress_big_little.c
new file mode 100644
index 0000000..1abb883
--- /dev/null
+++ b/drivers/cpufreq/vexpress_big_little.c
@@ -0,0 +1,86 @@
+/*
+ * Vexpress big.LITTLE CPUFreq Interface driver
+ *
+ * It provides necessary ops to arm_big_little cpufreq driver and gets
+ * Frequency information from Device Tree. Freq table in DT must be in KHz.
+ *
+ * Copyright (C) 2013 Linaro.
+ * Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/cpufreq.h>
+#include <linux/export.h>
+#include <linux/opp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/vexpress.h>
+#include "arm_big_little.h"
+
+static int vexpress_init_opp_table(struct device *cpu_dev)
+{
+ int i = -1, count, cluster = cpu_to_cluster(cpu_dev->id);
+ u32 *table;
+ int ret;
+
+ count = vexpress_spc_get_freq_table(cluster, &table);
+ if (!table || !count) {
+ pr_err("SPC controller returned invalid freq table");
+ return -EINVAL;
+ }
+
+ while (++i < count) {
+ /* FIXME: Voltage value */
+ ret = opp_add(cpu_dev, table[i] * 1000, 900000);
+ if (ret) {
+ dev_warn(cpu_dev, "%s: Failed to add OPP %d, err: %d\n",
+ __func__, table[i] * 1000, ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int vexpress_get_transition_latency(struct device *cpu_dev)
+{
+ /* 1 ms */
+ return 1000000;
+}
+
+static struct cpufreq_arm_bL_ops vexpress_bL_ops = {
+ .name = "vexpress-bL",
+ .get_transition_latency = vexpress_get_transition_latency,
+ .init_opp_table = vexpress_init_opp_table,
+};
+
+static int vexpress_bL_init(void)
+{
+ if (!vexpress_spc_check_loaded()) {
+ pr_info("%s: No SPC found\n", __func__);
+ return -ENOENT;
+ }
+
+ return bL_cpufreq_register(&vexpress_bL_ops);
+}
+module_init(vexpress_bL_init);
+
+static void vexpress_bL_exit(void)
+{
+ return bL_cpufreq_unregister(&vexpress_bL_ops);
+}
+module_exit(vexpress_bL_exit);
+
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
+MODULE_DESCRIPTION("ARM Vexpress big LITTLE cpufreq driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 0d8bd55..7d8256a 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -4,6 +4,6 @@
obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
-
+obj-$(CONFIG_BIG_LITTLE) += arm_big_little.o
obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o
obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o
diff --git a/drivers/cpuidle/arm_big_little.c b/drivers/cpuidle/arm_big_little.c
new file mode 100644
index 0000000..e537889
--- /dev/null
+++ b/drivers/cpuidle/arm_big_little.c
@@ -0,0 +1,183 @@
+/*
+ * big.LITTLE CPU idle driver.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/arm-cci.h>
+#include <linux/bitmap.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/clockchips.h>
+#include <linux/debugfs.h>
+#include <linux/hrtimer.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/tick.h>
+#include <linux/vexpress.h>
+#include <asm/mcpm.h>
+#include <asm/cpuidle.h>
+#include <asm/cputype.h>
+#include <asm/idmap.h>
+#include <asm/proc-fns.h>
+#include <asm/suspend.h>
+#include <linux/of.h>
+
+static int bl_cpuidle_simple_enter(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ ktime_t time_start, time_end;
+ s64 diff;
+
+ time_start = ktime_get();
+
+ cpu_do_idle();
+
+ time_end = ktime_get();
+
+ local_irq_enable();
+
+ diff = ktime_to_us(ktime_sub(time_end, time_start));
+ if (diff > INT_MAX)
+ diff = INT_MAX;
+
+ dev->last_residency = (int) diff;
+
+ return index;
+}
+
+static int bl_enter_powerdown(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx);
+
+static struct cpuidle_state bl_cpuidle_set[] __initdata = {
+ [0] = {
+ .enter = bl_cpuidle_simple_enter,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .power_usage = UINT_MAX,
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .name = "WFI",
+ .desc = "ARM WFI",
+ },
+ [1] = {
+ .enter = bl_enter_powerdown,
+ .exit_latency = 300,
+ .target_residency = 1000,
+ .flags = CPUIDLE_FLAG_TIME_VALID,
+ .name = "C1",
+ .desc = "ARM power down",
+ },
+};
+
+struct cpuidle_driver bl_idle_driver = {
+ .name = "bl_idle",
+ .owner = THIS_MODULE,
+ .safe_state_index = 0
+};
+
+static DEFINE_PER_CPU(struct cpuidle_device, bl_idle_dev);
+
+static int notrace bl_powerdown_finisher(unsigned long arg)
+{
+ unsigned int mpidr = read_cpuid_mpidr();
+ unsigned int cluster = (mpidr >> 8) & 0xf;
+ unsigned int cpu = mpidr & 0xf;
+
+ mcpm_set_entry_vector(cpu, cluster, cpu_resume);
+ mcpm_cpu_suspend(0); /* 0 should be replaced with better value here */
+ return 1;
+}
+
+/*
+ * bl_enter_powerdown - Programs CPU to enter the specified state
+ * @dev: cpuidle device
+ * @drv: The target state to be programmed
+ * @idx: state index
+ *
+ * Called from the CPUidle framework to program the device to the
+ * specified target state selected by the governor.
+ */
+static int bl_enter_powerdown(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
+{
+ struct timespec ts_preidle, ts_postidle, ts_idle;
+ int ret;
+
+ /* Used to keep track of the total time in idle */
+ getnstimeofday(&ts_preidle);
+
+ BUG_ON(!irqs_disabled());
+
+ cpu_pm_enter();
+
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+
+ ret = cpu_suspend((unsigned long) dev, bl_powerdown_finisher);
+ if (ret)
+ BUG();
+
+ mcpm_cpu_powered_up();
+
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+
+ cpu_pm_exit();
+
+ getnstimeofday(&ts_postidle);
+ local_irq_enable();
+ ts_idle = timespec_sub(ts_postidle, ts_preidle);
+
+ dev->last_residency = ts_idle.tv_nsec / NSEC_PER_USEC +
+ ts_idle.tv_sec * USEC_PER_SEC;
+ return idx;
+}
+
+/*
+ * bl_idle_init
+ *
+ * Registers the bl specific cpuidle driver with the cpuidle
+ * framework with the valid set of states.
+ */
+int __init bl_idle_init(void)
+{
+ struct cpuidle_device *dev;
+ int i, cpu_id;
+ struct cpuidle_driver *drv = &bl_idle_driver;
+
+ if (!of_find_compatible_node(NULL, NULL, "arm,generic")) {
+ pr_info("%s: No compatible node found\n", __func__);
+ return -ENODEV;
+ }
+
+ drv->state_count = (sizeof(bl_cpuidle_set) /
+ sizeof(struct cpuidle_state));
+
+ for (i = 0; i < drv->state_count; i++) {
+ memcpy(&drv->states[i], &bl_cpuidle_set[i],
+ sizeof(struct cpuidle_state));
+ }
+
+ cpuidle_register_driver(drv);
+
+ for_each_cpu(cpu_id, cpu_online_mask) {
+ pr_err("CPUidle for CPU%d registered\n", cpu_id);
+ dev = &per_cpu(bl_idle_dev, cpu_id);
+ dev->cpu = cpu_id;
+
+ dev->state_count = drv->state_count;
+
+ if (cpuidle_register_device(dev)) {
+ printk(KERN_ERR "%s: Cpuidle register device failed\n",
+ __func__);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+device_initcall(bl_idle_init);
diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c
index 2233791..0e6e408 100644
--- a/drivers/cpuidle/cpuidle-calxeda.c
+++ b/drivers/cpuidle/cpuidle-calxeda.c
@@ -37,20 +37,6 @@
extern void highbank_set_cpu_jump(int cpu, void *jump_addr);
extern void *scu_base_addr;
-static inline unsigned int get_auxcr(void)
-{
- unsigned int val;
- asm("mrc p15, 0, %0, c1, c0, 1 @ get AUXCR" : "=r" (val) : : "cc");
- return val;
-}
-
-static inline void set_auxcr(unsigned int val)
-{
- asm volatile("mcr p15, 0, %0, c1, c0, 1 @ set AUXCR"
- : : "r" (val) : "cc");
- isb();
-}
-
static noinline void calxeda_idle_restore(void)
{
set_cr(get_cr() | CR_C);
diff --git a/drivers/extcon/extcon-adc-jack.c b/drivers/extcon/extcon-adc-jack.c
index d0233cd..5985807 100644
--- a/drivers/extcon/extcon-adc-jack.c
+++ b/drivers/extcon/extcon-adc-jack.c
@@ -87,7 +87,8 @@ static irqreturn_t adc_jack_irq_thread(int irq, void *_data)
{
struct adc_jack_data *data = _data;
- schedule_delayed_work(&data->handler, data->handling_delay);
+ queue_delayed_work(system_power_efficient_wq,
+ &data->handler, data->handling_delay);
return IRQ_HANDLED;
}
diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c
index 02bec32..f874c30 100644
--- a/drivers/extcon/extcon-gpio.c
+++ b/drivers/extcon/extcon-gpio.c
@@ -56,7 +56,7 @@ static irqreturn_t gpio_irq_handler(int irq, void *dev_id)
{
struct gpio_extcon_data *extcon_data = dev_id;
- schedule_delayed_work(&extcon_data->work,
+ queue_delayed_work(system_power_efficient_wq, &extcon_data->work,
extcon_data->debounce_jiffies);
return IRQ_HANDLED;
}
diff --git a/drivers/gator/Kconfig b/drivers/gator/Kconfig
new file mode 100644
index 0000000..7ea0fcc
--- /dev/null
+++ b/drivers/gator/Kconfig
@@ -0,0 +1,33 @@
+config GATOR
+ tristate "Gator module for ARM's Streamline Performance Analyzer"
+ default m if (ARM || ARM64)
+ depends on PROFILING
+ depends on HIGH_RES_TIMERS
+ depends on LOCAL_TIMERS || !(ARM && SMP)
+ select TRACING
+
+config GATOR_WITH_MALI_SUPPORT
+ bool
+
+choice
+ prompt "Enable Mali GPU support in Gator"
+ depends on GATOR
+ optional
+
+config GATOR_MALI_400MP
+ bool "Mali-400MP"
+ select GATOR_WITH_MALI_SUPPORT
+
+config GATOR_MALI_T6XX
+ bool "Mali-T604 or Mali-T658"
+ select GATOR_WITH_MALI_SUPPORT
+
+endchoice
+
+config GATOR_MALI_PATH
+ string "Path to Mali driver"
+ depends on GATOR_WITH_MALI_SUPPORT
+ default "drivers/gpu/arm/mali400mp"
+ help
+ The gator code adds this to its include path so it can get the Mali
+ trace headers with: #include "linux/mali_linux_trace.h"
diff --git a/drivers/gator/LICENSE b/drivers/gator/LICENSE
new file mode 100644
index 0000000..d159169
--- /dev/null
+++ b/drivers/gator/LICENSE
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/drivers/gator/Makefile b/drivers/gator/Makefile
new file mode 100644
index 0000000..3dc9d05
--- /dev/null
+++ b/drivers/gator/Makefile
@@ -0,0 +1,76 @@
+ifneq ($(KERNELRELEASE),)
+
+# Uncomment the following line to enable kernel stack unwinding within gator, or update gator_backtrace.c
+# EXTRA_CFLAGS += -DGATOR_KERNEL_STACK_UNWINDING
+
+CONFIG_GATOR ?= m
+obj-$(CONFIG_GATOR) := gator.o
+
+gator-y := gator_main.o \
+ gator_events_irq.o \
+ gator_events_sched.o \
+ gator_events_net.o \
+ gator_events_block.o \
+ gator_events_meminfo.o \
+ gator_events_perf_pmu.o \
+ gator_events_mmapped.o \
+
+# Convert the old GATOR_WITH_MALI_SUPPORT to the new kernel flags
+ifneq ($(GATOR_WITH_MALI_SUPPORT),)
+ CONFIG_GATOR_WITH_MALI_SUPPORT := y
+ ifeq ($(GATOR_WITH_MALI_SUPPORT),MALI_T6xx)
+ CONFIG_GATOR_MALI_4XXMP := n
+ CONFIG_GATOR_MALI_T6XX := y
+ else
+ CONFIG_GATOR_MALI_4XXMP := y
+ CONFIG_GATOR_MALI_T6XX := n
+ endif
+ EXTRA_CFLAGS += -DMALI_SUPPORT=$(GATOR_WITH_MALI_SUPPORT)
+ ifneq ($(GATOR_MALI_INTERFACE_STYLE),)
+ EXTRA_CFLAGS += -DGATOR_MALI_INTERFACE_STYLE=$(GATOR_MALI_INTERFACE_STYLE)
+ endif
+endif
+
+ifeq ($(CONFIG_GATOR_WITH_MALI_SUPPORT),y)
+ ifeq ($(CONFIG_GATOR_MALI_T6XX),y)
+ gator-y += gator_events_mali_t6xx.o \
+ gator_events_mali_t6xx_hw.o
+ include $(src)/mali_t6xx.mk
+ else
+ gator-y += gator_events_mali_4xx.o
+ endif
+ gator-y += gator_events_mali_common.o
+
+ ifneq ($(CONFIG_GATOR_MALI_PATH),)
+ ccflags-y += -I$(CONFIG_GATOR_MALI_PATH)
+ endif
+ ccflags-$(CONFIG_GATOR_MALI_4XXMP) += -DMALI_SUPPORT=MALI_4xx
+ ccflags-$(CONFIG_GATOR_MALI_T6XX) += -DMALI_SUPPORT=MALI_T6xx
+endif
+
+# GATOR_TEST controls whether to include (=1) or exclude (=0) test code.
+GATOR_TEST ?= 0
+EXTRA_CFLAGS += -DGATOR_TEST=$(GATOR_TEST)
+
+gator-$(CONFIG_ARM) += gator_events_armv6.o \
+ gator_events_armv7.o \
+ gator_events_ccn-504.o \
+ gator_events_l2c-310.o \
+ gator_events_scorpion.o
+
+gator-$(CONFIG_ARM64) += gator_events_ccn-504.o
+
+else
+
+all:
+ @echo
+ @echo "usage:"
+ @echo " make -C <kernel_build_dir> M=\`pwd\` ARCH=arm CROSS_COMPILE=<...> modules"
+ @echo
+ $(error)
+
+clean:
+ rm -f *.o .*.cmd modules.order Module.symvers gator.ko gator.mod.c
+ rm -rf .tmp_versions
+
+endif
diff --git a/drivers/gator/gator.h b/drivers/gator/gator.h
new file mode 100644
index 0000000..d8981ed
--- /dev/null
+++ b/drivers/gator/gator.h
@@ -0,0 +1,142 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef GATOR_H_
+#define GATOR_H_
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/list.h>
+
+#define GATOR_PERF_SUPPORT LINUX_VERSION_CODE >= KERNEL_VERSION(3, 0, 0)
+#define GATOR_PERF_PMU_SUPPORT GATOR_PERF_SUPPORT && defined(CONFIG_PERF_EVENTS) && (!(defined(__arm__) || defined(__aarch64__)) || defined(CONFIG_HW_PERF_EVENTS))
+#define GATOR_NO_PERF_SUPPORT (!(GATOR_PERF_SUPPORT))
+#define GATOR_CPU_FREQ_SUPPORT (LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)) && defined(CONFIG_CPU_FREQ)
+#define GATOR_IKS_SUPPORT defined(CONFIG_BL_SWITCHER)
+
+// cpu ids
+#define ARM1136 0xb36
+#define ARM1156 0xb56
+#define ARM1176 0xb76
+#define ARM11MPCORE 0xb02
+#define CORTEX_A5 0xc05
+#define CORTEX_A7 0xc07
+#define CORTEX_A8 0xc08
+#define CORTEX_A9 0xc09
+#define CORTEX_A12 0xc0d
+#define CORTEX_A15 0xc0f
+#define SCORPION 0x00f
+#define SCORPIONMP 0x02d
+#define KRAITSIM 0x049
+#define KRAIT 0x04d
+#define KRAIT_S4_PRO 0x06f
+#define CORTEX_A53 0xd03
+#define CORTEX_A57 0xd07
+#define AARCH64 0xd0f
+#define OTHER 0xfff
+
+#define MAXSIZE_CORE_NAME 32
+
+struct gator_cpu {
+ const int cpuid;
+ // Human readable name
+ const char core_name[MAXSIZE_CORE_NAME];
+ // Perf PMU name
+ const char * const pmu_name;
+ // gatorfs event name
+ const char * const pmnc_name;
+ // compatible from Documentation/devicetree/bindings/arm/cpus.txt
+ const char * const dt_name;
+ const int pmnc_counters;
+};
+
+const struct gator_cpu *gator_find_cpu_by_cpuid(const u32 cpuid);
+const struct gator_cpu *gator_find_cpu_by_pmu_name(const char *const name);
+
+/******************************************************************************
+ * Filesystem
+ ******************************************************************************/
+int gatorfs_create_file_perm(struct super_block *sb, struct dentry *root,
+ char const *name,
+ const struct file_operations *fops, int perm);
+
+struct dentry *gatorfs_mkdir(struct super_block *sb, struct dentry *root,
+ char const *name);
+
+int gatorfs_create_ulong(struct super_block *sb, struct dentry *root,
+ char const *name, unsigned long *val);
+
+int gatorfs_create_ro_ulong(struct super_block *sb, struct dentry *root,
+ char const *name, unsigned long *val);
+
+void gator_op_create_files(struct super_block *sb, struct dentry *root);
+
+/******************************************************************************
+ * Tracepoints
+ ******************************************************************************/
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
+# error Kernels prior to 2.6.32 not supported
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 35)
+# define GATOR_DEFINE_PROBE(probe_name, proto) \
+ static void probe_##probe_name(PARAMS(proto))
+# define GATOR_REGISTER_TRACE(probe_name) \
+ register_trace_##probe_name(probe_##probe_name)
+# define GATOR_UNREGISTER_TRACE(probe_name) \
+ unregister_trace_##probe_name(probe_##probe_name)
+#else
+# define GATOR_DEFINE_PROBE(probe_name, proto) \
+ static void probe_##probe_name(void *data, PARAMS(proto))
+# define GATOR_REGISTER_TRACE(probe_name) \
+ register_trace_##probe_name(probe_##probe_name, NULL)
+# define GATOR_UNREGISTER_TRACE(probe_name) \
+ unregister_trace_##probe_name(probe_##probe_name, NULL)
+#endif
+
+/******************************************************************************
+ * Events
+ ******************************************************************************/
+struct gator_interface {
+ void (*shutdown)(void); // Complementary function to init
+ int (*create_files)(struct super_block *sb, struct dentry *root);
+ int (*start)(void);
+ void (*stop)(void); // Complementary function to start
+ int (*online)(int **buffer, bool migrate);
+ int (*offline)(int **buffer, bool migrate);
+ void (*online_dispatch)(int cpu, bool migrate); // called in process context but may not be running on core 'cpu'
+ void (*offline_dispatch)(int cpu, bool migrate); // called in process context but may not be running on core 'cpu'
+ int (*read)(int **buffer);
+ int (*read64)(long long **buffer);
+ int (*read_proc)(long long **buffer, struct task_struct *);
+ struct list_head list;
+};
+
+int gator_events_install(struct gator_interface *interface);
+int gator_events_get_key(void);
+u32 gator_cpuid(void);
+
+void gator_backtrace_handler(struct pt_regs *const regs);
+
+#if !GATOR_IKS_SUPPORT
+
+#define get_physical_cpu() smp_processor_id()
+#define lcpu_to_pcpu(lcpu) lcpu
+#define pcpu_to_lcpu(pcpu) pcpu
+
+#else
+
+#define get_physical_cpu() lcpu_to_pcpu(get_logical_cpu())
+int lcpu_to_pcpu(const int lcpu);
+int pcpu_to_lcpu(const int pcpu);
+
+#endif
+
+#define get_logical_cpu() smp_processor_id()
+#define on_primary_core() (get_logical_cpu() == 0)
+
+#endif // GATOR_H_
diff --git a/drivers/gator/gator_annotate.c b/drivers/gator/gator_annotate.c
new file mode 100644
index 0000000..5b9399b
--- /dev/null
+++ b/drivers/gator/gator_annotate.c
@@ -0,0 +1,186 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <asm/current.h>
+#include <linux/spinlock.h>
+
+static DEFINE_SPINLOCK(annotate_lock);
+static bool collect_annotations = false;
+
+static int annotate_copy(struct file *file, char const __user *buf, size_t count)
+{
+ int cpu = 0;
+ int write = per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF];
+
+ if (file == NULL) {
+ // copy from kernel
+ memcpy(&per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][write], buf, count);
+ } else {
+ // copy from user space
+ if (copy_from_user(&per_cpu(gator_buffer, cpu)[ANNOTATE_BUF][write], buf, count) != 0)
+ return -1;
+ }
+ per_cpu(gator_buffer_write, cpu)[ANNOTATE_BUF] = (write + count) & gator_buffer_mask[ANNOTATE_BUF];
+
+ return 0;
+}
+
+static ssize_t annotate_write(struct file *file, char const __user *buf, size_t count_orig, loff_t *offset)
+{
+ int pid, cpu, header_size, available, contiguous, length1, length2, size, count = count_orig & 0x7fffffff;
+ bool interrupt_context;
+
+ if (*offset) {
+ return -EINVAL;
+ }
+
+ interrupt_context = in_interrupt();
+ // Annotations are not supported in interrupt context, but may work if you comment out the the next four lines of code.
+ // By doing so, annotations in interrupt context can result in deadlocks and lost data.
+ if (interrupt_context) {
+ printk(KERN_WARNING "gator: Annotations are not supported in interrupt context. Edit gator_annotate.c in the gator driver to enable annotations in interrupt context.\n");
+ return -EINVAL;
+ }
+
+ retry:
+ // synchronize between cores and with collect_annotations
+ spin_lock(&annotate_lock);
+
+ if (!collect_annotations) {
+ // Not collecting annotations, tell the caller everything was written
+ size = count_orig;
+ goto annotate_write_out;
+ }
+
+ // Annotation only uses a single per-cpu buffer as the data must be in order to the engine
+ cpu = 0;
+
+ if (current == NULL) {
+ pid = 0;
+ } else {
+ pid = current->pid;
+ }
+
+ // determine total size of the payload
+ header_size = MAXSIZE_PACK32 * 3 + MAXSIZE_PACK64;
+ available = buffer_bytes_available(cpu, ANNOTATE_BUF) - header_size;
+ size = count < available ? count : available;
+
+ if (size <= 0) {
+ // Buffer is full, wait until space is available
+ spin_unlock(&annotate_lock);
+
+ // Drop the annotation as blocking is not allowed in interrupt context
+ if (interrupt_context) {
+ return -EINVAL;
+ }
+
+ wait_event_interruptible(gator_annotate_wait, buffer_bytes_available(cpu, ANNOTATE_BUF) > header_size || !collect_annotations);
+
+ // Check to see if a signal is pending
+ if (signal_pending(current)) {
+ return -EINTR;
+ }
+
+ goto retry;
+ }
+
+ // synchronize shared variables annotateBuf and annotatePos
+ if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF]) {
+ u64 time = gator_get_time();
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu());
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid);
+ gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, time);
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, size);
+
+ // determine the sizes to capture, length1 + length2 will equal size
+ contiguous = contiguous_space_available(cpu, ANNOTATE_BUF);
+ if (size < contiguous) {
+ length1 = size;
+ length2 = 0;
+ } else {
+ length1 = contiguous;
+ length2 = size - contiguous;
+ }
+
+ if (annotate_copy(file, buf, length1) != 0) {
+ size = -EINVAL;
+ goto annotate_write_out;
+ }
+
+ if (length2 > 0 && annotate_copy(file, &buf[length1], length2) != 0) {
+ size = -EINVAL;
+ goto annotate_write_out;
+ }
+
+ // Check and commit; commit is set to occur once buffer is 3/4 full
+ buffer_check(cpu, ANNOTATE_BUF, time);
+ }
+
+annotate_write_out:
+ spin_unlock(&annotate_lock);
+
+ // return the number of bytes written
+ return size;
+}
+
+#include "gator_annotate_kernel.c"
+
+static int annotate_release(struct inode *inode, struct file *file)
+{
+ int cpu = 0;
+
+ // synchronize between cores
+ spin_lock(&annotate_lock);
+
+ if (per_cpu(gator_buffer, cpu)[ANNOTATE_BUF] && buffer_check_space(cpu, ANNOTATE_BUF, MAXSIZE_PACK64 + 3 * MAXSIZE_PACK32)) {
+ uint32_t pid = current->pid;
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, get_physical_cpu());
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, pid);
+ gator_buffer_write_packed_int64(cpu, ANNOTATE_BUF, 0); // time
+ gator_buffer_write_packed_int(cpu, ANNOTATE_BUF, 0); // size
+ }
+
+ // Check and commit; commit is set to occur once buffer is 3/4 full
+ buffer_check(cpu, ANNOTATE_BUF, gator_get_time());
+
+ spin_unlock(&annotate_lock);
+
+ return 0;
+}
+
+static const struct file_operations annotate_fops = {
+ .write = annotate_write,
+ .release = annotate_release
+};
+
+static int gator_annotate_create_files(struct super_block *sb, struct dentry *root)
+{
+ return gatorfs_create_file_perm(sb, root, "annotate", &annotate_fops, 0666);
+}
+
+static int gator_annotate_start(void)
+{
+ collect_annotations = true;
+ return 0;
+}
+
+static void gator_annotate_stop(void)
+{
+ // the spinlock here will ensure that when this function exits, we are not in the middle of an annotation
+ spin_lock(&annotate_lock);
+ collect_annotations = false;
+ wake_up(&gator_annotate_wait);
+ spin_unlock(&annotate_lock);
+}
diff --git a/drivers/gator/gator_annotate_kernel.c b/drivers/gator/gator_annotate_kernel.c
new file mode 100644
index 0000000..a406e48
--- /dev/null
+++ b/drivers/gator/gator_annotate_kernel.c
@@ -0,0 +1,200 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define ESCAPE_CODE 0x1c
+#define STRING_ANNOTATION 0x06
+#define NAME_CHANNEL_ANNOTATION 0x07
+#define NAME_GROUP_ANNOTATION 0x08
+#define VISUAL_ANNOTATION 0x04
+#define MARKER_ANNOTATION 0x05
+
+static void kannotate_write(const char *ptr, unsigned int size)
+{
+ int retval;
+ int pos = 0;
+ loff_t offset = 0;
+ while (pos < size) {
+ retval = annotate_write(NULL, &ptr[pos], size - pos, &offset);
+ if (retval < 0) {
+ printk(KERN_WARNING "gator: kannotate_write failed with return value %d\n", retval);
+ return;
+ }
+ pos += retval;
+ }
+}
+
+static void marshal_u16(char *buf, u16 val) {
+ buf[0] = val & 0xff;
+ buf[1] = (val >> 8) & 0xff;
+}
+
+static void marshal_u32(char *buf, u32 val) {
+ buf[0] = val & 0xff;
+ buf[1] = (val >> 8) & 0xff;
+ buf[2] = (val >> 16) & 0xff;
+ buf[3] = (val >> 24) & 0xff;
+}
+
+void gator_annotate_channel(int channel, const char *str)
+{
+ const u16 str_size = strlen(str) & 0xffff;
+ char header[8];
+ header[0] = ESCAPE_CODE;
+ header[1] = STRING_ANNOTATION;
+ marshal_u32(header + 2, channel);
+ marshal_u16(header + 6, str_size);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size);
+}
+
+EXPORT_SYMBOL(gator_annotate_channel);
+
+void gator_annotate(const char *str)
+{
+ gator_annotate_channel(0, str);
+}
+
+EXPORT_SYMBOL(gator_annotate);
+
+void gator_annotate_channel_color(int channel, int color, const char *str)
+{
+ const u16 str_size = (strlen(str) + 4) & 0xffff;
+ char header[12];
+ header[0] = ESCAPE_CODE;
+ header[1] = STRING_ANNOTATION;
+ marshal_u32(header + 2, channel);
+ marshal_u16(header + 6, str_size);
+ marshal_u32(header + 8, color);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size - 4);
+}
+
+EXPORT_SYMBOL(gator_annotate_channel_color);
+
+void gator_annotate_color(int color, const char *str)
+{
+ gator_annotate_channel_color(0, color, str);
+}
+
+EXPORT_SYMBOL(gator_annotate_color);
+
+void gator_annotate_channel_end(int channel)
+{
+ char header[8];
+ header[0] = ESCAPE_CODE;
+ header[1] = STRING_ANNOTATION;
+ marshal_u32(header + 2, channel);
+ marshal_u16(header + 6, 0);
+ kannotate_write(header, sizeof(header));
+}
+
+EXPORT_SYMBOL(gator_annotate_channel_end);
+
+void gator_annotate_end(void)
+{
+ gator_annotate_channel_end(0);
+}
+
+EXPORT_SYMBOL(gator_annotate_end);
+
+void gator_annotate_name_channel(int channel, int group, const char* str)
+{
+ const u16 str_size = strlen(str) & 0xffff;
+ char header[12];
+ header[0] = ESCAPE_CODE;
+ header[1] = NAME_CHANNEL_ANNOTATION;
+ marshal_u32(header + 2, channel);
+ marshal_u32(header + 6, group);
+ marshal_u16(header + 10, str_size);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size);
+}
+
+EXPORT_SYMBOL(gator_annotate_name_channel);
+
+void gator_annotate_name_group(int group, const char* str)
+{
+ const u16 str_size = strlen(str) & 0xffff;
+ char header[8];
+ header[0] = ESCAPE_CODE;
+ header[1] = NAME_GROUP_ANNOTATION;
+ marshal_u32(header + 2, group);
+ marshal_u16(header + 6, str_size);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size);
+}
+
+EXPORT_SYMBOL(gator_annotate_name_group);
+
+void gator_annotate_visual(const char *data, unsigned int length, const char *str)
+{
+ const u16 str_size = strlen(str) & 0xffff;
+ char header[4];
+ char header_length[4];
+ header[0] = ESCAPE_CODE;
+ header[1] = VISUAL_ANNOTATION;
+ marshal_u16(header + 2, str_size);
+ marshal_u32(header_length, length);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size);
+ kannotate_write(header_length, sizeof(header_length));
+ kannotate_write(data, length);
+}
+
+EXPORT_SYMBOL(gator_annotate_visual);
+
+void gator_annotate_marker(void)
+{
+ char header[4];
+ header[0] = ESCAPE_CODE;
+ header[1] = MARKER_ANNOTATION;
+ marshal_u16(header + 2, 0);
+ kannotate_write(header, sizeof(header));
+}
+
+EXPORT_SYMBOL(gator_annotate_marker);
+
+void gator_annotate_marker_str(const char *str)
+{
+ const u16 str_size = strlen(str) & 0xffff;
+ char header[4];
+ header[0] = ESCAPE_CODE;
+ header[1] = MARKER_ANNOTATION;
+ marshal_u16(header + 2, str_size);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size);
+}
+
+EXPORT_SYMBOL(gator_annotate_marker_str);
+
+void gator_annotate_marker_color(int color)
+{
+ char header[8];
+ header[0] = ESCAPE_CODE;
+ header[1] = MARKER_ANNOTATION;
+ marshal_u16(header + 2, 4);
+ marshal_u32(header + 4, color);
+ kannotate_write(header, sizeof(header));
+}
+
+EXPORT_SYMBOL(gator_annotate_marker_color);
+
+void gator_annotate_marker_color_str(int color, const char *str)
+{
+ const u16 str_size = (strlen(str) + 4) & 0xffff;
+ char header[8];
+ header[0] = ESCAPE_CODE;
+ header[1] = MARKER_ANNOTATION;
+ marshal_u16(header + 2, str_size);
+ marshal_u32(header + 4, color);
+ kannotate_write(header, sizeof(header));
+ kannotate_write(str, str_size - 4);
+}
+
+EXPORT_SYMBOL(gator_annotate_marker_color_str);
diff --git a/drivers/gator/gator_backtrace.c b/drivers/gator/gator_backtrace.c
new file mode 100644
index 0000000..ffacb49
--- /dev/null
+++ b/drivers/gator/gator_backtrace.c
@@ -0,0 +1,168 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/*
+ * EABI backtrace stores {fp,lr} on the stack.
+ */
+struct stack_frame_eabi {
+ union {
+ struct {
+ unsigned long fp;
+ // May be the fp in the case of a leaf function or clang
+ unsigned long lr;
+ // If lr is really the fp, lr2 is the corresponding lr
+ unsigned long lr2;
+ };
+ // Used to read 32 bit fp/lr from a 64 bit kernel
+ struct {
+ u32 fp_32;
+ // same as lr above
+ u32 lr_32;
+ // same as lr2 above
+ u32 lr2_32;
+ };
+ };
+};
+
+static void arm_backtrace_eabi(int cpu, struct pt_regs *const regs, unsigned int depth)
+{
+#if defined(__arm__) || defined(__aarch64__)
+ struct stack_frame_eabi *curr;
+ struct stack_frame_eabi bufcurr;
+#if defined(__arm__)
+ const bool is_compat = false;
+ unsigned long fp = regs->ARM_fp;
+ unsigned long sp = regs->ARM_sp;
+ unsigned long lr = regs->ARM_lr;
+ const int gcc_frame_offset = sizeof(unsigned long);
+#else
+ // Is userspace aarch32 (32 bit)
+ const bool is_compat = compat_user_mode(regs);
+ unsigned long fp = (is_compat ? regs->regs[11] : regs->regs[29]);
+ unsigned long sp = (is_compat ? regs->compat_sp : regs->sp);
+ unsigned long lr = (is_compat ? regs->compat_lr : regs->regs[30]);
+ const int gcc_frame_offset = (is_compat ? sizeof(u32) : 0);
+#endif
+ // clang frame offset is always zero
+ int is_user_mode = user_mode(regs);
+
+ // pc (current function) has already been added
+
+ if (!is_user_mode) {
+ return;
+ }
+
+ // Add the lr (parent function)
+ // entry preamble may not have executed
+ gator_add_trace(cpu, lr);
+
+ // check fp is valid
+ if (fp == 0 || fp < sp) {
+ return;
+ }
+
+ // Get the current stack frame
+ curr = (struct stack_frame_eabi *)(fp - gcc_frame_offset);
+ if ((unsigned long)curr & 3) {
+ return;
+ }
+
+ while (depth-- && curr) {
+ if (!access_ok(VERIFY_READ, curr, sizeof(struct stack_frame_eabi)) ||
+ __copy_from_user_inatomic(&bufcurr, curr, sizeof(struct stack_frame_eabi))) {
+ return;
+ }
+
+ fp = (is_compat ? bufcurr.fp_32 : bufcurr.fp);
+ lr = (is_compat ? bufcurr.lr_32 : bufcurr.lr);
+
+#define calc_next(reg) ((reg) - gcc_frame_offset)
+ // Returns true if reg is a valid fp
+#define validate_next(reg, curr) \
+ ((reg) != 0 && (calc_next(reg) & 3) == 0 && (unsigned long)(curr) < calc_next(reg))
+
+ // Try lr from the stack as the fp because gcc leaf functions do not push lr
+ // If gcc_frame_offset is non-zero, the lr will also be the clang fp
+ // This assumes code is at a lower address than the stack
+ if (validate_next(lr, curr)) {
+ fp = lr;
+ lr = (is_compat ? bufcurr.lr2_32 : bufcurr.lr2);
+ }
+
+ gator_add_trace(cpu, lr);
+
+ if (!validate_next(fp, curr)) {
+ return;
+ }
+
+ // Move to the next stack frame
+ curr = (struct stack_frame_eabi *)calc_next(fp);
+ }
+#endif
+}
+
+#if defined(__arm__) || defined(__aarch64__)
+static int report_trace(struct stackframe *frame, void *d)
+{
+ unsigned int *depth = d, cookie = NO_COOKIE;
+ unsigned long addr = frame->pc;
+
+ if (*depth) {
+#if defined(MODULE)
+ unsigned int cpu = get_physical_cpu();
+ struct module *mod = __module_address(addr);
+ if (mod) {
+ cookie = get_cookie(cpu, current, mod->name, false);
+ addr = addr - (unsigned long)mod->module_core;
+ }
+#endif
+ marshal_backtrace(addr & ~1, cookie);
+ (*depth)--;
+ }
+
+ return *depth == 0;
+}
+#endif
+
+// Uncomment the following line to enable kernel stack unwinding within gator, note it can also be defined from the Makefile
+// #define GATOR_KERNEL_STACK_UNWINDING
+
+#if (defined(__arm__) || defined(__aarch64__)) && !defined(GATOR_KERNEL_STACK_UNWINDING)
+// Disabled by default
+MODULE_PARM_DESC(kernel_stack_unwinding, "Allow kernel stack unwinding.");
+bool kernel_stack_unwinding = 0;
+module_param(kernel_stack_unwinding, bool, 0644);
+#endif
+
+static void kernel_backtrace(int cpu, struct pt_regs *const regs)
+{
+#if defined(__arm__) || defined(__aarch64__)
+#ifdef GATOR_KERNEL_STACK_UNWINDING
+ int depth = gator_backtrace_depth;
+#else
+ int depth = (kernel_stack_unwinding ? gator_backtrace_depth : 1);
+#endif
+ struct stackframe frame;
+ if (depth == 0)
+ depth = 1;
+#if defined(__arm__)
+ frame.fp = regs->ARM_fp;
+ frame.sp = regs->ARM_sp;
+ frame.lr = regs->ARM_lr;
+ frame.pc = regs->ARM_pc;
+#else
+ frame.fp = regs->regs[29];
+ frame.sp = regs->sp;
+ frame.pc = regs->pc;
+#endif
+ walk_stackframe(&frame, report_trace, &depth);
+#else
+ marshal_backtrace(PC_REG & ~1, NO_COOKIE);
+#endif
+}
diff --git a/drivers/gator/gator_cookies.c b/drivers/gator/gator_cookies.c
new file mode 100644
index 0000000..eb9b946
--- /dev/null
+++ b/drivers/gator/gator_cookies.c
@@ -0,0 +1,433 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define COOKIEMAP_ENTRIES 1024 /* must be power of 2 */
+#define TRANSLATE_BUFFER_SIZE 512 // must be a power of 2 - 512/4 = 128 entries
+#define TRANSLATE_TEXT_SIZE 256
+#define MAX_COLLISIONS 2
+
+static uint32_t *gator_crc32_table;
+static unsigned int translate_buffer_mask;
+
+struct cookie_args {
+ struct task_struct *task;
+ const char *text;
+};
+
+static DEFINE_PER_CPU(char *, translate_text);
+static DEFINE_PER_CPU(uint32_t, cookie_next_key);
+static DEFINE_PER_CPU(uint64_t *, cookie_keys);
+static DEFINE_PER_CPU(uint32_t *, cookie_values);
+static DEFINE_PER_CPU(int, translate_buffer_read);
+static DEFINE_PER_CPU(int, translate_buffer_write);
+static DEFINE_PER_CPU(struct cookie_args *, translate_buffer);
+
+static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq);
+static void wq_cookie_handler(struct work_struct *unused);
+DECLARE_WORK(cookie_work, wq_cookie_handler);
+static struct timer_list app_process_wake_up_timer;
+static void app_process_wake_up_handler(unsigned long unused_data);
+
+static uint32_t cookiemap_code(uint64_t value64)
+{
+ uint32_t value = (uint32_t)((value64 >> 32) + value64);
+ uint32_t cookiecode = (value >> 24) & 0xff;
+ cookiecode = cookiecode * 31 + ((value >> 16) & 0xff);
+ cookiecode = cookiecode * 31 + ((value >> 8) & 0xff);
+ cookiecode = cookiecode * 31 + ((value >> 0) & 0xff);
+ cookiecode &= (COOKIEMAP_ENTRIES - 1);
+ return cookiecode * MAX_COLLISIONS;
+}
+
+static uint32_t gator_chksum_crc32(const char *data)
+{
+ register unsigned long crc;
+ const unsigned char *block = data;
+ int i, length = strlen(data);
+
+ crc = 0xFFFFFFFF;
+ for (i = 0; i < length; i++) {
+ crc = ((crc >> 8) & 0x00FFFFFF) ^ gator_crc32_table[(crc ^ *block++) & 0xFF];
+ }
+
+ return (crc ^ 0xFFFFFFFF);
+}
+
+/*
+ * Exists
+ * Pre: [0][1][v][3]..[n-1]
+ * Post: [v][0][1][3]..[n-1]
+ */
+static uint32_t cookiemap_exists(uint64_t key)
+{
+ unsigned long x, flags, retval = 0;
+ int cpu = get_physical_cpu();
+ uint32_t cookiecode = cookiemap_code(key);
+ uint64_t *keys = &(per_cpu(cookie_keys, cpu)[cookiecode]);
+ uint32_t *values = &(per_cpu(cookie_values, cpu)[cookiecode]);
+
+ // Can be called from interrupt handler or from work queue
+ local_irq_save(flags);
+ for (x = 0; x < MAX_COLLISIONS; x++) {
+ if (keys[x] == key) {
+ uint32_t value = values[x];
+ for (; x > 0; x--) {
+ keys[x] = keys[x - 1];
+ values[x] = values[x - 1];
+ }
+ keys[0] = key;
+ values[0] = value;
+ retval = value;
+ break;
+ }
+ }
+ local_irq_restore(flags);
+
+ return retval;
+}
+
+/*
+ * Add
+ * Pre: [0][1][2][3]..[n-1]
+ * Post: [v][0][1][2]..[n-2]
+ */
+static void cookiemap_add(uint64_t key, uint32_t value)
+{
+ int cpu = get_physical_cpu();
+ int cookiecode = cookiemap_code(key);
+ uint64_t *keys = &(per_cpu(cookie_keys, cpu)[cookiecode]);
+ uint32_t *values = &(per_cpu(cookie_values, cpu)[cookiecode]);
+ int x;
+
+ for (x = MAX_COLLISIONS - 1; x > 0; x--) {
+ keys[x] = keys[x - 1];
+ values[x] = values[x - 1];
+ }
+ keys[0] = key;
+ values[0] = value;
+}
+
+#ifndef CONFIG_PREEMPT_RT_FULL
+static void translate_buffer_write_args(int cpu, struct task_struct *task, const char *text)
+{
+ unsigned long flags;
+ int write;
+ int next_write;
+ struct cookie_args *args;
+
+ local_irq_save(flags);
+
+ write = per_cpu(translate_buffer_write, cpu);
+ next_write = (write + 1) & translate_buffer_mask;
+
+ // At least one entry must always remain available as when read == write, the queue is empty not full
+ if (next_write != per_cpu(translate_buffer_read, cpu)) {
+ args = &per_cpu(translate_buffer, cpu)[write];
+ args->task = task;
+ args->text = text;
+ get_task_struct(task);
+ per_cpu(translate_buffer_write, cpu) = next_write;
+ }
+
+ local_irq_restore(flags);
+}
+#endif
+
+static void translate_buffer_read_args(int cpu, struct cookie_args *args)
+{
+ unsigned long flags;
+ int read;
+
+ local_irq_save(flags);
+
+ read = per_cpu(translate_buffer_read, cpu);
+ *args = per_cpu(translate_buffer, cpu)[read];
+ per_cpu(translate_buffer_read, cpu) = (read + 1) & translate_buffer_mask;
+
+ local_irq_restore(flags);
+}
+
+static void wq_cookie_handler(struct work_struct *unused)
+{
+ struct cookie_args args;
+ int cpu = get_physical_cpu(), cookie;
+
+ mutex_lock(&start_mutex);
+
+ if (gator_started != 0) {
+ while (per_cpu(translate_buffer_read, cpu) != per_cpu(translate_buffer_write, cpu)) {
+ translate_buffer_read_args(cpu, &args);
+ cookie = get_cookie(cpu, args.task, args.text, true);
+ marshal_link(cookie, args.task->tgid, args.task->pid);
+ put_task_struct(args.task);
+ }
+ }
+
+ mutex_unlock(&start_mutex);
+}
+
+static void app_process_wake_up_handler(unsigned long unused_data)
+{
+ // had to delay scheduling work as attempting to schedule work during the context switch is illegal in kernel versions 3.5 and greater
+ schedule_work(&cookie_work);
+}
+
+// Retrieve full name from proc/pid/cmdline for java processes on Android
+static int translate_app_process(const char **text, int cpu, struct task_struct *task, bool from_wq)
+{
+ void *maddr;
+ unsigned int len;
+ unsigned long addr;
+ struct mm_struct *mm;
+ struct page *page = NULL;
+ struct vm_area_struct *page_vma;
+ int bytes, offset, retval = 0;
+ char *buf = per_cpu(translate_text, cpu);
+
+#ifndef CONFIG_PREEMPT_RT_FULL
+ // Push work into a work queue if in atomic context as the kernel functions below might sleep
+ // Rely on the in_interrupt variable rather than in_irq() or in_interrupt() kernel functions, as the value of these functions seems
+ // inconsistent during a context switch between android/linux versions
+ if (!from_wq) {
+ // Check if already in buffer
+ int pos = per_cpu(translate_buffer_read, cpu);
+ while (pos != per_cpu(translate_buffer_write, cpu)) {
+ if (per_cpu(translate_buffer, cpu)[pos].task == task)
+ goto out;
+ pos = (pos + 1) & translate_buffer_mask;
+ }
+
+ translate_buffer_write_args(cpu, task, *text);
+
+ // Not safe to call in RT-Preempt full in schedule switch context
+ mod_timer(&app_process_wake_up_timer, jiffies + 1);
+ goto out;
+ }
+#endif
+
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out;
+ if (!mm->arg_end)
+ goto outmm;
+ addr = mm->arg_start;
+ len = mm->arg_end - mm->arg_start;
+
+ if (len > TRANSLATE_TEXT_SIZE)
+ len = TRANSLATE_TEXT_SIZE;
+
+ down_read(&mm->mmap_sem);
+ while (len) {
+ if (get_user_pages(task, mm, addr, 1, 0, 1, &page, &page_vma) <= 0)
+ goto outsem;
+
+ maddr = kmap(page);
+ offset = addr & (PAGE_SIZE - 1);
+ bytes = len;
+ if (bytes > PAGE_SIZE - offset)
+ bytes = PAGE_SIZE - offset;
+
+ copy_from_user_page(page_vma, page, addr, buf, maddr + offset, bytes);
+
+ kunmap(page); // release page allocated by get_user_pages()
+ page_cache_release(page);
+
+ len -= bytes;
+ buf += bytes;
+ addr += bytes;
+
+ *text = per_cpu(translate_text, cpu);
+ retval = 1;
+ }
+
+ // On app_process startup, /proc/pid/cmdline is initially "zygote" then "<pre-initialized>" but changes after an initial startup period
+ if (strcmp(*text, "zygote") == 0 || strcmp(*text, "<pre-initialized>") == 0)
+ retval = 0;
+
+outsem:
+ up_read(&mm->mmap_sem);
+outmm:
+ mmput(mm);
+out:
+ return retval;
+}
+
+static uint32_t get_cookie(int cpu, struct task_struct *task, const char *text, bool from_wq)
+{
+ unsigned long flags, cookie;
+ uint64_t key;
+
+ key = gator_chksum_crc32(text);
+ key = (key << 32) | (uint32_t)task->tgid;
+
+ cookie = cookiemap_exists(key);
+ if (cookie) {
+ return cookie;
+ }
+
+ if (strcmp(text, "app_process") == 0) {
+ if (!translate_app_process(&text, cpu, task, from_wq))
+ return UNRESOLVED_COOKIE;
+ }
+
+ // Can be called from interrupt handler or from work queue or from scheduler trace
+ local_irq_save(flags);
+
+ cookie = UNRESOLVED_COOKIE;
+ if (marshal_cookie_header(text)) {
+ cookie = per_cpu(cookie_next_key, cpu) += nr_cpu_ids;
+ cookiemap_add(key, cookie);
+ marshal_cookie(cookie, text);
+ }
+
+ local_irq_restore(flags);
+
+ return cookie;
+}
+
+static int get_exec_cookie(int cpu, struct task_struct *task)
+{
+ struct mm_struct *mm = task->mm;
+ const char *text;
+
+ // kernel threads have no address space
+ if (!mm)
+ return NO_COOKIE;
+
+ if (task && task->mm && task->mm->exe_file) {
+ text = task->mm->exe_file->f_path.dentry->d_name.name;
+ return get_cookie(cpu, task, text, false);
+ }
+
+ return UNRESOLVED_COOKIE;
+}
+
+static unsigned long get_address_cookie(int cpu, struct task_struct *task, unsigned long addr, off_t *offset)
+{
+ unsigned long cookie = NO_COOKIE;
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+ const char *text;
+
+ if (!mm)
+ return cookie;
+
+ for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
+ if (addr < vma->vm_start || addr >= vma->vm_end)
+ continue;
+
+ if (vma->vm_file) {
+ text = vma->vm_file->f_path.dentry->d_name.name;
+ cookie = get_cookie(cpu, task, text, false);
+ *offset = (vma->vm_pgoff << PAGE_SHIFT) + addr - vma->vm_start;
+ } else {
+ /* must be an anonymous map */
+ *offset = addr;
+ }
+
+ break;
+ }
+
+ if (!vma)
+ cookie = UNRESOLVED_COOKIE;
+
+ return cookie;
+}
+
+static int cookies_initialize(void)
+{
+ uint32_t crc, poly;
+ int i, j, cpu, size, err = 0;
+
+ translate_buffer_mask = TRANSLATE_BUFFER_SIZE / sizeof(per_cpu(translate_buffer, 0)[0]) - 1;
+
+ for_each_present_cpu(cpu) {
+ per_cpu(cookie_next_key, cpu) = nr_cpu_ids + cpu;
+
+ size = COOKIEMAP_ENTRIES * MAX_COLLISIONS * sizeof(uint64_t);
+ per_cpu(cookie_keys, cpu) = (uint64_t *)kmalloc(size, GFP_KERNEL);
+ if (!per_cpu(cookie_keys, cpu)) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
+ memset(per_cpu(cookie_keys, cpu), 0, size);
+
+ size = COOKIEMAP_ENTRIES * MAX_COLLISIONS * sizeof(uint32_t);
+ per_cpu(cookie_values, cpu) = (uint32_t *)kmalloc(size, GFP_KERNEL);
+ if (!per_cpu(cookie_values, cpu)) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
+ memset(per_cpu(cookie_values, cpu), 0, size);
+
+ per_cpu(translate_buffer, cpu) = (struct cookie_args *)kmalloc(TRANSLATE_BUFFER_SIZE, GFP_KERNEL);
+ if (!per_cpu(translate_buffer, cpu)) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
+
+ per_cpu(translate_buffer_write, cpu) = 0;
+ per_cpu(translate_buffer_read, cpu) = 0;
+
+ per_cpu(translate_text, cpu) = (char *)kmalloc(TRANSLATE_TEXT_SIZE, GFP_KERNEL);
+ if (!per_cpu(translate_text, cpu)) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
+ }
+
+ // build CRC32 table
+ poly = 0x04c11db7;
+ gator_crc32_table = (uint32_t *)kmalloc(256 * sizeof(uint32_t), GFP_KERNEL);
+ if (!gator_crc32_table) {
+ err = -ENOMEM;
+ goto cookie_setup_error;
+ }
+ for (i = 0; i < 256; i++) {
+ crc = i;
+ for (j = 8; j > 0; j--) {
+ if (crc & 1) {
+ crc = (crc >> 1) ^ poly;
+ } else {
+ crc >>= 1;
+ }
+ }
+ gator_crc32_table[i] = crc;
+ }
+
+ setup_timer(&app_process_wake_up_timer, app_process_wake_up_handler, 0);
+
+cookie_setup_error:
+ return err;
+}
+
+static void cookies_release(void)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu) {
+ kfree(per_cpu(cookie_keys, cpu));
+ per_cpu(cookie_keys, cpu) = NULL;
+
+ kfree(per_cpu(cookie_values, cpu));
+ per_cpu(cookie_values, cpu) = NULL;
+
+ kfree(per_cpu(translate_buffer, cpu));
+ per_cpu(translate_buffer, cpu) = NULL;
+ per_cpu(translate_buffer_read, cpu) = 0;
+ per_cpu(translate_buffer_write, cpu) = 0;
+
+ kfree(per_cpu(translate_text, cpu));
+ per_cpu(translate_text, cpu) = NULL;
+ }
+
+ del_timer_sync(&app_process_wake_up_timer);
+ kfree(gator_crc32_table);
+ gator_crc32_table = NULL;
+}
diff --git a/drivers/gator/gator_events_armv6.c b/drivers/gator/gator_events_armv6.c
new file mode 100644
index 0000000..dd79740
--- /dev/null
+++ b/drivers/gator/gator_events_armv6.c
@@ -0,0 +1,237 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+// gator_events_perf_pmu.c is used if perf is supported
+#if GATOR_NO_PERF_SUPPORT
+
+static const char *pmnc_name;
+
+/*
+ * Per-CPU PMCR
+ */
+#define PMCR_E (1 << 0) /* Enable */
+#define PMCR_P (1 << 1) /* Count reset */
+#define PMCR_C (1 << 2) /* Cycle counter reset */
+#define PMCR_OFL_PMN0 (1 << 8) /* Count reg 0 overflow */
+#define PMCR_OFL_PMN1 (1 << 9) /* Count reg 1 overflow */
+#define PMCR_OFL_CCNT (1 << 10) /* Cycle counter overflow */
+
+#define PMN0 0
+#define PMN1 1
+#define CCNT 2
+#define CNTMAX (CCNT+1)
+
+static int pmnc_counters = 0;
+static unsigned long pmnc_enabled[CNTMAX];
+static unsigned long pmnc_event[CNTMAX];
+static unsigned long pmnc_key[CNTMAX];
+
+static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt);
+
+static inline void armv6_pmnc_write(u32 val)
+{
+ /* upper 4bits and 7, 11 are write-as-0 */
+ val &= 0x0ffff77f;
+ asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r" (val));
+}
+
+static inline u32 armv6_pmnc_read(void)
+{
+ u32 val;
+ asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r" (val));
+ return val;
+}
+
+static void armv6_pmnc_reset_counter(unsigned int cnt)
+{
+ u32 val = 0;
+ switch (cnt) {
+ case CCNT:
+ asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r" (val));
+ break;
+ case PMN0:
+ asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r" (val));
+ break;
+ case PMN1:
+ asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r" (val));
+ break;
+ }
+}
+
+int gator_events_armv6_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ int i;
+
+ pmnc_counters = 3;
+
+ for (i = PMN0; i <= CCNT; i++) {
+ char buf[40];
+ if (i == CCNT) {
+ snprintf(buf, sizeof buf, "ARM_%s_ccnt", pmnc_name);
+ } else {
+ snprintf(buf, sizeof buf, "ARM_%s_cnt%d", pmnc_name, i);
+ }
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (!dir) {
+ return -1;
+ }
+ gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
+ if (i != CCNT) {
+ gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
+ }
+ }
+
+ return 0;
+}
+
+static int gator_events_armv6_online(int **buffer, bool migrate)
+{
+ unsigned int cnt, len = 0, cpu = smp_processor_id();
+ u32 pmnc;
+
+ if (armv6_pmnc_read() & PMCR_E) {
+ armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E);
+ }
+
+ /* initialize PMNC, reset overflow, D bit, C bit and P bit. */
+ armv6_pmnc_write(PMCR_OFL_PMN0 | PMCR_OFL_PMN1 | PMCR_OFL_CCNT |
+ PMCR_C | PMCR_P);
+
+ /* configure control register */
+ for (pmnc = 0, cnt = PMN0; cnt <= CCNT; cnt++) {
+ unsigned long event;
+
+ if (!pmnc_enabled[cnt])
+ continue;
+
+ event = pmnc_event[cnt] & 255;
+
+ // Set event (if destined for PMNx counters)
+ if (cnt == PMN0) {
+ pmnc |= event << 20;
+ } else if (cnt == PMN1) {
+ pmnc |= event << 12;
+ }
+
+ // Reset counter
+ armv6_pmnc_reset_counter(cnt);
+ }
+ armv6_pmnc_write(pmnc | PMCR_E);
+
+ // return zero values, no need to read as the counters were just reset
+ for (cnt = PMN0; cnt <= CCNT; cnt++) {
+ if (pmnc_enabled[cnt]) {
+ per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+ per_cpu(perfCnt, cpu)[len++] = 0;
+ }
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perfCnt, cpu);
+
+ return len;
+}
+
+static int gator_events_armv6_offline(int **buffer, bool migrate)
+{
+ unsigned int cnt;
+
+ armv6_pmnc_write(armv6_pmnc_read() & ~PMCR_E);
+ for (cnt = PMN0; cnt <= CCNT; cnt++) {
+ armv6_pmnc_reset_counter(cnt);
+ }
+
+ return 0;
+}
+
+static void gator_events_armv6_stop(void)
+{
+ unsigned int cnt;
+
+ for (cnt = PMN0; cnt <= CCNT; cnt++) {
+ pmnc_enabled[cnt] = 0;
+ pmnc_event[cnt] = 0;
+ }
+}
+
+static int gator_events_armv6_read(int **buffer)
+{
+ int cnt, len = 0;
+ int cpu = smp_processor_id();
+
+ // a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled
+ if (!(armv6_pmnc_read() & PMCR_E)) {
+ return 0;
+ }
+
+ for (cnt = PMN0; cnt <= CCNT; cnt++) {
+ if (pmnc_enabled[cnt]) {
+ u32 value = 0;
+ switch (cnt) {
+ case CCNT:
+ asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r" (value));
+ break;
+ case PMN0:
+ asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r" (value));
+ break;
+ case PMN1:
+ asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r" (value));
+ break;
+ }
+ armv6_pmnc_reset_counter(cnt);
+
+ per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+ per_cpu(perfCnt, cpu)[len++] = value;
+ }
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perfCnt, cpu);
+
+ return len;
+}
+
+static struct gator_interface gator_events_armv6_interface = {
+ .create_files = gator_events_armv6_create_files,
+ .stop = gator_events_armv6_stop,
+ .online = gator_events_armv6_online,
+ .offline = gator_events_armv6_offline,
+ .read = gator_events_armv6_read,
+};
+
+int gator_events_armv6_init(void)
+{
+ unsigned int cnt;
+
+ switch (gator_cpuid()) {
+ case ARM1136:
+ case ARM1156:
+ case ARM1176:
+ pmnc_name = "ARM11";
+ break;
+ case ARM11MPCORE:
+ pmnc_name = "ARM11MPCore";
+ break;
+ default:
+ return -1;
+ }
+
+ for (cnt = PMN0; cnt <= CCNT; cnt++) {
+ pmnc_enabled[cnt] = 0;
+ pmnc_event[cnt] = 0;
+ pmnc_key[cnt] = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_armv6_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_armv7.c b/drivers/gator/gator_events_armv7.c
new file mode 100644
index 0000000..30881c8
--- /dev/null
+++ b/drivers/gator/gator_events_armv7.c
@@ -0,0 +1,312 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Disabling interrupts
+ * Many of the functions below disable interrupts via local_irq_save(). This disabling of interrupts is done to prevent any race conditions
+ * between multiple entities (e.g. hrtimer interrupts and event based interrupts) calling the same functions. As accessing the pmu involves
+ * several steps (disable, select, read, enable), these steps must be performed atomically. Normal synchronization routines cannot be used
+ * as these functions are being called from interrupt context.
+ */
+
+#include "gator.h"
+
+// gator_events_perf_pmu.c is used if perf is supported
+#if GATOR_NO_PERF_SUPPORT
+
+// Per-CPU PMNC: config reg
+#define PMNC_E (1 << 0) /* Enable all counters */
+#define PMNC_P (1 << 1) /* Reset all counters */
+#define PMNC_C (1 << 2) /* Cycle counter reset */
+#define PMNC_MASK 0x3f /* Mask for writable bits */
+
+// ccnt reg
+#define CCNT_REG (1 << 31)
+
+#define CCNT 0
+#define CNT0 1
+#define CNTMAX (6+1)
+
+static const char *pmnc_name;
+static int pmnc_counters;
+
+static unsigned long pmnc_enabled[CNTMAX];
+static unsigned long pmnc_event[CNTMAX];
+static unsigned long pmnc_key[CNTMAX];
+
+static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt);
+
+inline void armv7_pmnc_write(u32 val)
+{
+ val &= PMNC_MASK;
+ asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (val));
+}
+
+inline u32 armv7_pmnc_read(void)
+{
+ u32 val;
+ asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+ return val;
+}
+
+inline u32 armv7_ccnt_read(u32 reset_value)
+{
+ unsigned long flags;
+ u32 newval = -reset_value;
+ u32 den = CCNT_REG;
+ u32 val;
+
+ local_irq_save(flags);
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den)); // disable
+ asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); // read
+ asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (newval)); // new value
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den)); // enable
+ local_irq_restore(flags);
+
+ return val;
+}
+
+inline u32 armv7_cntn_read(unsigned int cnt, u32 reset_value)
+{
+ unsigned long flags;
+ u32 newval = -reset_value;
+ u32 sel = (cnt - CNT0);
+ u32 den = 1 << sel;
+ u32 oldval;
+
+ local_irq_save(flags);
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (den)); // disable
+ asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (sel)); // select
+ asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (oldval)); // read
+ asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (newval)); // new value
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (den)); // enable
+ local_irq_restore(flags);
+
+ return oldval;
+}
+
+static inline void armv7_pmnc_disable_interrupt(unsigned int cnt)
+{
+ u32 val = cnt ? (1 << (cnt - CNT0)) : (1 << 31);
+ asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
+}
+
+inline u32 armv7_pmnc_reset_interrupt(void)
+{
+ // Get and reset overflow status flags
+ u32 flags;
+ asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (flags));
+ flags &= 0x8000003f;
+ asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (flags));
+ return flags;
+}
+
+static inline u32 armv7_pmnc_enable_counter(unsigned int cnt)
+{
+ u32 val = cnt ? (1 << (cnt - CNT0)) : CCNT_REG;
+ asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
+ return cnt;
+}
+
+static inline u32 armv7_pmnc_disable_counter(unsigned int cnt)
+{
+ u32 val = cnt ? (1 << (cnt - CNT0)) : CCNT_REG;
+ asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
+ return cnt;
+}
+
+static inline int armv7_pmnc_select_counter(unsigned int cnt)
+{
+ u32 val = (cnt - CNT0);
+ asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
+ return cnt;
+}
+
+static inline void armv7_pmnc_write_evtsel(unsigned int cnt, u32 val)
+{
+ if (armv7_pmnc_select_counter(cnt) == cnt) {
+ asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
+ }
+}
+
+static int gator_events_armv7_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ int i;
+
+ for (i = 0; i < pmnc_counters; i++) {
+ char buf[40];
+ if (i == 0) {
+ snprintf(buf, sizeof buf, "ARM_%s_ccnt", pmnc_name);
+ } else {
+ snprintf(buf, sizeof buf, "ARM_%s_cnt%d", pmnc_name, i - 1);
+ }
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (!dir) {
+ return -1;
+ }
+ gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]);
+ if (i > 0) {
+ gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]);
+ }
+ }
+
+ return 0;
+}
+
+static int gator_events_armv7_online(int **buffer, bool migrate)
+{
+ unsigned int cnt, len = 0, cpu = smp_processor_id();
+
+ if (armv7_pmnc_read() & PMNC_E) {
+ armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E);
+ }
+
+ // Initialize & Reset PMNC: C bit and P bit
+ armv7_pmnc_write(PMNC_P | PMNC_C);
+
+ // Reset overflow flags
+ armv7_pmnc_reset_interrupt();
+
+ for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+ unsigned long event;
+
+ if (!pmnc_enabled[cnt])
+ continue;
+
+ // Disable counter
+ armv7_pmnc_disable_counter(cnt);
+
+ event = pmnc_event[cnt] & 255;
+
+ // Set event (if destined for PMNx counters), we don't need to set the event if it's a cycle count
+ if (cnt != CCNT)
+ armv7_pmnc_write_evtsel(cnt, event);
+
+ armv7_pmnc_disable_interrupt(cnt);
+
+ // Reset counter
+ cnt ? armv7_cntn_read(cnt, 0) : armv7_ccnt_read(0);
+
+ // Enable counter
+ armv7_pmnc_enable_counter(cnt);
+ }
+
+ // enable
+ armv7_pmnc_write(armv7_pmnc_read() | PMNC_E);
+
+ // return zero values, no need to read as the counters were just reset
+ for (cnt = 0; cnt < pmnc_counters; cnt++) {
+ if (pmnc_enabled[cnt]) {
+ per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+ per_cpu(perfCnt, cpu)[len++] = 0;
+ }
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perfCnt, cpu);
+
+ return len;
+}
+
+static int gator_events_armv7_offline(int **buffer, bool migrate)
+{
+ // disable all counters, including PMCCNTR; overflow IRQs will not be signaled
+ armv7_pmnc_write(armv7_pmnc_read() & ~PMNC_E);
+
+ return 0;
+}
+
+static void gator_events_armv7_stop(void)
+{
+ unsigned int cnt;
+
+ for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+ pmnc_enabled[cnt] = 0;
+ pmnc_event[cnt] = 0;
+ }
+}
+
+static int gator_events_armv7_read(int **buffer)
+{
+ int cnt, len = 0;
+ int cpu = smp_processor_id();
+
+ // a context switch may occur before the online hotplug event, thus need to check that the pmu is enabled
+ if (!(armv7_pmnc_read() & PMNC_E)) {
+ return 0;
+ }
+
+ for (cnt = 0; cnt < pmnc_counters; cnt++) {
+ if (pmnc_enabled[cnt]) {
+ int value;
+ if (cnt == CCNT) {
+ value = armv7_ccnt_read(0);
+ } else {
+ value = armv7_cntn_read(cnt, 0);
+ }
+ per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt];
+ per_cpu(perfCnt, cpu)[len++] = value;
+ }
+ }
+
+ if (buffer)
+ *buffer = per_cpu(perfCnt, cpu);
+
+ return len;
+}
+
+static struct gator_interface gator_events_armv7_interface = {
+ .create_files = gator_events_armv7_create_files,
+ .stop = gator_events_armv7_stop,
+ .online = gator_events_armv7_online,
+ .offline = gator_events_armv7_offline,
+ .read = gator_events_armv7_read,
+};
+
+int gator_events_armv7_init(void)
+{
+ unsigned int cnt;
+
+ switch (gator_cpuid()) {
+ case CORTEX_A5:
+ pmnc_name = "Cortex-A5";
+ pmnc_counters = 2;
+ break;
+ case CORTEX_A7:
+ pmnc_name = "Cortex-A7";
+ pmnc_counters = 4;
+ break;
+ case CORTEX_A8:
+ pmnc_name = "Cortex-A8";
+ pmnc_counters = 4;
+ break;
+ case CORTEX_A9:
+ pmnc_name = "Cortex-A9";
+ pmnc_counters = 6;
+ break;
+ case CORTEX_A15:
+ pmnc_name = "Cortex-A15";
+ pmnc_counters = 6;
+ break;
+ default:
+ return -1;
+ }
+
+ pmnc_counters++; // CNT[n] + CCNT
+
+ for (cnt = CCNT; cnt < CNTMAX; cnt++) {
+ pmnc_enabled[cnt] = 0;
+ pmnc_event[cnt] = 0;
+ pmnc_key[cnt] = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_armv7_interface);
+}
+
+#endif
diff --git a/drivers/gator/gator_events_block.c b/drivers/gator/gator_events_block.c
new file mode 100644
index 0000000..691ef25
--- /dev/null
+++ b/drivers/gator/gator_events_block.c
@@ -0,0 +1,153 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <trace/events/block.h>
+
+#define BLOCK_RQ_WR 0
+#define BLOCK_RQ_RD 1
+
+#define BLOCK_TOTAL (BLOCK_RQ_RD+1)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+#define EVENTWRITE REQ_RW
+#else
+#define EVENTWRITE REQ_WRITE
+#endif
+
+static ulong block_rq_wr_enabled;
+static ulong block_rq_rd_enabled;
+static ulong block_rq_wr_key;
+static ulong block_rq_rd_key;
+static atomic_t blockCnt[BLOCK_TOTAL];
+static int blockGet[BLOCK_TOTAL * 4];
+
+GATOR_DEFINE_PROBE(block_rq_complete, TP_PROTO(struct request_queue *q, struct request *rq))
+{
+ int write, size;
+
+ if (!rq)
+ return;
+
+ write = rq->cmd_flags & EVENTWRITE;
+ size = rq->resid_len;
+
+ if (!size)
+ return;
+
+ if (write) {
+ if (block_rq_wr_enabled) {
+ atomic_add(size, &blockCnt[BLOCK_RQ_WR]);
+ }
+ } else {
+ if (block_rq_rd_enabled) {
+ atomic_add(size, &blockCnt[BLOCK_RQ_RD]);
+ }
+ }
+}
+
+static int gator_events_block_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+
+ /* block_complete_wr */
+ dir = gatorfs_mkdir(sb, root, "Linux_block_rq_wr");
+ if (!dir) {
+ return -1;
+ }
+ gatorfs_create_ulong(sb, dir, "enabled", &block_rq_wr_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &block_rq_wr_key);
+
+ /* block_complete_rd */
+ dir = gatorfs_mkdir(sb, root, "Linux_block_rq_rd");
+ if (!dir) {
+ return -1;
+ }
+ gatorfs_create_ulong(sb, dir, "enabled", &block_rq_rd_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &block_rq_rd_key);
+
+ return 0;
+}
+
+static int gator_events_block_start(void)
+{
+ // register tracepoints
+ if (block_rq_wr_enabled || block_rq_rd_enabled)
+ if (GATOR_REGISTER_TRACE(block_rq_complete))
+ goto fail_block_rq_exit;
+ pr_debug("gator: registered block event tracepoints\n");
+
+ return 0;
+
+ // unregister tracepoints on error
+fail_block_rq_exit:
+ pr_err("gator: block event tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+
+ return -1;
+}
+
+static void gator_events_block_stop(void)
+{
+ if (block_rq_wr_enabled || block_rq_rd_enabled)
+ GATOR_UNREGISTER_TRACE(block_rq_complete);
+ pr_debug("gator: unregistered block event tracepoints\n");
+
+ block_rq_wr_enabled = 0;
+ block_rq_rd_enabled = 0;
+}
+
+static int gator_events_block_read(int **buffer)
+{
+ int len, value, data = 0;
+
+ if (!on_primary_core()) {
+ return 0;
+ }
+
+ len = 0;
+ if (block_rq_wr_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_WR])) > 0) {
+ atomic_sub(value, &blockCnt[BLOCK_RQ_WR]);
+ blockGet[len++] = block_rq_wr_key;
+ blockGet[len++] = 0; // indicates to Streamline that value bytes were written now, not since the last message
+ blockGet[len++] = block_rq_wr_key;
+ blockGet[len++] = value;
+ data += value;
+ }
+ if (block_rq_rd_enabled && (value = atomic_read(&blockCnt[BLOCK_RQ_RD])) > 0) {
+ atomic_sub(value, &blockCnt[BLOCK_RQ_RD]);
+ blockGet[len++] = block_rq_rd_key;
+ blockGet[len++] = 0; // indicates to Streamline that value bytes were read now, not since the last message
+ blockGet[len++] = block_rq_rd_key;
+ blockGet[len++] = value;
+ data += value;
+ }
+
+ if (buffer)
+ *buffer = blockGet;
+
+ return len;
+}
+
+static struct gator_interface gator_events_block_interface = {
+ .create_files = gator_events_block_create_files,
+ .start = gator_events_block_start,
+ .stop = gator_events_block_stop,
+ .read = gator_events_block_read,
+};
+
+int gator_events_block_init(void)
+{
+ block_rq_wr_enabled = 0;
+ block_rq_rd_enabled = 0;
+
+ block_rq_wr_key = gator_events_get_key();
+ block_rq_rd_key = gator_events_get_key();
+
+ return gator_events_install(&gator_events_block_interface);
+}
diff --git a/drivers/gator/gator_events_ccn-504.c b/drivers/gator/gator_events_ccn-504.c
new file mode 100644
index 0000000..b892319
--- /dev/null
+++ b/drivers/gator/gator_events_ccn-504.c
@@ -0,0 +1,346 @@
+/**
+ * Copyright (C) ARM Limited 2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+
+#include "gator.h"
+
+#define NUM_REGIONS 256
+#define REGION_SIZE (64*1024)
+#define REGION_DEBUG 1
+#define REGION_XP 64
+#define NUM_XPS 11
+
+// DT (Debug) region
+#define PMEVCNTSR0 0x0150
+#define PMCCNTRSR 0x0190
+#define PMCR 0x01A8
+#define PMSR 0x01B0
+#define PMSR_REQ 0x01B8
+#define PMSR_CLR 0x01C0
+
+// XP region
+#define DT_CONFIG 0x0300
+#define DT_CONTROL 0x0370
+
+// Multiple
+#define PMU_EVENT_SEL 0x0600
+#define OLY_ID 0xFF00
+
+#define CCNT 4
+#define CNTMAX (CCNT + 1)
+
+#define get_pmu_event_id(event) (((event) >> 0) & 0xFF)
+#define get_node_type(event) (((event) >> 8) & 0xFF)
+#define get_region(event) (((event) >> 16) & 0xFF)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+
+// From kernel/params.c
+#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \
+ int param_set_##name(const char *val, struct kernel_param *kp) \
+ { \
+ tmptype l; \
+ int ret; \
+ \
+ if (!val) return -EINVAL; \
+ ret = strtolfn(val, 0, &l); \
+ if (ret == -EINVAL || ((type)l != l)) \
+ return -EINVAL; \
+ *((type *)kp->arg) = l; \
+ return 0; \
+ } \
+ int param_get_##name(char *buffer, struct kernel_param *kp) \
+ { \
+ return sprintf(buffer, format, *((type *)kp->arg)); \
+ }
+
+#else
+
+// From kernel/params.c
+#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn) \
+ int param_set_##name(const char *val, const struct kernel_param *kp) \
+ { \
+ tmptype l; \
+ int ret; \
+ \
+ ret = strtolfn(val, 0, &l); \
+ if (ret < 0 || ((type)l != l)) \
+ return ret < 0 ? ret : -EINVAL; \
+ *((type *)kp->arg) = l; \
+ return 0; \
+ } \
+ int param_get_##name(char *buffer, const struct kernel_param *kp) \
+ { \
+ return scnprintf(buffer, PAGE_SIZE, format, \
+ *((type *)kp->arg)); \
+ } \
+ struct kernel_param_ops param_ops_##name = { \
+ .set = param_set_##name, \
+ .get = param_get_##name, \
+ }; \
+ EXPORT_SYMBOL(param_set_##name); \
+ EXPORT_SYMBOL(param_get_##name); \
+ EXPORT_SYMBOL(param_ops_##name)
+
+#endif
+
+STANDARD_PARAM_DEF(u64, u64, "%llu", u64, strict_strtoull);
+
+// From include/linux/moduleparam.h
+#define param_check_u64(name, p) __param_check(name, p, u64)
+
+MODULE_PARM_DESC(ccn504_addr, "CCN-504 physical base address");
+static u64 ccn504_addr = 0;
+module_param(ccn504_addr, u64, 0444);
+
+static void __iomem *gator_events_ccn504_base;
+static bool gator_events_ccn504_global_enabled;
+static unsigned long gator_events_ccn504_enabled[CNTMAX];
+static unsigned long gator_events_ccn504_event[CNTMAX];
+static unsigned long gator_events_ccn504_key[CNTMAX];
+static int gator_events_ccn504_buffer[2*CNTMAX];
+static int gator_events_ccn504_prev[CNTMAX];
+
+static void gator_events_ccn504_create_shutdown(void)
+{
+ if (gator_events_ccn504_base != NULL) {
+ iounmap(gator_events_ccn504_base);
+ }
+}
+
+static int gator_events_ccn504_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ int i;
+ char buf[32];
+
+ for (i = 0; i < CNTMAX; ++i) {
+ if (i == CCNT) {
+ snprintf(buf, sizeof(buf), "CCN-504_ccnt");
+ } else {
+ snprintf(buf, sizeof(buf), "CCN-504_cnt%i", i);
+ }
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (!dir) {
+ return -1;
+ }
+
+ gatorfs_create_ulong(sb, dir, "enabled", &gator_events_ccn504_enabled[i]);
+ if (i != CCNT) {
+ gatorfs_create_ulong(sb, dir, "event", &gator_events_ccn504_event[i]);
+ }
+ gatorfs_create_ro_ulong(sb, dir, "key", &gator_events_ccn504_key[i]);
+ }
+
+ return 0;
+}
+
+static void gator_events_ccn504_set_dt_config(int xp_node_id, int event_num, int value)
+{
+ u32 dt_config;
+
+ dt_config = readl(gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG);
+ dt_config |= (value + event_num) << (4*event_num);
+ writel(dt_config, gator_events_ccn504_base + (REGION_XP + xp_node_id)*REGION_SIZE + DT_CONFIG);
+}
+
+static int gator_events_ccn504_start(void)
+{
+ int i;
+
+ gator_events_ccn504_global_enabled = 0;
+ for (i = 0; i < CNTMAX; ++i) {
+ if (gator_events_ccn504_enabled[i]) {
+ gator_events_ccn504_global_enabled = 1;
+ break;
+ }
+ }
+
+ if (!gator_events_ccn504_global_enabled) {
+ return 0;
+ }
+
+ memset(&gator_events_ccn504_prev, 0x80, sizeof(gator_events_ccn504_prev));
+
+ // Disable INTREQ on overflow
+ // [6] ovfl_intr_en = 0
+ // perhaps set to 1?
+ // [5] cntr_rst = 0
+ // No register paring
+ // [4:1] cntcfg = 0
+ // Enable PMU features
+ // [0] pmu_en = 1
+ writel(0x1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMCR);
+
+ // Configure the XPs
+ for (i = 0; i < NUM_XPS; ++i) {
+ int dt_control;
+
+ // Pass on all events
+ writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG);
+
+ // Enable PMU capability
+ // [0] dt_enable = 1
+ dt_control = readl(gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONTROL);
+ dt_control |= 0x1;
+ writel(dt_control, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONTROL);
+ }
+
+ // Assume no other pmu_event_sel registers are set
+
+ // cycle counter does not need to be enabled
+ for (i = 0; i < CCNT; ++i) {
+ int pmu_event_id;
+ int node_type;
+ int region;
+ u32 pmu_event_sel;
+ u32 oly_id_whole;
+ u32 oly_id;
+ u32 node_id;
+
+ if (!gator_events_ccn504_enabled[i]) {
+ continue;
+ }
+
+ pmu_event_id = get_pmu_event_id(gator_events_ccn504_event[i]);
+ node_type = get_node_type(gator_events_ccn504_event[i]);
+ region = get_region(gator_events_ccn504_event[i]);
+
+ // Verify the node_type
+ oly_id_whole = readl(gator_events_ccn504_base + region*REGION_SIZE + OLY_ID);
+ oly_id = oly_id_whole & 0x1F;
+ node_id = (oly_id_whole >> 8) & 0x7F;
+ if ((oly_id != node_type) ||
+ ((node_type == 0x16) && ((oly_id != 0x14) && (oly_id != 0x15) && (oly_id != 0x16) && (oly_id != 0x18) && (oly_id != 0x19) && (oly_id != 0x1A)))) {
+ printk(KERN_ERR "gator: oly_id is 0x%x expected 0x%x\n", oly_id, node_type);
+ return -1;
+ }
+
+ // Set the control register
+ pmu_event_sel = readl(gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+ switch (node_type) {
+ case 0x08: // XP
+ pmu_event_sel |= pmu_event_id << (7*i);
+ gator_events_ccn504_set_dt_config(node_id, i, 0x4);
+ break;
+ case 0x04: // HN-F
+ case 0x16: // RN-I
+ case 0x10: // SBAS
+ pmu_event_sel |= pmu_event_id << (4*i);
+ gator_events_ccn504_set_dt_config(node_id/2, i, (node_id & 1) == 0 ? 0x8 : 0xC);
+ break;
+ }
+ writel(pmu_event_sel, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+ }
+
+ return 0;
+}
+
+static void gator_events_ccn504_stop(void)
+{
+ int i;
+
+ if (!gator_events_ccn504_global_enabled) {
+ return;
+ }
+
+ // cycle counter does not need to be disabled
+ for (i = 0; i < CCNT; ++i) {
+ int region;
+
+ if (!gator_events_ccn504_enabled[i]) {
+ continue;
+ }
+
+ region = get_region(gator_events_ccn504_event[i]);
+
+ writel(0, gator_events_ccn504_base + region*REGION_SIZE + PMU_EVENT_SEL);
+ }
+
+ // Clear dt_config
+ for (i = 0; i < NUM_XPS; ++i) {
+ writel(0, gator_events_ccn504_base + (REGION_XP + i)*REGION_SIZE + DT_CONFIG);
+ }
+}
+
+static int gator_events_ccn504_read(int **buffer)
+{
+ int i;
+ int len = 0;
+ int value;
+
+ if (!on_primary_core() || !gator_events_ccn504_global_enabled) {
+ return 0;
+ }
+
+ // Verify the pmsr register is zero
+ while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) != 0);
+
+ // Request a PMU snapshot
+ writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_REQ);
+
+ // Wait for the snapshot
+ while (readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR) == 0);
+
+ // Read the shadow registers
+ for (i = 0; i < CNTMAX; ++i) {
+ if (!gator_events_ccn504_enabled[i]) {
+ continue;
+ }
+
+ value = readl(gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + (i == CCNT ? PMCCNTRSR : PMEVCNTSR0 + 8*i));
+ if (gator_events_ccn504_prev[i] != 0x80808080) {
+ gator_events_ccn504_buffer[len++] = gator_events_ccn504_key[i];
+ gator_events_ccn504_buffer[len++] = value - gator_events_ccn504_prev[i];
+ }
+ gator_events_ccn504_prev[i] = value;
+
+ // Are the counters registers cleared when read? Is that what the cntr_rst bit on the pmcr register does?
+ }
+
+ // Clear the PMU snapshot status
+ writel(1, gator_events_ccn504_base + REGION_DEBUG*REGION_SIZE + PMSR_CLR);
+
+ if (buffer)
+ *buffer = gator_events_ccn504_buffer;
+
+ return len;
+}
+
+static struct gator_interface gator_events_ccn504_interface = {
+ .shutdown = gator_events_ccn504_create_shutdown,
+ .create_files = gator_events_ccn504_create_files,
+ .start = gator_events_ccn504_start,
+ .stop = gator_events_ccn504_stop,
+ .read = gator_events_ccn504_read,
+};
+
+int gator_events_ccn504_init(void)
+{
+ int i;
+
+ if (ccn504_addr == 0) {
+ return -1;
+ }
+
+ gator_events_ccn504_base = ioremap(ccn504_addr, NUM_REGIONS*REGION_SIZE);
+ if (gator_events_ccn504_base == NULL) {
+ printk(KERN_ERR "gator: ioremap returned NULL\n");
+ return -1;
+ }
+
+ for (i = 0; i < CNTMAX; ++i) {
+ gator_events_ccn504_enabled[i] = 0;
+ gator_events_ccn504_event[i] = 0;
+ gator_events_ccn504_key[i] = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_ccn504_interface);
+}
diff --git a/drivers/gator/gator_events_irq.c b/drivers/gator/gator_events_irq.c
new file mode 100644
index 0000000..b11879a
--- /dev/null
+++ b/drivers/gator/gator_events_irq.c
@@ -0,0 +1,165 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+#include <trace/events/irq.h>
+
+#define HARDIRQ 0
+#define SOFTIRQ 1
+#define TOTALIRQ (SOFTIRQ+1)
+
+static ulong hardirq_enabled;
+static ulong softirq_enabled;
+static ulong hardirq_key;
+static ulong softirq_key;
+static DEFINE_PER_CPU(atomic_t[TOTALIRQ], irqCnt);
+static DEFINE_PER_CPU(int[TOTALIRQ * 2], irqGet);
+
+GATOR_DEFINE_PROBE(irq_handler_exit,
+ TP_PROTO(int irq, struct irqaction *action, int ret))
+{
+ atomic_inc(&per_cpu(irqCnt, get_physical_cpu())[HARDIRQ]);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37)
+GATOR_DEFINE_PROBE(softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec))
+#else
+GATOR_DEFINE_PROBE(softirq_exit, TP_PROTO(unsigned int vec_nr))
+#endif
+{
+ atomic_inc(&per_cpu(irqCnt, get_physical_cpu())[SOFTIRQ]);
+}
+
+static int gator_events_irq_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+
+ /* irq */
+ dir = gatorfs_mkdir(sb, root, "Linux_irq_irq");
+ if (!dir) {
+ return -1;
+ }
+ gatorfs_create_ulong(sb, dir, "enabled", &hardirq_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &hardirq_key);
+
+ /* soft irq */
+ dir = gatorfs_mkdir(sb, root, "Linux_irq_softirq");
+ if (!dir) {
+ return -1;
+ }
+ gatorfs_create_ulong(sb, dir, "enabled", &softirq_enabled);
+ gatorfs_create_ro_ulong(sb, dir, "key", &softirq_key);
+
+ return 0;
+}
+
+static int gator_events_irq_online(int **buffer, bool migrate)
+{
+ int len = 0, cpu = get_physical_cpu();
+
+ // synchronization with the irq_exit functions is not necessary as the values are being reset
+ if (hardirq_enabled) {
+ atomic_set(&per_cpu(irqCnt, cpu)[HARDIRQ], 0);
+ per_cpu(irqGet, cpu)[len++] = hardirq_key;
+ per_cpu(irqGet, cpu)[len++] = 0;
+ }
+
+ if (softirq_enabled) {
+ atomic_set(&per_cpu(irqCnt, cpu)[SOFTIRQ], 0);
+ per_cpu(irqGet, cpu)[len++] = softirq_key;
+ per_cpu(irqGet, cpu)[len++] = 0;
+ }
+
+ if (buffer)
+ *buffer = per_cpu(irqGet, cpu);
+
+ return len;
+}
+
+static int gator_events_irq_start(void)
+{
+ // register tracepoints
+ if (hardirq_enabled)
+ if (GATOR_REGISTER_TRACE(irq_handler_exit))
+ goto fail_hardirq_exit;
+ if (softirq_enabled)
+ if (GATOR_REGISTER_TRACE(softirq_exit))
+ goto fail_softirq_exit;
+ pr_debug("gator: registered irq tracepoints\n");
+
+ return 0;
+
+ // unregister tracepoints on error
+fail_softirq_exit:
+ if (hardirq_enabled)
+ GATOR_UNREGISTER_TRACE(irq_handler_exit);
+fail_hardirq_exit:
+ pr_err("gator: irq tracepoints failed to activate, please verify that tracepoints are enabled in the linux kernel\n");
+
+ return -1;
+}
+
+static void gator_events_irq_stop(void)
+{
+ if (hardirq_enabled)
+ GATOR_UNREGISTER_TRACE(irq_handler_exit);
+ if (softirq_enabled)
+ GATOR_UNREGISTER_TRACE(softirq_exit);
+ pr_debug("gator: unregistered irq tracepoints\n");
+
+ hardirq_enabled = 0;
+ softirq_enabled = 0;
+}
+
+static int gator_events_irq_read(int **buffer)
+{
+ int len, value;
+ int cpu = get_physical_cpu();
+
+ len = 0;
+ if (hardirq_enabled) {
+ value = atomic_read(&per_cpu(irqCnt, cpu)[HARDIRQ]);
+ atomic_sub(value, &per_cpu(irqCnt, cpu)[HARDIRQ]);
+
+ per_cpu(irqGet, cpu)[len++] = hardirq_key;
+ per_cpu(irqGet, cpu)[len++] = value;
+ }
+
+ if (softirq_enabled) {
+ value = atomic_read(&per_cpu(irqCnt, cpu)[SOFTIRQ]);
+ atomic_sub(value, &per_cpu(irqCnt, cpu)[SOFTIRQ]);
+
+ per_cpu(irqGet, cpu)[len++] = softirq_key;
+ per_cpu(irqGet, cpu)[len++] = value;
+ }
+
+ if (buffer)
+ *buffer = per_cpu(irqGet, cpu);
+
+ return len;
+}
+
+static struct gator_interface gator_events_irq_interface = {
+ .create_files = gator_events_irq_create_files,
+ .online = gator_events_irq_online,
+ .start = gator_events_irq_start,
+ .stop = gator_events_irq_stop,
+ .read = gator_events_irq_read,
+};
+
+int gator_events_irq_init(void)
+{
+ hardirq_key = gator_events_get_key();
+ softirq_key = gator_events_get_key();
+
+ hardirq_enabled = 0;
+ softirq_enabled = 0;
+
+ return gator_events_install(&gator_events_irq_interface);
+}
diff --git a/drivers/gator/gator_events_l2c-310.c b/drivers/gator/gator_events_l2c-310.c
new file mode 100644
index 0000000..ee521af
--- /dev/null
+++ b/drivers/gator/gator_events_l2c-310.c
@@ -0,0 +1,208 @@
+/**
+ * l2c310 (L2 Cache Controller) event counters for gator
+ *
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#if defined(CONFIG_OF)
+#include <linux/of.h>
+#include <linux/of_address.h>
+#endif
+#include <asm/hardware/cache-l2x0.h>
+
+#include "gator.h"
+
+#define L2C310_COUNTERS_NUM 2
+
+static struct {
+ unsigned long enabled;
+ unsigned long event;
+ unsigned long key;
+} l2c310_counters[L2C310_COUNTERS_NUM];
+
+static int l2c310_buffer[L2C310_COUNTERS_NUM * 2];
+
+static void __iomem *l2c310_base;
+
+static void gator_events_l2c310_reset_counters(void)
+{
+ u32 val = readl(l2c310_base + L2X0_EVENT_CNT_CTRL);
+
+ val |= ((1 << L2C310_COUNTERS_NUM) - 1) << 1;
+
+ writel(val, l2c310_base + L2X0_EVENT_CNT_CTRL);
+}
+
+static int gator_events_l2c310_create_files(struct super_block *sb,
+ struct dentry *root)
+{
+ int i;
+
+ for (i = 0; i < L2C310_COUNTERS_NUM; i++) {
+ char buf[16];
+ struct dentry *dir;
+
+ snprintf(buf, sizeof(buf), "L2C-310_cnt%d", i);
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (WARN_ON(!dir))
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled",
+ &l2c310_counters[i].enabled);
+ gatorfs_create_ulong(sb, dir, "event",
+ &l2c310_counters[i].event);
+ gatorfs_create_ro_ulong(sb, dir, "key",
+ &l2c310_counters[i].key);
+ }
+
+ return 0;
+}
+
+static int gator_events_l2c310_start(void)
+{
+ static const unsigned long l2x0_event_cntx_cfg[L2C310_COUNTERS_NUM] = {
+ L2X0_EVENT_CNT0_CFG,
+ L2X0_EVENT_CNT1_CFG,
+ };
+ int i;
+
+ /* Counter event sources */
+ for (i = 0; i < L2C310_COUNTERS_NUM; i++)
+ writel((l2c310_counters[i].event & 0xf) << 2,
+ l2c310_base + l2x0_event_cntx_cfg[i]);
+
+ gator_events_l2c310_reset_counters();
+
+ /* Event counter enable */
+ writel(1, l2c310_base + L2X0_EVENT_CNT_CTRL);
+
+ return 0;
+}
+
+static void gator_events_l2c310_stop(void)
+{
+ /* Event counter disable */
+ writel(0, l2c310_base + L2X0_EVENT_CNT_CTRL);
+}
+
+static int gator_events_l2c310_read(int **buffer)
+{
+ static const unsigned long l2x0_event_cntx_val[L2C310_COUNTERS_NUM] = {
+ L2X0_EVENT_CNT0_VAL,
+ L2X0_EVENT_CNT1_VAL,
+ };
+ int i;
+ int len = 0;
+
+ if (!on_primary_core())
+ return 0;
+
+ for (i = 0; i < L2C310_COUNTERS_NUM; i++) {
+ if (l2c310_counters[i].enabled) {
+ l2c310_buffer[len++] = l2c310_counters[i].key;
+ l2c310_buffer[len++] = readl(l2c310_base +
+ l2x0_event_cntx_val[i]);
+ }
+ }
+
+ /* l2c310 counters are saturating, not wrapping in case of overflow */
+ gator_events_l2c310_reset_counters();
+
+ if (buffer)
+ *buffer = l2c310_buffer;
+
+ return len;
+}
+
+static struct gator_interface gator_events_l2c310_interface = {
+ .create_files = gator_events_l2c310_create_files,
+ .start = gator_events_l2c310_start,
+ .stop = gator_events_l2c310_stop,
+ .read = gator_events_l2c310_read,
+};
+
+#define L2C310_ADDR_PROBE (~0)
+
+MODULE_PARM_DESC(l2c310_addr, "L2C310 physical base address (0 to disable)");
+static unsigned long l2c310_addr = L2C310_ADDR_PROBE;
+module_param(l2c310_addr, ulong, 0444);
+
+static void __iomem *gator_events_l2c310_probe(void)
+{
+ phys_addr_t variants[] = {
+#if defined(CONFIG_ARCH_EXYNOS4) || defined(CONFIG_ARCH_S5PV310)
+ 0x10502000,
+#endif
+#if defined(CONFIG_ARCH_OMAP4)
+ 0x48242000,
+#endif
+#if defined(CONFIG_ARCH_TEGRA)
+ 0x50043000,
+#endif
+#if defined(CONFIG_ARCH_U8500)
+ 0xa0412000,
+#endif
+#if defined(CONFIG_ARCH_VEXPRESS)
+ 0x1e00a000, // A9x4 core tile (HBI-0191)
+ 0x2c0f0000, // New memory map tiles
+#endif
+ };
+ int i;
+ void __iomem *base;
+#if defined(CONFIG_OF)
+ struct device_node *node = of_find_all_nodes(NULL);
+
+ if (node) {
+ of_node_put(node);
+
+ node = of_find_compatible_node(NULL, NULL, "arm,pl310-cache");
+ base = of_iomap(node, 0);
+ of_node_put(node);
+
+ return base;
+ }
+#endif
+
+ for (i = 0; i < ARRAY_SIZE(variants); i++) {
+ base = ioremap(variants[i], SZ_4K);
+ if (base) {
+ u32 cache_id = readl(base + L2X0_CACHE_ID);
+
+ if ((cache_id & 0xff0003c0) == 0x410000c0)
+ return base;
+
+ iounmap(base);
+ }
+ }
+
+ return NULL;
+}
+
+int gator_events_l2c310_init(void)
+{
+ int i;
+
+ if (gator_cpuid() != CORTEX_A5 && gator_cpuid() != CORTEX_A9)
+ return -1;
+
+ if (l2c310_addr == L2C310_ADDR_PROBE)
+ l2c310_base = gator_events_l2c310_probe();
+ else if (l2c310_addr)
+ l2c310_base = ioremap(l2c310_addr, SZ_4K);
+
+ if (!l2c310_base)
+ return -1;
+
+ for (i = 0; i < L2C310_COUNTERS_NUM; i++) {
+ l2c310_counters[i].enabled = 0;
+ l2c310_counters[i].key = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_l2c310_interface);
+}
diff --git a/drivers/gator/gator_events_mali_4xx.c b/drivers/gator/gator_events_mali_4xx.c
new file mode 100644
index 0000000..6719c1e
--- /dev/null
+++ b/drivers/gator/gator_events_mali_4xx.c
@@ -0,0 +1,723 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+
+#include "linux/mali_linux_trace.h"
+
+#include "gator_events_mali_common.h"
+#include "gator_events_mali_4xx.h"
+
+/*
+ * There are (currently) four different variants of the comms between gator and Mali:
+ * 1 (deprecated): No software counter support
+ * 2 (deprecated): Tracepoint called for each separate s/w counter value as it appears
+ * 3 (default): Single tracepoint for all s/w counters in a bundle.
+ * Interface style 3 is the default if no other is specified. 1 and 2 will be eliminated when
+ * existing Mali DDKs are upgraded.
+ * 4. As above, but for the Utgard (Mali-450) driver.
+ */
+
+#if !defined(GATOR_MALI_INTERFACE_STYLE)
+#define GATOR_MALI_INTERFACE_STYLE (3)
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE < 4
+#include "mali/mali_mjollnir_profiling_gator_api.h"
+#else
+#include "mali/mali_utgard_profiling_gator_api.h"
+#endif
+
+/*
+ * Check that the MALI_SUPPORT define is set to one of the allowable device codes.
+ */
+#if (MALI_SUPPORT != MALI_4xx)
+#error MALI_SUPPORT set to an invalid device code: expecting MALI_4xx
+#endif
+
+/* gatorfs variables for counter enable state,
+ * the event the counter should count and the
+ * 'key' (a unique id set by gatord and returned
+ * by gator.ko)
+ */
+static unsigned long counter_enabled[NUMBER_OF_EVENTS];
+static unsigned long counter_event[NUMBER_OF_EVENTS];
+static unsigned long counter_key[NUMBER_OF_EVENTS];
+
+/* The data we have recorded */
+static u32 counter_data[NUMBER_OF_EVENTS];
+/* The address to sample (or 0 if samples are sent to us) */
+static u32 *counter_address[NUMBER_OF_EVENTS];
+
+/* An array used to return the data we recorded
+ * as key,value pairs hence the *2
+ */
+static unsigned long counter_dump[NUMBER_OF_EVENTS * 2];
+static unsigned long counter_prev[NUMBER_OF_EVENTS];
+
+/* Note whether tracepoints have been registered */
+static int trace_registered;
+
+/*
+ * These numbers define the actual numbers of each block type that exist in the system. Initially
+ * these are set to the maxima defined above; if the driver is capable of being queried (newer
+ * drivers only) then the values may be revised.
+ */
+static unsigned int n_vp_cores = MAX_NUM_VP_CORES;
+static unsigned int n_l2_cores = MAX_NUM_L2_CACHE_CORES;
+static unsigned int n_fp_cores = MAX_NUM_FP_CORES;
+
+/**
+ * Calculate the difference and handle the overflow.
+ */
+static u32 get_difference(u32 start, u32 end)
+{
+ if (start - end >= 0) {
+ return start - end;
+ }
+
+ // Mali counters are unsigned 32 bit values that wrap.
+ return (4294967295u - end) + start;
+}
+
+/**
+ * Returns non-zero if the given counter ID is an activity counter.
+ */
+static inline int is_activity_counter(unsigned int event_id)
+{
+ return (event_id >= FIRST_ACTIVITY_EVENT &&
+ event_id <= LAST_ACTIVITY_EVENT);
+}
+
+/**
+ * Returns non-zero if the given counter ID is a hardware counter.
+ */
+static inline int is_hw_counter(unsigned int event_id)
+{
+ return (event_id >= FIRST_HW_COUNTER && event_id <= LAST_HW_COUNTER);
+}
+
+/*
+ * These are provided for utgard compatibility.
+ */
+typedef void _mali_profiling_get_mali_version_type(struct _mali_profiling_mali_version *values);
+typedef u32 _mali_profiling_get_l2_counters_type(_mali_profiling_l2_counter_values *values);
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+/**
+ * Returns non-zero if the given counter ID is a software counter.
+ */
+static inline int is_sw_counter(unsigned int event_id)
+{
+ return (event_id >= FIRST_SW_COUNTER && event_id <= LAST_SW_COUNTER);
+}
+#endif
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+/*
+ * The Mali DDK uses s64 types to contain software counter values, but gator
+ * can only use a maximum of 32 bits. This function scales a software counter
+ * to an appropriate range.
+ */
+static u32 scale_sw_counter_value(unsigned int event_id, signed long long value)
+{
+ u32 scaled_value;
+
+ switch (event_id) {
+ case COUNTER_GLES_UPLOAD_TEXTURE_TIME:
+ case COUNTER_GLES_UPLOAD_VBO_TIME:
+ scaled_value = (u32)div_s64(value, 1000000);
+ break;
+ default:
+ scaled_value = (u32)value;
+ break;
+ }
+
+ return scaled_value;
+}
+#endif
+
+/* Probe for continuously sampled counter */
+#if 0 //WE_DONT_CURRENTLY_USE_THIS_SO_SUPPRESS_WARNING
+GATOR_DEFINE_PROBE(mali_sample_address, TP_PROTO(unsigned int event_id, u32 *addr))
+{
+ /* Turning on too many pr_debug statements in frequently called functions
+ * can cause stability and/or performance problems
+ */
+ //pr_debug("gator: mali_sample_address %d %d\n", event_id, addr);
+ if (event_id >= ACTIVITY_VP && event_id <= COUNTER_FP3_C1) {
+ counter_address[event_id] = addr;
+ }
+}
+#endif
+
+/* Probe for hardware counter events */
+GATOR_DEFINE_PROBE(mali_hw_counter, TP_PROTO(unsigned int event_id, unsigned int value))
+{
+ /* Turning on too many pr_debug statements in frequently called functions
+ * can cause stability and/or performance problems
+ */
+ //pr_debug("gator: mali_hw_counter %d %d\n", event_id, value);
+ if (is_hw_counter(event_id)) {
+ counter_data[event_id] = value;
+ }
+}
+
+#if GATOR_MALI_INTERFACE_STYLE == 2
+GATOR_DEFINE_PROBE(mali_sw_counter, TP_PROTO(unsigned int event_id, signed long long value))
+{
+ if (is_sw_counter(event_id)) {
+ counter_data[event_id] = scale_sw_counter_value(event_id, value);
+ }
+}
+#endif /* GATOR_MALI_INTERFACE_STYLE == 2 */
+
+#if GATOR_MALI_INTERFACE_STYLE >= 3
+GATOR_DEFINE_PROBE(mali_sw_counters, TP_PROTO(pid_t pid, pid_t tid, void *surface_id, unsigned int *counters))
+{
+ u32 i;
+
+ /* Copy over the values for those counters which are enabled. */
+ for (i = FIRST_SW_COUNTER; i <= LAST_SW_COUNTER; i++) {
+ if (counter_enabled[i]) {
+ counter_data[i] = (u32)(counters[i - FIRST_SW_COUNTER]);
+ }
+ }
+}
+#endif /* GATOR_MALI_INTERFACE_STYLE >= 3 */
+
+/**
+ * Create a single filesystem entry for a specified event.
+ * @param sb the superblock
+ * @param root Filesystem root
+ * @param name The name of the entry to create
+ * @param event The ID of the event
+ * @param create_event_item boolean indicating whether to create an 'event' filesystem entry. True to create.
+ *
+ * @return 0 if ok, non-zero if the create failed.
+ */
+static int create_fs_entry(struct super_block *sb, struct dentry *root, const char *name, int event, int create_event_item)
+{
+ struct dentry *dir;
+
+ dir = gatorfs_mkdir(sb, root, name);
+
+ if (!dir) {
+ return -1;
+ }
+
+ if (create_event_item) {
+ gatorfs_create_ulong(sb, dir, "event", &counter_event[event]);
+ }
+
+ gatorfs_create_ulong(sb, dir, "enabled", &counter_enabled[event]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &counter_key[event]);
+
+ return 0;
+}
+
+#if GATOR_MALI_INTERFACE_STYLE > 3
+/*
+ * Read the version info structure if available
+ */
+static void initialise_version_info(void)
+{
+ _mali_profiling_get_mali_version_type *mali_profiling_get_mali_version_symbol;
+
+ mali_profiling_get_mali_version_symbol = symbol_get(_mali_profiling_get_mali_version);
+
+ if (mali_profiling_get_mali_version_symbol) {
+ struct _mali_profiling_mali_version version_info;
+
+ pr_debug("gator: mali online _mali_profiling_get_mali_version symbol @ %p\n",
+ mali_profiling_get_mali_version_symbol);
+
+ /*
+ * Revise the number of each different core type using information derived from the DDK.
+ */
+ mali_profiling_get_mali_version_symbol(&version_info);
+
+ n_fp_cores = version_info.num_of_fp_cores;
+ n_vp_cores = version_info.num_of_vp_cores;
+ n_l2_cores = version_info.num_of_l2_cores;
+
+ /* Release the function - we're done with it. */
+ symbol_put(_mali_profiling_get_mali_version);
+ } else {
+ printk("gator: mali online _mali_profiling_get_mali_version symbol not found\n");
+ }
+}
+#endif
+
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+ int event;
+ const char *mali_name = gator_mali_get_mali_name();
+
+ char buf[40];
+ int core_id;
+ int counter_number;
+
+ pr_debug("gator: Initialising counters with style = %d\n", GATOR_MALI_INTERFACE_STYLE);
+
+#if GATOR_MALI_INTERFACE_STYLE > 3
+ /*
+ * Initialise first: this sets up the number of cores available (on compatible DDK versions).
+ * Ideally this would not need guarding but other parts of the code depend on the interface style being set
+ * correctly; if it is not then the system can enter an inconsistent state.
+ */
+ initialise_version_info();
+#endif
+
+ /* Vertex processor counters */
+ for (core_id = 0; core_id < n_vp_cores; core_id++) {
+ int activity_counter_id = ACTIVITY_VP_0;
+ snprintf(buf, sizeof buf, "ARM_%s_VP_%d_active", mali_name, core_id);
+ if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) {
+ return -1;
+ }
+
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ int counter_id = COUNTER_VP_0_C0 + (2 * core_id) + counter_number;
+
+ snprintf(buf, sizeof buf, "ARM_%s_VP_%d_cnt%d", mali_name, core_id, counter_number);
+ if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+ return -1;
+ }
+ }
+ }
+
+ /* Fragment processors' counters */
+ for (core_id = 0; core_id < n_fp_cores; core_id++) {
+ int activity_counter_id = ACTIVITY_FP_0 + core_id;
+
+ snprintf(buf, sizeof buf, "ARM_%s_FP_%d_active", mali_name, core_id);
+ if (create_fs_entry(sb, root, buf, activity_counter_id, 0) != 0) {
+ return -1;
+ }
+
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ int counter_id = COUNTER_FP_0_C0 + (2 * core_id) + counter_number;
+
+ snprintf(buf, sizeof buf, "ARM_%s_FP_%d_cnt%d", mali_name, core_id, counter_number);
+ if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+ return -1;
+ }
+ }
+ }
+
+ /* L2 Cache counters */
+ for (core_id = 0; core_id < n_l2_cores; core_id++) {
+ for (counter_number = 0; counter_number < 2; counter_number++) {
+ int counter_id = COUNTER_L2_0_C0 + (2 * core_id) + counter_number;
+
+ snprintf(buf, sizeof buf, "ARM_%s_L2_%d_cnt%d", mali_name, core_id, counter_number);
+ if (create_fs_entry(sb, root, buf, counter_id, 1) != 0) {
+ return -1;
+ }
+ }
+ }
+
+ /* Now set up the software counter entries */
+ for (event = FIRST_SW_COUNTER; event <= LAST_SW_COUNTER; event++) {
+ snprintf(buf, sizeof(buf), "ARM_%s_SW_%d", mali_name, event - FIRST_SW_COUNTER);
+
+ if (create_fs_entry(sb, root, buf, event, 0) != 0) {
+ return -1;
+ }
+ }
+
+ /* Now set up the special counter entries */
+ snprintf(buf, sizeof(buf), "ARM_%s_Filmstrip_cnt0", mali_name);
+ if (create_fs_entry(sb, root, buf, COUNTER_FILMSTRIP, 1) != 0) {
+ return -1;
+ }
+
+#ifdef DVFS_REPORTED_BY_DDK
+ snprintf(buf, sizeof(buf), "ARM_%s_Frequency", mali_name);
+ if (create_fs_entry(sb, root, buf, COUNTER_FREQUENCY, 1) != 0) {
+ return -1;
+ }
+
+ snprintf(buf, sizeof(buf), "ARM_%s_Voltage", mali_name);
+ if (create_fs_entry(sb, root, buf, COUNTER_VOLTAGE, 1) != 0) {
+ return -1;
+ }
+#endif
+
+ return 0;
+}
+
+/*
+ * Local store for the get_counters entry point into the DDK.
+ * This is stored here since it is used very regularly.
+ */
+static mali_profiling_get_counters_type *mali_get_counters = NULL;
+static _mali_profiling_get_l2_counters_type *mali_get_l2_counters = NULL;
+
+/*
+ * Examine list of counters between two index limits and determine if any one is enabled.
+ * Returns 1 if any counter is enabled, 0 if none is.
+ */
+static int is_any_counter_enabled(unsigned int first_counter, unsigned int last_counter)
+{
+ unsigned int i;
+
+ for (i = first_counter; i <= last_counter; i++) {
+ if (counter_enabled[i]) {
+ return 1; /* At least one counter is enabled */
+ }
+ }
+
+ return 0; /* No s/w counters enabled */
+}
+
+static void init_counters(unsigned int from_counter, unsigned int to_counter)
+{
+ unsigned int counter_id;
+
+ /* If a Mali driver is present and exporting the appropriate symbol
+ * then we can request the HW counters (of which there are only 2)
+ * be configured to count the desired events
+ */
+ mali_profiling_set_event_type *mali_set_hw_event;
+
+ mali_set_hw_event = symbol_get(_mali_profiling_set_event);
+
+ if (mali_set_hw_event) {
+ pr_debug("gator: mali online _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
+
+ for (counter_id = from_counter; counter_id <= to_counter; counter_id++) {
+ if (counter_enabled[counter_id]) {
+ mali_set_hw_event(counter_id, counter_event[counter_id]);
+ } else {
+ mali_set_hw_event(counter_id, 0xFFFFFFFF);
+ }
+ }
+
+ symbol_put(_mali_profiling_set_event);
+ } else {
+ printk("gator: mali online _mali_profiling_set_event symbol not found\n");
+ }
+}
+
+static void mali_counter_initialize(void)
+{
+ int i;
+ int core_id;
+
+ mali_profiling_control_type *mali_control;
+
+ init_counters(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores) - 1);
+ init_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1);
+ init_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1);
+
+ /* Generic control interface for Mali DDK. */
+ mali_control = symbol_get(_mali_profiling_control);
+ if (mali_control) {
+ /* The event attribute in the XML file keeps the actual frame rate. */
+ unsigned int rate = counter_event[COUNTER_FILMSTRIP] & 0xff;
+ unsigned int resize_factor = (counter_event[COUNTER_FILMSTRIP] >> 8) & 0xff;
+
+ pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control);
+
+ mali_control(SW_COUNTER_ENABLE, (is_any_counter_enabled(FIRST_SW_COUNTER, LAST_SW_COUNTER) ? 1 : 0));
+ mali_control(FBDUMP_CONTROL_ENABLE, (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0));
+ mali_control(FBDUMP_CONTROL_RATE, rate);
+ mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor);
+
+ pr_debug("gator: sent mali_control enabled=%d, rate=%d\n", (counter_enabled[COUNTER_FILMSTRIP] ? 1 : 0), rate);
+
+ symbol_put(_mali_profiling_control);
+ } else {
+ printk("gator: mali online _mali_profiling_control symbol not found\n");
+ }
+
+ mali_get_counters = symbol_get(_mali_profiling_get_counters);
+ if (mali_get_counters) {
+ pr_debug("gator: mali online _mali_profiling_get_counters symbol @ %p\n", mali_get_counters);
+
+ } else {
+ pr_debug("gator WARNING: mali _mali_profiling_get_counters symbol not defined");
+ }
+
+ mali_get_l2_counters = symbol_get(_mali_profiling_get_l2_counters);
+ if (mali_get_l2_counters) {
+ pr_debug("gator: mali online _mali_profiling_get_l2_counters symbol @ %p\n", mali_get_l2_counters);
+
+ } else {
+ pr_debug("gator WARNING: mali _mali_profiling_get_l2_counters symbol not defined");
+ }
+
+ if (!mali_get_counters && !mali_get_l2_counters) {
+ pr_debug("gator: WARNING: no L2 counters available");
+ n_l2_cores = 0;
+ }
+
+ for (core_id = 0; core_id < n_l2_cores; core_id++) {
+ int counter_id = COUNTER_L2_0_C0 + (2 * core_id);
+ counter_prev[counter_id] = 0;
+ counter_prev[counter_id + 1] = 0;
+ }
+
+ /* Clear counters in the start */
+ for (i = 0; i < NUMBER_OF_EVENTS; i++) {
+ counter_data[i] = 0;
+ }
+}
+
+static void mali_counter_deinitialize(void)
+{
+ mali_profiling_set_event_type *mali_set_hw_event;
+ mali_profiling_control_type *mali_control;
+
+ mali_set_hw_event = symbol_get(_mali_profiling_set_event);
+
+ if (mali_set_hw_event) {
+ int i;
+
+ pr_debug("gator: mali offline _mali_profiling_set_event symbol @ %p\n", mali_set_hw_event);
+ for (i = FIRST_HW_COUNTER; i <= LAST_HW_COUNTER; i++) {
+ mali_set_hw_event(i, 0xFFFFFFFF);
+ }
+
+ symbol_put(_mali_profiling_set_event);
+ } else {
+ printk("gator: mali offline _mali_profiling_set_event symbol not found\n");
+ }
+
+ /* Generic control interface for Mali DDK. */
+ mali_control = symbol_get(_mali_profiling_control);
+
+ if (mali_control) {
+ pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_control);
+
+ /* Reset the DDK state - disable counter collection */
+ mali_control(SW_COUNTER_ENABLE, 0);
+
+ mali_control(FBDUMP_CONTROL_ENABLE, 0);
+
+ symbol_put(_mali_profiling_control);
+ } else {
+ printk("gator: mali offline _mali_profiling_control symbol not found\n");
+ }
+
+ if (mali_get_counters) {
+ symbol_put(_mali_profiling_get_counters);
+ }
+
+ if (mali_get_l2_counters) {
+ symbol_put(_mali_profiling_get_l2_counters);
+ }
+}
+
+static int start(void)
+{
+ // register tracepoints
+ if (GATOR_REGISTER_TRACE(mali_hw_counter)) {
+ printk("gator: mali_hw_counter tracepoint failed to activate\n");
+ return -1;
+ }
+
+#if GATOR_MALI_INTERFACE_STYLE == 1
+ /* None. */
+#elif GATOR_MALI_INTERFACE_STYLE == 2
+ /* For patched Mali driver. */
+ if (GATOR_REGISTER_TRACE(mali_sw_counter)) {
+ printk("gator: mali_sw_counter tracepoint failed to activate\n");
+ return -1;
+ }
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
+ /* For Mali drivers with built-in support. */
+ if (GATOR_REGISTER_TRACE(mali_sw_counters)) {
+ printk("gator: mali_sw_counters tracepoint failed to activate\n");
+ return -1;
+ }
+#else
+#error Unknown GATOR_MALI_INTERFACE_STYLE option.
+#endif
+
+ trace_registered = 1;
+
+ mali_counter_initialize();
+ return 0;
+}
+
+static void stop(void)
+{
+ unsigned int cnt;
+
+ pr_debug("gator: mali stop\n");
+
+ if (trace_registered) {
+ GATOR_UNREGISTER_TRACE(mali_hw_counter);
+
+#if GATOR_MALI_INTERFACE_STYLE == 1
+ /* None. */
+#elif GATOR_MALI_INTERFACE_STYLE == 2
+ /* For patched Mali driver. */
+ GATOR_UNREGISTER_TRACE(mali_sw_counter);
+#elif GATOR_MALI_INTERFACE_STYLE >= 3
+ /* For Mali drivers with built-in support. */
+ GATOR_UNREGISTER_TRACE(mali_sw_counters);
+#else
+#error Unknown GATOR_MALI_INTERFACE_STYLE option.
+#endif
+
+ pr_debug("gator: mali timeline tracepoint deactivated\n");
+
+ trace_registered = 0;
+ }
+
+ for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {
+ counter_enabled[cnt] = 0;
+ counter_event[cnt] = 0;
+ counter_address[cnt] = NULL;
+ }
+
+ mali_counter_deinitialize();
+}
+
+static void dump_counters(unsigned int from_counter, unsigned int to_counter, unsigned int *len)
+{
+ unsigned int counter_id;
+
+ for (counter_id = from_counter; counter_id <= to_counter; counter_id++) {
+ if (counter_enabled[counter_id]) {
+ counter_dump[(*len)++] = counter_key[counter_id];
+ counter_dump[(*len)++] = counter_data[counter_id];
+
+ counter_data[counter_id] = 0;
+ }
+ }
+}
+
+static int read(int **buffer)
+{
+ int len = 0;
+
+ if (!on_primary_core())
+ return 0;
+
+ // Read the L2 C0 and C1 here.
+ if (n_l2_cores > 0 && is_any_counter_enabled(COUNTER_L2_0_C0, COUNTER_L2_0_C0 + (2 * n_l2_cores))) {
+ unsigned int unavailable_l2_caches = 0;
+ _mali_profiling_l2_counter_values cache_values;
+ unsigned int cache_id;
+ struct _mali_profiling_core_counters *per_core;
+
+ /* Poke the driver to get the counter values - older style; only one L2 cache */
+ if (mali_get_l2_counters) {
+ unavailable_l2_caches = mali_get_l2_counters(&cache_values);
+ } else if (mali_get_counters) {
+ per_core = &cache_values.cores[0];
+ mali_get_counters(&per_core->source0, &per_core->value0, &per_core->source1, &per_core->value1);
+ } else {
+ /* This should never happen, as n_l2_caches is only set > 0 if one of the above functions is found. */
+ }
+
+ /* Fill in the two cache counter values for each cache block. */
+ for (cache_id = 0; cache_id < n_l2_cores; cache_id++) {
+ unsigned int counter_id_0 = COUNTER_L2_0_C0 + (2 * cache_id);
+ unsigned int counter_id_1 = counter_id_0 + 1;
+
+ if ((1 << cache_id) & unavailable_l2_caches) {
+ continue; /* This cache is unavailable (powered-off, possibly). */
+ }
+
+ per_core = &cache_values.cores[cache_id];
+
+ if (counter_enabled[counter_id_0]) {
+ // Calculate and save src0's counter val0
+ counter_dump[len++] = counter_key[counter_id_0];
+ counter_dump[len++] = get_difference(per_core->value0, counter_prev[counter_id_0]);
+ }
+
+ if (counter_enabled[counter_id_1]) {
+ // Calculate and save src1's counter val1
+ counter_dump[len++] = counter_key[counter_id_1];
+ counter_dump[len++] = get_difference(per_core->value1, counter_prev[counter_id_1]);
+ }
+
+ // Save the previous values for the counters.
+ counter_prev[counter_id_0] = per_core->value0;
+ counter_prev[counter_id_1] = per_core->value1;
+ }
+ }
+
+ /* Process other (non-timeline) counters. */
+ dump_counters(COUNTER_VP_0_C0, COUNTER_VP_0_C0 + (2 * n_vp_cores) - 1, &len);
+ dump_counters(COUNTER_FP_0_C0, COUNTER_FP_0_C0 + (2 * n_fp_cores) - 1, &len);
+
+ dump_counters(FIRST_SW_COUNTER, LAST_SW_COUNTER, &len);
+
+#ifdef DVFS_REPORTED_BY_DDK
+ {
+ int cnt;
+ /*
+ * Add in the voltage and frequency counters if enabled. Note that, since these are
+ * actually passed as events, the counter value should not be cleared.
+ */
+ cnt = COUNTER_FREQUENCY;
+ if (counter_enabled[cnt]) {
+ counter_dump[len++] = counter_key[cnt];
+ counter_dump[len++] = counter_data[cnt];
+ }
+
+ cnt = COUNTER_VOLTAGE;
+ if (counter_enabled[cnt]) {
+ counter_dump[len++] = counter_key[cnt];
+ counter_dump[len++] = counter_data[cnt];
+ }
+ }
+#endif
+
+ if (buffer) {
+ *buffer = (int *)counter_dump;
+ }
+
+ return len;
+}
+
+static struct gator_interface gator_events_mali_interface = {
+ .create_files = create_files,
+ .start = start,
+ .stop = stop,
+ .read = read,
+};
+
+extern void gator_events_mali_log_dvfs_event(unsigned int frequency_mhz, unsigned int voltage_mv)
+{
+#ifdef DVFS_REPORTED_BY_DDK
+ counter_data[COUNTER_FREQUENCY] = frequency_mhz;
+ counter_data[COUNTER_VOLTAGE] = voltage_mv;
+#endif
+}
+
+int gator_events_mali_init(void)
+{
+ unsigned int cnt;
+
+ pr_debug("gator: mali init\n");
+
+ for (cnt = 0; cnt < NUMBER_OF_EVENTS; cnt++) {
+ counter_enabled[cnt] = 0;
+ counter_event[cnt] = 0;
+ counter_key[cnt] = gator_events_get_key();
+ counter_address[cnt] = NULL;
+ counter_data[cnt] = 0;
+ }
+
+ trace_registered = 0;
+
+ return gator_events_install(&gator_events_mali_interface);
+}
diff --git a/drivers/gator/gator_events_mali_4xx.h b/drivers/gator/gator_events_mali_4xx.h
new file mode 100644
index 0000000..413ad0f
--- /dev/null
+++ b/drivers/gator/gator_events_mali_4xx.h
@@ -0,0 +1,18 @@
+/**
+ * Copyright (C) ARM Limited 2011-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/*
+ * Header contains common definitions for the Mali-4xx processors.
+ */
+#if !defined(GATOR_EVENTS_MALI_4xx_H)
+#define GATOR_EVENTS_MALI_4xx_H
+
+extern void gator_events_mali_log_dvfs_event(unsigned int d0, unsigned int d1);
+
+#endif /* GATOR_EVENTS_MALI_4xx_H */
diff --git a/drivers/gator/gator_events_mali_common.c b/drivers/gator/gator_events_mali_common.c
new file mode 100644
index 0000000..466ca16
--- /dev/null
+++ b/drivers/gator/gator_events_mali_common.c
@@ -0,0 +1,81 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include "gator_events_mali_common.h"
+
+static u32 gator_mali_get_id(void)
+{
+ return MALI_SUPPORT;
+}
+
+extern const char *gator_mali_get_mali_name(void)
+{
+ u32 id = gator_mali_get_id();
+
+ switch (id) {
+ case MALI_T6xx:
+ return "Mali-T6xx";
+ case MALI_4xx:
+ return "Mali-4xx";
+ default:
+ pr_debug("gator: Mali-T6xx: unknown Mali ID (%d)\n", id);
+ return "Mali-Unknown";
+ }
+}
+
+extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event)
+{
+ int err;
+ char buf[255];
+ struct dentry *dir;
+
+ /* If the counter name is empty ignore it */
+ if (strlen(event_name) != 0) {
+ /* Set up the filesystem entry for this event. */
+ snprintf(buf, sizeof(buf), "ARM_%s_%s", mali_name, event_name);
+
+ dir = gatorfs_mkdir(sb, root, buf);
+
+ if (dir == NULL) {
+ pr_debug("gator: Mali-T6xx: error creating file system for: %s (%s)", event_name, buf);
+ return -1;
+ }
+
+ err = gatorfs_create_ulong(sb, dir, "enabled", &counter->enabled);
+ if (err != 0) {
+ pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ulong for: %s (%s)", event_name, buf);
+ return -1;
+ }
+ err = gatorfs_create_ro_ulong(sb, dir, "key", &counter->key);
+ if (err != 0) {
+ pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf);
+ return -1;
+ }
+ if (event != NULL) {
+ err = gatorfs_create_ulong(sb, dir, "event", event);
+ if (err != 0) {
+ pr_debug("gator: Mali-T6xx: error calling gatorfs_create_ro_ulong for: %s (%s)", event_name, buf);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+extern void gator_mali_initialise_counters(mali_counter counters[], unsigned int n_counters)
+{
+ unsigned int cnt;
+
+ for (cnt = 0; cnt < n_counters; cnt++) {
+ mali_counter *counter = &counters[cnt];
+
+ counter->key = gator_events_get_key();
+ counter->enabled = 0;
+ }
+}
diff --git a/drivers/gator/gator_events_mali_common.h b/drivers/gator/gator_events_mali_common.h
new file mode 100644
index 0000000..509f9b6
--- /dev/null
+++ b/drivers/gator/gator_events_mali_common.h
@@ -0,0 +1,86 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#if !defined(GATOR_EVENTS_MALI_COMMON_H)
+#define GATOR_EVENTS_MALI_COMMON_H
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+
+/* Device codes for each known GPU */
+#define MALI_4xx (0x0b07)
+#define MALI_T6xx (0x0056)
+
+/* Ensure that MALI_SUPPORT has been defined to something. */
+#ifndef MALI_SUPPORT
+#error MALI_SUPPORT not defined!
+#endif
+
+/* Values for the supported activity event types */
+#define ACTIVITY_START (1)
+#define ACTIVITY_STOP (2)
+
+/*
+ * Runtime state information for a counter.
+ */
+typedef struct {
+ unsigned long key; /* 'key' (a unique id set by gatord and returned by gator.ko) */
+ unsigned long enabled; /* counter enable state */
+} mali_counter;
+
+/*
+ * Mali-4xx
+ */
+typedef int mali_profiling_set_event_type(unsigned int, int);
+typedef void mali_profiling_control_type(unsigned int, unsigned int);
+typedef void mali_profiling_get_counters_type(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
+
+/*
+ * Driver entry points for functions called directly by gator.
+ */
+extern int _mali_profiling_set_event(unsigned int, int);
+extern void _mali_profiling_control(unsigned int, unsigned int);
+extern void _mali_profiling_get_counters(unsigned int *, unsigned int *, unsigned int *, unsigned int *);
+
+/**
+ * Returns a name which identifies the GPU type (eg Mali-4xx, Mali-T6xx).
+ *
+ * @return The name as a constant string.
+ */
+extern const char *gator_mali_get_mali_name(void);
+
+/**
+ * Creates a filesystem entry under /dev/gator relating to the specified event name and key, and
+ * associate the key/enable values with this entry point.
+ *
+ * @param mali_name A name related to the type of GPU, obtained from a call to gator_mali_get_mali_name()
+ * @param event_name The name of the event.
+ * @param sb Linux super block
+ * @param root Directory under which the entry will be created.
+ * @param counter_key Ptr to location which will be associated with the counter key.
+ * @param counter_enabled Ptr to location which will be associated with the counter enable state.
+ *
+ * @return 0 if entry point was created, non-zero if not.
+ */
+extern int gator_mali_create_file_system(const char *mali_name, const char *event_name, struct super_block *sb, struct dentry *root, mali_counter *counter, unsigned long *event);
+
+/**
+ * Initializes the counter array.
+ *
+ * @param keys The array of counters
+ * @param n_counters The number of entries in each of the arrays.
+ */
+extern void gator_mali_initialise_counters(mali_counter counters[], unsigned int n_counters);
+
+#endif /* GATOR_EVENTS_MALI_COMMON_H */
diff --git a/drivers/gator/gator_events_mali_t6xx.c b/drivers/gator/gator_events_mali_t6xx.c
new file mode 100644
index 0000000..7bf7d6a
--- /dev/null
+++ b/drivers/gator/gator_events_mali_t6xx.c
@@ -0,0 +1,560 @@
+/**
+ * Copyright (C) ARM Limited 2011-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+
+#include "linux/mali_linux_trace.h"
+
+#include "gator_events_mali_common.h"
+
+/*
+ * Check that the MALI_SUPPORT define is set to one of the allowable device codes.
+ */
+#if (MALI_SUPPORT != MALI_T6xx)
+#error MALI_SUPPORT set to an invalid device code: expecting MALI_T6xx
+#endif
+
+/* Counters for Mali-T6xx:
+ *
+ * - Timeline events
+ * They are tracepoints, but instead of reporting a number they report a START/STOP event.
+ * They are reported in Streamline as number of microseconds while that particular counter was active.
+ *
+ * - SW counters
+ * They are tracepoints reporting a particular number.
+ * They are accumulated in sw_counter_data array until they are passed to Streamline, then they are zeroed.
+ *
+ * - Accumulators
+ * They are the same as software counters but their value is not zeroed.
+ */
+
+/* Timeline (start/stop) activity */
+static const char *timeline_event_names[] = {
+ "PM_SHADER_0",
+ "PM_SHADER_1",
+ "PM_SHADER_2",
+ "PM_SHADER_3",
+ "PM_SHADER_4",
+ "PM_SHADER_5",
+ "PM_SHADER_6",
+ "PM_SHADER_7",
+ "PM_TILER_0",
+ "PM_L2_0",
+ "PM_L2_1",
+ "MMU_AS_0",
+ "MMU_AS_1",
+ "MMU_AS_2",
+ "MMU_AS_3"
+};
+
+enum {
+ PM_SHADER_0 = 0,
+ PM_SHADER_1,
+ PM_SHADER_2,
+ PM_SHADER_3,
+ PM_SHADER_4,
+ PM_SHADER_5,
+ PM_SHADER_6,
+ PM_SHADER_7,
+ PM_TILER_0,
+ PM_L2_0,
+ PM_L2_1,
+ MMU_AS_0,
+ MMU_AS_1,
+ MMU_AS_2,
+ MMU_AS_3
+};
+/* The number of shader blocks in the enum above */
+#define NUM_PM_SHADER (8)
+
+/* Software Counters */
+static const char *software_counter_names[] = {
+ "MMU_PAGE_FAULT_0",
+ "MMU_PAGE_FAULT_1",
+ "MMU_PAGE_FAULT_2",
+ "MMU_PAGE_FAULT_3"
+};
+
+enum {
+ MMU_PAGE_FAULT_0 = 0,
+ MMU_PAGE_FAULT_1,
+ MMU_PAGE_FAULT_2,
+ MMU_PAGE_FAULT_3
+};
+
+/* Software Counters */
+static const char *accumulators_names[] = {
+ "TOTAL_ALLOC_PAGES"
+};
+
+enum {
+ TOTAL_ALLOC_PAGES = 0
+};
+
+#define FIRST_TIMELINE_EVENT (0)
+#define NUMBER_OF_TIMELINE_EVENTS (sizeof(timeline_event_names) / sizeof(timeline_event_names[0]))
+#define FIRST_SOFTWARE_COUNTER (FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS)
+#define NUMBER_OF_SOFTWARE_COUNTERS (sizeof(software_counter_names) / sizeof(software_counter_names[0]))
+#define FIRST_ACCUMULATOR (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS)
+#define NUMBER_OF_ACCUMULATORS (sizeof(accumulators_names) / sizeof(accumulators_names[0]))
+#define FILMSTRIP (NUMBER_OF_TIMELINE_EVENTS + NUMBER_OF_SOFTWARE_COUNTERS + NUMBER_OF_ACCUMULATORS)
+#define NUMBER_OF_EVENTS (NUMBER_OF_TIMELINE_EVENTS + NUMBER_OF_SOFTWARE_COUNTERS + NUMBER_OF_ACCUMULATORS + 1)
+
+/*
+ * gatorfs variables for counter enable state
+ */
+static mali_counter counters[NUMBER_OF_EVENTS];
+static unsigned long filmstrip_event;
+
+/* An array used to return the data we recorded
+ * as key,value pairs hence the *2
+ */
+static unsigned long counter_dump[NUMBER_OF_EVENTS * 2];
+
+/*
+ * Array holding counter start times (in ns) for each counter. A zero here
+ * indicates that the activity monitored by this counter is not running.
+ */
+static struct timespec timeline_event_starttime[NUMBER_OF_TIMELINE_EVENTS];
+
+/* The data we have recorded */
+static unsigned int timeline_data[NUMBER_OF_TIMELINE_EVENTS];
+static unsigned int sw_counter_data[NUMBER_OF_SOFTWARE_COUNTERS];
+static unsigned int accumulators_data[NUMBER_OF_ACCUMULATORS];
+
+/* Hold the previous timestamp, used to calculate the sample interval. */
+static struct timespec prev_timestamp;
+
+/**
+ * Returns the timespan (in microseconds) between the two specified timestamps.
+ *
+ * @param start Ptr to the start timestamp
+ * @param end Ptr to the end timestamp
+ *
+ * @return Number of microseconds between the two timestamps (can be negative if start follows end).
+ */
+static inline long get_duration_us(const struct timespec *start, const struct timespec *end)
+{
+ long event_duration_us = (end->tv_nsec - start->tv_nsec) / 1000;
+ event_duration_us += (end->tv_sec - start->tv_sec) * 1000000;
+
+ return event_duration_us;
+}
+
+static void record_timeline_event(unsigned int timeline_index, unsigned int type)
+{
+ struct timespec event_timestamp;
+ struct timespec *event_start = &timeline_event_starttime[timeline_index];
+
+ switch (type) {
+ case ACTIVITY_START:
+ /* Get the event time... */
+ getnstimeofday(&event_timestamp);
+
+ /* Remember the start time if the activity is not already started */
+ if (event_start->tv_sec == 0) {
+ *event_start = event_timestamp; /* Structure copy */
+ }
+ break;
+
+ case ACTIVITY_STOP:
+ /* if the counter was started... */
+ if (event_start->tv_sec != 0) {
+ /* Get the event time... */
+ getnstimeofday(&event_timestamp);
+
+ /* Accumulate the duration in us */
+ timeline_data[timeline_index] += get_duration_us(event_start, &event_timestamp);
+
+ /* Reset the start time to indicate the activity is stopped. */
+ event_start->tv_sec = 0;
+ }
+ break;
+
+ default:
+ /* Other activity events are ignored. */
+ break;
+ }
+}
+
+/*
+ * Documentation about the following tracepoints is in mali_linux_trace.h
+ */
+
+GATOR_DEFINE_PROBE(mali_pm_status, TP_PROTO(unsigned int event_id, unsigned long long value))
+{
+#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
+#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */
+#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
+#define BIT_AT(value, pos) ((value >> pos) & 1)
+
+ static unsigned long long previous_shader_bitmask = 0;
+ static unsigned long long previous_tiler_bitmask = 0;
+ static unsigned long long previous_l2_bitmask = 0;
+
+ switch (event_id) {
+ case SHADER_PRESENT_LO:
+ {
+ unsigned long long changed_bitmask = previous_shader_bitmask ^ value;
+ int pos;
+
+ for (pos = 0; pos < NUM_PM_SHADER; ++pos) {
+ if (BIT_AT(changed_bitmask, pos)) {
+ record_timeline_event(PM_SHADER_0 + pos, BIT_AT(value, pos) ? ACTIVITY_START : ACTIVITY_STOP);
+ }
+ }
+
+ previous_shader_bitmask = value;
+ break;
+ }
+
+ case TILER_PRESENT_LO:
+ {
+ unsigned long long changed = previous_tiler_bitmask ^ value;
+
+ if (BIT_AT(changed, 0)) {
+ record_timeline_event(PM_TILER_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP);
+ }
+
+ previous_tiler_bitmask = value;
+ break;
+ }
+
+ case L2_PRESENT_LO:
+ {
+ unsigned long long changed = previous_l2_bitmask ^ value;
+
+ if (BIT_AT(changed, 0)) {
+ record_timeline_event(PM_L2_0, BIT_AT(value, 0) ? ACTIVITY_START : ACTIVITY_STOP);
+ }
+ if (BIT_AT(changed, 4)) {
+ record_timeline_event(PM_L2_1, BIT_AT(value, 4) ? ACTIVITY_START : ACTIVITY_STOP);
+ }
+
+ previous_l2_bitmask = value;
+ break;
+ }
+
+ default:
+ /* No other blocks are supported at present */
+ break;
+ }
+
+#undef SHADER_PRESENT_LO
+#undef TILER_PRESENT_LO
+#undef L2_PRESENT_LO
+#undef BIT_AT
+}
+
+GATOR_DEFINE_PROBE(mali_page_fault_insert_pages, TP_PROTO(int event_id, unsigned long value))
+{
+ /* We add to the previous since we may receive many tracepoints in one sample period */
+ sw_counter_data[MMU_PAGE_FAULT_0 + event_id] += value;
+}
+
+GATOR_DEFINE_PROBE(mali_mmu_as_in_use, TP_PROTO(int event_id))
+{
+ record_timeline_event(MMU_AS_0 + event_id, ACTIVITY_START);
+}
+
+GATOR_DEFINE_PROBE(mali_mmu_as_released, TP_PROTO(int event_id))
+{
+ record_timeline_event(MMU_AS_0 + event_id, ACTIVITY_STOP);
+}
+
+GATOR_DEFINE_PROBE(mali_total_alloc_pages_change, TP_PROTO(long long int event_id))
+{
+ accumulators_data[TOTAL_ALLOC_PAGES] = event_id;
+}
+
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+ int event;
+ /*
+ * Create the filesystem for all events
+ */
+ int counter_index = 0;
+ const char *mali_name = gator_mali_get_mali_name();
+ mali_profiling_control_type *mali_control;
+
+ for (event = FIRST_TIMELINE_EVENT; event < FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS; event++) {
+ if (gator_mali_create_file_system(mali_name, timeline_event_names[counter_index], sb, root, &counters[event], NULL) != 0) {
+ return -1;
+ }
+ counter_index++;
+ }
+ counter_index = 0;
+ for (event = FIRST_SOFTWARE_COUNTER; event < FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS; event++) {
+ if (gator_mali_create_file_system(mali_name, software_counter_names[counter_index], sb, root, &counters[event], NULL) != 0) {
+ return -1;
+ }
+ counter_index++;
+ }
+ counter_index = 0;
+ for (event = FIRST_ACCUMULATOR; event < FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS; event++) {
+ if (gator_mali_create_file_system(mali_name, accumulators_names[counter_index], sb, root, &counters[event], NULL) != 0) {
+ return -1;
+ }
+ counter_index++;
+ }
+
+ mali_control = symbol_get(_mali_profiling_control);
+ if (mali_control) {
+ if (gator_mali_create_file_system(mali_name, "Filmstrip_cnt0", sb, root, &counters[FILMSTRIP], &filmstrip_event) != 0) {
+ return -1;
+ }
+ symbol_put(_mali_profiling_control);
+ }
+
+ return 0;
+}
+
+static int register_tracepoints(void)
+{
+ if (GATOR_REGISTER_TRACE(mali_pm_status)) {
+ pr_debug("gator: Mali-T6xx: mali_pm_status tracepoint failed to activate\n");
+ return 0;
+ }
+
+ if (GATOR_REGISTER_TRACE(mali_page_fault_insert_pages)) {
+ pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages tracepoint failed to activate\n");
+ return 0;
+ }
+
+ if (GATOR_REGISTER_TRACE(mali_mmu_as_in_use)) {
+ pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use tracepoint failed to activate\n");
+ return 0;
+ }
+
+ if (GATOR_REGISTER_TRACE(mali_mmu_as_released)) {
+ pr_debug("gator: Mali-T6xx: mali_mmu_as_released tracepoint failed to activate\n");
+ return 0;
+ }
+
+ if (GATOR_REGISTER_TRACE(mali_total_alloc_pages_change)) {
+ pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint failed to activate\n");
+ return 0;
+ }
+
+ pr_debug("gator: Mali-T6xx: start\n");
+ pr_debug("gator: Mali-T6xx: mali_pm_status probe is at %p\n", &probe_mali_pm_status);
+ pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages probe is at %p\n", &probe_mali_page_fault_insert_pages);
+ pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use probe is at %p\n", &probe_mali_mmu_as_in_use);
+ pr_debug("gator: Mali-T6xx: mali_mmu_as_released probe is at %p\n", &probe_mali_mmu_as_released);
+ pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change probe is at %p\n", &probe_mali_total_alloc_pages_change);
+
+ return 1;
+}
+
+static int start(void)
+{
+ unsigned int cnt;
+ mali_profiling_control_type *mali_control;
+
+ /* Clean all data for the next capture */
+ for (cnt = 0; cnt < NUMBER_OF_TIMELINE_EVENTS; cnt++) {
+ timeline_event_starttime[cnt].tv_sec = timeline_event_starttime[cnt].tv_nsec = 0;
+ timeline_data[cnt] = 0;
+ }
+
+ for (cnt = 0; cnt < NUMBER_OF_SOFTWARE_COUNTERS; cnt++) {
+ sw_counter_data[cnt] = 0;
+ }
+
+ for (cnt = 0; cnt < NUMBER_OF_ACCUMULATORS; cnt++) {
+ accumulators_data[cnt] = 0;
+ }
+
+ /* Register tracepoints */
+ if (register_tracepoints() == 0) {
+ return -1;
+ }
+
+ /* Generic control interface for Mali DDK. */
+ mali_control = symbol_get(_mali_profiling_control);
+ if (mali_control) {
+ /* The event attribute in the XML file keeps the actual frame rate. */
+ unsigned int enabled = counters[FILMSTRIP].enabled ? 1 : 0;
+ unsigned int rate = filmstrip_event & 0xff;
+ unsigned int resize_factor = (filmstrip_event >> 8) & 0xff;
+
+ pr_debug("gator: mali online _mali_profiling_control symbol @ %p\n", mali_control);
+
+#define FBDUMP_CONTROL_ENABLE (1)
+#define FBDUMP_CONTROL_RATE (2)
+#define FBDUMP_CONTROL_RESIZE_FACTOR (4)
+ mali_control(FBDUMP_CONTROL_ENABLE, enabled);
+ mali_control(FBDUMP_CONTROL_RATE, rate);
+ mali_control(FBDUMP_CONTROL_RESIZE_FACTOR, resize_factor);
+
+ pr_debug("gator: sent mali_control enabled=%d, rate=%d, resize_factor=%d\n", enabled, rate, resize_factor);
+
+ symbol_put(_mali_profiling_control);
+ } else {
+ printk("gator: mali online _mali_profiling_control symbol not found\n");
+ }
+
+ /*
+ * Set the first timestamp for calculating the sample interval. The first interval could be quite long,
+ * since it will be the time between 'start' and the first 'read'.
+ * This means that timeline values will be divided by a big number for the first sample.
+ */
+ getnstimeofday(&prev_timestamp);
+
+ return 0;
+}
+
+static void stop(void)
+{
+ mali_profiling_control_type *mali_control;
+
+ pr_debug("gator: Mali-T6xx: stop\n");
+
+ /*
+ * It is safe to unregister traces even if they were not successfully
+ * registered, so no need to check.
+ */
+ GATOR_UNREGISTER_TRACE(mali_pm_status);
+ pr_debug("gator: Mali-T6xx: mali_pm_status tracepoint deactivated\n");
+
+ GATOR_UNREGISTER_TRACE(mali_page_fault_insert_pages);
+ pr_debug("gator: Mali-T6xx: mali_page_fault_insert_pages tracepoint deactivated\n");
+
+ GATOR_UNREGISTER_TRACE(mali_mmu_as_in_use);
+ pr_debug("gator: Mali-T6xx: mali_mmu_as_in_use tracepoint deactivated\n");
+
+ GATOR_UNREGISTER_TRACE(mali_mmu_as_released);
+ pr_debug("gator: Mali-T6xx: mali_mmu_as_released tracepoint deactivated\n");
+
+ GATOR_UNREGISTER_TRACE(mali_total_alloc_pages_change);
+ pr_debug("gator: Mali-T6xx: mali_total_alloc_pages_change tracepoint deactivated\n");
+
+ /* Generic control interface for Mali DDK. */
+ mali_control = symbol_get(_mali_profiling_control);
+ if (mali_control) {
+ pr_debug("gator: mali offline _mali_profiling_control symbol @ %p\n", mali_control);
+
+ mali_control(FBDUMP_CONTROL_ENABLE, 0);
+
+ symbol_put(_mali_profiling_control);
+ } else {
+ printk("gator: mali offline _mali_profiling_control symbol not found\n");
+ }
+}
+
+static int read(int **buffer)
+{
+ int cnt;
+ int len = 0;
+ long sample_interval_us = 0;
+ struct timespec read_timestamp;
+
+ if (!on_primary_core()) {
+ return 0;
+ }
+
+ /* Get the start of this sample period. */
+ getnstimeofday(&read_timestamp);
+
+ /*
+ * Calculate the sample interval if the previous sample time is valid.
+ * We use tv_sec since it will not be 0.
+ */
+ if (prev_timestamp.tv_sec != 0) {
+ sample_interval_us = get_duration_us(&prev_timestamp, &read_timestamp);
+ }
+
+ /* Structure copy. Update the previous timestamp. */
+ prev_timestamp = read_timestamp;
+
+ /*
+ * Report the timeline counters (ACTIVITY_START/STOP)
+ */
+ for (cnt = FIRST_TIMELINE_EVENT; cnt < (FIRST_TIMELINE_EVENT + NUMBER_OF_TIMELINE_EVENTS); cnt++) {
+ mali_counter *counter = &counters[cnt];
+ if (counter->enabled) {
+ const int index = cnt - FIRST_TIMELINE_EVENT;
+ unsigned int value;
+
+ /* If the activity is still running, reset its start time to the start of this sample period
+ * to correct the count. Add the time up to the end of the sample onto the count. */
+ if (timeline_event_starttime[index].tv_sec != 0) {
+ const long event_duration = get_duration_us(&timeline_event_starttime[index], &read_timestamp);
+ timeline_data[index] += event_duration;
+ timeline_event_starttime[index] = read_timestamp; /* Activity is still running. */
+ }
+
+ if (sample_interval_us != 0) {
+ /* Convert the counter to a percent-of-sample value */
+ value = (timeline_data[index] * 100) / sample_interval_us;
+ } else {
+ pr_debug("gator: Mali-T6xx: setting value to zero\n");
+ value = 0;
+ }
+
+ /* Clear the counter value ready for the next sample. */
+ timeline_data[index] = 0;
+
+ counter_dump[len++] = counter->key;
+ counter_dump[len++] = value;
+ }
+ }
+
+ /* Report the software counters */
+ for (cnt = FIRST_SOFTWARE_COUNTER; cnt < (FIRST_SOFTWARE_COUNTER + NUMBER_OF_SOFTWARE_COUNTERS); cnt++) {
+ const mali_counter *counter = &counters[cnt];
+ if (counter->enabled) {
+ const int index = cnt - FIRST_SOFTWARE_COUNTER;
+ counter_dump[len++] = counter->key;
+ counter_dump[len++] = sw_counter_data[index];
+ /* Set the value to zero for the next time */
+ sw_counter_data[index] = 0;
+ }
+ }
+
+ /* Report the accumulators */
+ for (cnt = FIRST_ACCUMULATOR; cnt < (FIRST_ACCUMULATOR + NUMBER_OF_ACCUMULATORS); cnt++) {
+ const mali_counter *counter = &counters[cnt];
+ if (counter->enabled) {
+ const int index = cnt - FIRST_ACCUMULATOR;
+ counter_dump[len++] = counter->key;
+ counter_dump[len++] = accumulators_data[index];
+ /* Do not zero the accumulator */
+ }
+ }
+
+ /* Update the buffer */
+ if (buffer) {
+ *buffer = (int *)counter_dump;
+ }
+
+ return len;
+}
+
+static struct gator_interface gator_events_mali_t6xx_interface = {
+ .create_files = create_files,
+ .start = start,
+ .stop = stop,
+ .read = read
+};
+
+extern int gator_events_mali_t6xx_init(void)
+{
+ pr_debug("gator: Mali-T6xx: sw_counters init\n");
+
+ gator_mali_initialise_counters(counters, NUMBER_OF_EVENTS);
+
+ return gator_events_install(&gator_events_mali_t6xx_interface);
+}
diff --git a/drivers/gator/gator_events_mali_t6xx_hw.c b/drivers/gator/gator_events_mali_t6xx_hw.c
new file mode 100644
index 0000000..e406991
--- /dev/null
+++ b/drivers/gator/gator_events_mali_t6xx_hw.c
@@ -0,0 +1,784 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+
+#include <linux/module.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+
+/* Mali T6xx DDK includes */
+#include "linux/mali_linux_trace.h"
+#include "kbase/src/common/mali_kbase.h"
+#include "kbase/src/linux/mali_kbase_mem_linux.h"
+
+#include "gator_events_mali_common.h"
+
+/* If API version is not specified then assume API version 1. */
+#ifndef MALI_DDK_GATOR_API_VERSION
+#define MALI_DDK_GATOR_API_VERSION 1
+#endif
+
+#if (MALI_DDK_GATOR_API_VERSION != 1) && (MALI_DDK_GATOR_API_VERSION != 2)
+#error MALI_DDK_GATOR_API_VERSION is invalid (must be 1 for r1/r2 DDK, or 2 for r3 DDK).
+#endif
+
+/*
+ * Mali-T6xx
+ */
+typedef struct kbase_device *kbase_find_device_type(int);
+typedef kbase_context *kbase_create_context_type(kbase_device *);
+typedef void kbase_destroy_context_type(kbase_context *);
+
+#if MALI_DDK_GATOR_API_VERSION == 1
+typedef void *kbase_va_alloc_type(kbase_context *, u32);
+typedef void kbase_va_free_type(kbase_context *, void *);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+typedef void *kbase_va_alloc_type(kbase_context *, u32, kbase_hwc_dma_mapping * handle);
+typedef void kbase_va_free_type(kbase_context *, kbase_hwc_dma_mapping * handle);
+#endif
+
+typedef mali_error kbase_instr_hwcnt_enable_type(kbase_context *, kbase_uk_hwcnt_setup *);
+typedef mali_error kbase_instr_hwcnt_disable_type(kbase_context *);
+typedef mali_error kbase_instr_hwcnt_clear_type(kbase_context *);
+typedef mali_error kbase_instr_hwcnt_dump_irq_type(kbase_context *);
+typedef mali_bool kbase_instr_hwcnt_dump_complete_type(kbase_context *, mali_bool *);
+
+static kbase_find_device_type *kbase_find_device_symbol;
+static kbase_create_context_type *kbase_create_context_symbol;
+static kbase_va_alloc_type *kbase_va_alloc_symbol;
+static kbase_instr_hwcnt_enable_type *kbase_instr_hwcnt_enable_symbol;
+static kbase_instr_hwcnt_clear_type *kbase_instr_hwcnt_clear_symbol;
+static kbase_instr_hwcnt_dump_irq_type *kbase_instr_hwcnt_dump_irq_symbol;
+static kbase_instr_hwcnt_dump_complete_type *kbase_instr_hwcnt_dump_complete_symbol;
+static kbase_instr_hwcnt_disable_type *kbase_instr_hwcnt_disable_symbol;
+static kbase_va_free_type *kbase_va_free_symbol;
+static kbase_destroy_context_type *kbase_destroy_context_symbol;
+
+static long shader_present_low = 0;
+
+/** The interval between reads, in ns.
+ *
+ * Earlier we introduced
+ * a 'hold off for 1ms after last read' to resolve MIDBASE-2178 and MALINE-724.
+ * However, the 1ms hold off is too long if no context switches occur as there is a race
+ * between this value and the tick of the read clock in gator which is also 1ms. If we 'miss' the
+ * current read, the counter values are effectively 'spread' over 2ms and the values seen are half
+ * what they should be (since Streamline averages over sample time). In the presence of context switches
+ * this spread can vary and markedly affect the counters. Currently there is no 'proper' solution to
+ * this, but empirically we have found that reducing the minimum read interval to 950us causes the
+ * counts to be much more stable.
+ */
+static const int READ_INTERVAL_NSEC = 950000;
+
+#if GATOR_TEST
+#include "gator_events_mali_t6xx_hw_test.c"
+#endif
+
+/* Blocks for HW counters */
+enum {
+ JM_BLOCK = 0,
+ TILER_BLOCK,
+ SHADER_BLOCK,
+ MMU_BLOCK
+};
+
+/* Counters for Mali-T6xx:
+ *
+ * - HW counters, 4 blocks
+ * For HW counters we need strings to create /dev/gator/events files.
+ * Enums are not needed because the position of the HW name in the array is the same
+ * of the corresponding value in the received block of memory.
+ * HW counters are requested by calculating a bitmask, passed then to the driver.
+ * Every millisecond a HW counters dump is requested, and if the previous has been completed they are read.
+ */
+
+/* Hardware Counters */
+static const char *const hardware_counter_names[] = {
+ /* Job Manager */
+ "",
+ "",
+ "",
+ "",
+ "MESSAGES_SENT",
+ "MESSAGES_RECEIVED",
+ "GPU_ACTIVE", /* 6 */
+ "IRQ_ACTIVE",
+ "JS0_JOBS",
+ "JS0_TASKS",
+ "JS0_ACTIVE",
+ "",
+ "JS0_WAIT_READ",
+ "JS0_WAIT_ISSUE",
+ "JS0_WAIT_DEPEND",
+ "JS0_WAIT_FINISH",
+ "JS1_JOBS",
+ "JS1_TASKS",
+ "JS1_ACTIVE",
+ "",
+ "JS1_WAIT_READ",
+ "JS1_WAIT_ISSUE",
+ "JS1_WAIT_DEPEND",
+ "JS1_WAIT_FINISH",
+ "JS2_JOBS",
+ "JS2_TASKS",
+ "JS2_ACTIVE",
+ "",
+ "JS2_WAIT_READ",
+ "JS2_WAIT_ISSUE",
+ "JS2_WAIT_DEPEND",
+ "JS2_WAIT_FINISH",
+ "JS3_JOBS",
+ "JS3_TASKS",
+ "JS3_ACTIVE",
+ "",
+ "JS3_WAIT_READ",
+ "JS3_WAIT_ISSUE",
+ "JS3_WAIT_DEPEND",
+ "JS3_WAIT_FINISH",
+ "JS4_JOBS",
+ "JS4_TASKS",
+ "JS4_ACTIVE",
+ "",
+ "JS4_WAIT_READ",
+ "JS4_WAIT_ISSUE",
+ "JS4_WAIT_DEPEND",
+ "JS4_WAIT_FINISH",
+ "JS5_JOBS",
+ "JS5_TASKS",
+ "JS5_ACTIVE",
+ "",
+ "JS5_WAIT_READ",
+ "JS5_WAIT_ISSUE",
+ "JS5_WAIT_DEPEND",
+ "JS5_WAIT_FINISH",
+ "JS6_JOBS",
+ "JS6_TASKS",
+ "JS6_ACTIVE",
+ "",
+ "JS6_WAIT_READ",
+ "JS6_WAIT_ISSUE",
+ "JS6_WAIT_DEPEND",
+ "JS6_WAIT_FINISH",
+
+ /*Tiler */
+ "",
+ "",
+ "",
+ "JOBS_PROCESSED",
+ "TRIANGLES",
+ "QUADS",
+ "POLYGONS",
+ "POINTS",
+ "LINES",
+ "VCACHE_HIT",
+ "VCACHE_MISS",
+ "FRONT_FACING",
+ "BACK_FACING",
+ "PRIM_VISIBLE",
+ "PRIM_CULLED",
+ "PRIM_CLIPPED",
+ "LEVEL0",
+ "LEVEL1",
+ "LEVEL2",
+ "LEVEL3",
+ "LEVEL4",
+ "LEVEL5",
+ "LEVEL6",
+ "LEVEL7",
+ "COMMAND_1",
+ "COMMAND_2",
+ "COMMAND_3",
+ "COMMAND_4",
+ "COMMAND_4_7",
+ "COMMAND_8_15",
+ "COMMAND_16_63",
+ "COMMAND_64",
+ "COMPRESS_IN",
+ "COMPRESS_OUT",
+ "COMPRESS_FLUSH",
+ "TIMESTAMPS",
+ "PCACHE_HIT",
+ "PCACHE_MISS",
+ "PCACHE_LINE",
+ "PCACHE_STALL",
+ "WRBUF_HIT",
+ "WRBUF_MISS",
+ "WRBUF_LINE",
+ "WRBUF_PARTIAL",
+ "WRBUF_STALL",
+ "ACTIVE",
+ "LOADING_DESC",
+ "INDEX_WAIT",
+ "INDEX_RANGE_WAIT",
+ "VERTEX_WAIT",
+ "PCACHE_WAIT",
+ "WRBUF_WAIT",
+ "BUS_READ",
+ "BUS_WRITE",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "UTLB_STALL",
+ "UTLB_REPLAY_MISS",
+ "UTLB_REPLAY_FULL",
+ "UTLB_NEW_MISS",
+ "UTLB_HIT",
+
+ /* Shader Core */
+ "",
+ "",
+ "",
+ "SHADER_CORE_ACTIVE",
+ "FRAG_ACTIVE",
+ "FRAG_PRIMATIVES",
+ "FRAG_PRIMATIVES_DROPPED",
+ "FRAG_CYCLE_DESC",
+ "FRAG_CYCLES_PLR",
+ "FRAG_CYCLES_VERT",
+ "FRAG_CYCLES_TRISETUP",
+ "FRAG_CYCLES_RAST",
+ "FRAG_THREADS",
+ "FRAG_DUMMY_THREADS",
+ "FRAG_QUADS_RAST",
+ "FRAG_QUADS_EZS_TEST",
+ "FRAG_QUADS_EZS_KILLED",
+ "FRAG_QUADS_LZS_TEST",
+ "FRAG_QUADS_LZS_KILLED",
+ "FRAG_CYCLE_NO_TILE",
+ "FRAG_NUM_TILES",
+ "FRAG_TRANS_ELIM",
+ "COMPUTE_ACTIVE",
+ "COMPUTE_TASKS",
+ "COMPUTE_THREADS",
+ "COMPUTE_CYCLES_DESC",
+ "TRIPIPE_ACTIVE",
+ "ARITH_WORDS",
+ "ARITH_CYCLES_REG",
+ "ARITH_CYCLES_L0",
+ "ARITH_FRAG_DEPEND",
+ "LS_WORDS",
+ "LS_ISSUES",
+ "LS_RESTARTS",
+ "LS_REISSUES_MISS",
+ "LS_REISSUES_VD",
+ "LS_REISSUE_ATTRIB_MISS",
+ "LS_NO_WB",
+ "TEX_WORDS",
+ "TEX_BUBBLES",
+ "TEX_WORDS_L0",
+ "TEX_WORDS_DESC",
+ "TEX_THREADS",
+ "TEX_RECIRC_FMISS",
+ "TEX_RECIRC_DESC",
+ "TEX_RECIRC_MULTI",
+ "TEX_RECIRC_PMISS",
+ "TEX_RECIRC_CONF",
+ "LSC_READ_HITS",
+ "LSC_READ_MISSES",
+ "LSC_WRITE_HITS",
+ "LSC_WRITE_MISSES",
+ "LSC_ATOMIC_HITS",
+ "LSC_ATOMIC_MISSES",
+ "LSC_LINE_FETCHES",
+ "LSC_DIRTY_LINE",
+ "LSC_SNOOPS",
+ "AXI_TLB_STALL",
+ "AXI_TLB_MIESS",
+ "AXI_TLB_TRANSACTION",
+ "LS_TLB_MISS",
+ "LS_TLB_HIT",
+ "AXI_BEATS_READ",
+ "AXI_BEATS_WRITTEN",
+
+ /*L2 and MMU */
+ "",
+ "",
+ "",
+ "",
+ "MMU_HIT",
+ "MMU_NEW_MISS",
+ "MMU_REPLAY_FULL",
+ "MMU_REPLAY_MISS",
+ "MMU_TABLE_WALK",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "UTLB_HIT",
+ "UTLB_NEW_MISS",
+ "UTLB_REPLAY_FULL",
+ "UTLB_REPLAY_MISS",
+ "UTLB_STALL",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "L2_WRITE_BEATS",
+ "L2_READ_BEATS",
+ "L2_ANY_LOOKUP",
+ "L2_READ_LOOKUP",
+ "L2_SREAD_LOOKUP",
+ "L2_READ_REPLAY",
+ "L2_READ_SNOOP",
+ "L2_READ_HIT",
+ "L2_CLEAN_MISS",
+ "L2_WRITE_LOOKUP",
+ "L2_SWRITE_LOOKUP",
+ "L2_WRITE_REPLAY",
+ "L2_WRITE_SNOOP",
+ "L2_WRITE_HIT",
+ "L2_EXT_READ_FULL",
+ "L2_EXT_READ_HALF",
+ "L2_EXT_WRITE_FULL",
+ "L2_EXT_WRITE_HALF",
+ "L2_EXT_READ",
+ "L2_EXT_READ_LINE",
+ "L2_EXT_WRITE",
+ "L2_EXT_WRITE_LINE",
+ "L2_EXT_WRITE_SMALL",
+ "L2_EXT_BARRIER",
+ "L2_EXT_AR_STALL",
+ "L2_EXT_R_BUF_FULL",
+ "L2_EXT_RD_BUF_FULL",
+ "L2_EXT_R_RAW",
+ "L2_EXT_W_STALL",
+ "L2_EXT_W_BUF_FULL",
+ "L2_EXT_R_W_HAZARD",
+ "L2_TAG_HAZARD",
+ "L2_SNOOP_FULL",
+ "L2_REPLAY_FULL"
+};
+
+#define NUMBER_OF_HARDWARE_COUNTERS (sizeof(hardware_counter_names) / sizeof(hardware_counter_names[0]))
+
+#define GET_HW_BLOCK(c) (((c) >> 6) & 0x3)
+#define GET_COUNTER_OFFSET(c) ((c) & 0x3f)
+
+/* Memory to dump hardware counters into */
+static void *kernel_dump_buffer;
+
+#if MALI_DDK_GATOR_API_VERSION == 2
+/* DMA state used to manage lifetime of the buffer */
+kbase_hwc_dma_mapping kernel_dump_buffer_handle;
+#endif
+
+/* kbase context and device */
+static kbase_context *kbcontext = NULL;
+static struct kbase_device *kbdevice = NULL;
+
+/*
+ * The following function has no external prototype in older DDK revisions. When the DDK
+ * is updated then this should be removed.
+ */
+struct kbase_device *kbase_find_device(int minor);
+
+static volatile bool kbase_device_busy = false;
+static unsigned int num_hardware_counters_enabled;
+
+/*
+ * gatorfs variables for counter enable state
+ */
+static mali_counter counters[NUMBER_OF_HARDWARE_COUNTERS];
+
+/* An array used to return the data we recorded
+ * as key,value pairs hence the *2
+ */
+static unsigned long counter_dump[NUMBER_OF_HARDWARE_COUNTERS * 2];
+
+#define SYMBOL_GET(FUNCTION, ERROR_COUNT) \
+ if(FUNCTION ## _symbol) \
+ { \
+ printk("gator: mali " #FUNCTION " symbol was already registered\n"); \
+ (ERROR_COUNT)++; \
+ } \
+ else \
+ { \
+ FUNCTION ## _symbol = symbol_get(FUNCTION); \
+ if(! FUNCTION ## _symbol) \
+ { \
+ printk("gator: mali online " #FUNCTION " symbol not found\n"); \
+ (ERROR_COUNT)++; \
+ } \
+ }
+
+#define SYMBOL_CLEANUP(FUNCTION) \
+ if(FUNCTION ## _symbol) \
+ { \
+ symbol_put(FUNCTION); \
+ FUNCTION ## _symbol = NULL; \
+ }
+
+/**
+ * Execute symbol_get for all the Mali symbols and check for success.
+ * @return the number of symbols not loaded.
+ */
+static int init_symbols(void)
+{
+ int error_count = 0;
+ SYMBOL_GET(kbase_find_device, error_count);
+ SYMBOL_GET(kbase_create_context, error_count);
+ SYMBOL_GET(kbase_va_alloc, error_count);
+ SYMBOL_GET(kbase_instr_hwcnt_enable, error_count);
+ SYMBOL_GET(kbase_instr_hwcnt_clear, error_count);
+ SYMBOL_GET(kbase_instr_hwcnt_dump_irq, error_count);
+ SYMBOL_GET(kbase_instr_hwcnt_dump_complete, error_count);
+ SYMBOL_GET(kbase_instr_hwcnt_disable, error_count);
+ SYMBOL_GET(kbase_va_free, error_count);
+ SYMBOL_GET(kbase_destroy_context, error_count);
+
+ return error_count;
+}
+
+/**
+ * Execute symbol_put for all the registered Mali symbols.
+ */
+static void clean_symbols(void)
+{
+ SYMBOL_CLEANUP(kbase_find_device);
+ SYMBOL_CLEANUP(kbase_create_context);
+ SYMBOL_CLEANUP(kbase_va_alloc);
+ SYMBOL_CLEANUP(kbase_instr_hwcnt_enable);
+ SYMBOL_CLEANUP(kbase_instr_hwcnt_clear);
+ SYMBOL_CLEANUP(kbase_instr_hwcnt_dump_irq);
+ SYMBOL_CLEANUP(kbase_instr_hwcnt_dump_complete);
+ SYMBOL_CLEANUP(kbase_instr_hwcnt_disable);
+ SYMBOL_CLEANUP(kbase_va_free);
+ SYMBOL_CLEANUP(kbase_destroy_context);
+}
+
+/**
+ * Determines whether a read should take place
+ * @param current_time The current time, obtained from getnstimeofday()
+ * @param prev_time_s The number of seconds at the previous read attempt.
+ * @param next_read_time_ns The time (in ns) when the next read should be allowed.
+ *
+ * Note that this function has been separated out here to allow it to be tested.
+ */
+static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns)
+{
+ /* If the current ns count rolls over a second, roll the next read time too. */
+ if (current_time->tv_sec != *prev_time_s) {
+ *next_read_time_ns = *next_read_time_ns - NSEC_PER_SEC;
+ }
+
+ /* Abort the read if the next read time has not arrived. */
+ if (current_time->tv_nsec < *next_read_time_ns) {
+ return 0;
+ }
+
+ /* Set the next read some fixed time after this one, and update the read timestamp. */
+ *next_read_time_ns = current_time->tv_nsec + READ_INTERVAL_NSEC;
+
+ *prev_time_s = current_time->tv_sec;
+ return 1;
+}
+
+static int start(void)
+{
+ kbase_uk_hwcnt_setup setup;
+ mali_error err;
+ int cnt;
+ u16 bitmask[] = { 0, 0, 0, 0 };
+ unsigned long long shadersPresent = 0;
+
+ /* Setup HW counters */
+ num_hardware_counters_enabled = 0;
+
+ if (NUMBER_OF_HARDWARE_COUNTERS != 256) {
+ pr_debug("Unexpected number of hardware counters defined: expecting 256, got %d\n", NUMBER_OF_HARDWARE_COUNTERS);
+ }
+
+ /* Calculate enable bitmasks based on counters_enabled array */
+ for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
+ const mali_counter *counter = &counters[cnt];
+ if (counter->enabled) {
+ int block = GET_HW_BLOCK(cnt);
+ int enable_bit = GET_COUNTER_OFFSET(cnt) / 4;
+ bitmask[block] |= (1 << enable_bit);
+ pr_debug("gator: Mali-T6xx: hardware counter %s selected [%d]\n", hardware_counter_names[cnt], cnt);
+ num_hardware_counters_enabled++;
+ }
+ }
+
+ /* Create a kbase context for HW counters */
+ if (num_hardware_counters_enabled > 0) {
+ if (init_symbols() > 0) {
+ clean_symbols();
+ /* No Mali driver code entrypoints found - not a fault. */
+ return 0;
+ }
+
+ kbdevice = kbase_find_device_symbol(-1);
+
+ /* If we already got a context, fail */
+ if (kbcontext) {
+ pr_debug("gator: Mali-T6xx: error context already present\n");
+ goto out;
+ }
+
+ /* kbcontext will only be valid after all the Mali symbols are loaded successfully */
+ kbcontext = kbase_create_context_symbol(kbdevice);
+ if (!kbcontext) {
+ pr_debug("gator: Mali-T6xx: error creating kbase context\n");
+ goto out;
+ }
+
+
+ /* See if we can get the number of shader cores */
+ shadersPresent = kbdevice->shader_present_bitmap;
+ shader_present_low = (unsigned long)shadersPresent;
+
+ /*
+ * The amount of memory needed to store the dump (bytes)
+ * DUMP_SIZE = number of core groups
+ * * number of blocks (always 8 for midgard)
+ * * number of counters per block (always 64 for midgard)
+ * * number of bytes per counter (always 4 in midgard)
+ * For a Mali-T6xx with a single core group = 1 * 8 * 64 * 4 = 2048
+ * For a Mali-T6xx with a dual core group = 2 * 8 * 64 * 4 = 4096
+ */
+#if MALI_DDK_GATOR_API_VERSION == 1
+ kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+ kernel_dump_buffer = kbase_va_alloc_symbol(kbcontext, 4096, &kernel_dump_buffer_handle);
+#endif
+ if (!kernel_dump_buffer) {
+ pr_debug("gator: Mali-T6xx: error trying to allocate va\n");
+ goto destroy_context;
+ }
+
+ setup.dump_buffer = (uintptr_t)kernel_dump_buffer;
+ setup.jm_bm = bitmask[JM_BLOCK];
+ setup.tiler_bm = bitmask[TILER_BLOCK];
+ setup.shader_bm = bitmask[SHADER_BLOCK];
+ setup.mmu_l2_bm = bitmask[MMU_BLOCK];
+ /* These counters do not exist on Mali-T60x */
+ setup.l3_cache_bm = 0;
+
+ /* Use kbase API to enable hardware counters and provide dump buffer */
+ err = kbase_instr_hwcnt_enable_symbol(kbcontext, &setup);
+ if (err != MALI_ERROR_NONE) {
+ pr_debug("gator: Mali-T6xx: can't setup hardware counters\n");
+ goto free_buffer;
+ }
+ pr_debug("gator: Mali-T6xx: hardware counters enabled\n");
+ kbase_instr_hwcnt_clear_symbol(kbcontext);
+ pr_debug("gator: Mali-T6xx: hardware counters cleared \n");
+
+ kbase_device_busy = false;
+ }
+
+ return 0;
+
+free_buffer:
+#if MALI_DDK_GATOR_API_VERSION == 1
+ kbase_va_free_symbol(kbcontext, kernel_dump_buffer);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+ kbase_va_free_symbol(kbcontext, &kernel_dump_buffer_handle);
+#endif
+
+destroy_context:
+ kbase_destroy_context_symbol(kbcontext);
+
+out:
+ clean_symbols();
+ return -1;
+}
+
+static void stop(void)
+{
+ unsigned int cnt;
+ kbase_context *temp_kbcontext;
+
+ pr_debug("gator: Mali-T6xx: stop\n");
+
+ /* Set all counters as disabled */
+ for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
+ counters[cnt].enabled = 0;
+ }
+
+ /* Destroy the context for HW counters */
+ if (num_hardware_counters_enabled > 0 && kbcontext != NULL) {
+ /*
+ * Set the global variable to NULL before destroying it, because
+ * other function will check this before using it.
+ */
+ temp_kbcontext = kbcontext;
+ kbcontext = NULL;
+
+ kbase_instr_hwcnt_disable_symbol(temp_kbcontext);
+
+#if MALI_DDK_GATOR_API_VERSION == 1
+ kbase_va_free_symbol(temp_kbcontext, kernel_dump_buffer);
+#elif MALI_DDK_GATOR_API_VERSION == 2
+ kbase_va_free_symbol(temp_kbcontext, &kernel_dump_buffer_handle);
+#endif
+
+ kbase_destroy_context_symbol(temp_kbcontext);
+
+ pr_debug("gator: Mali-T6xx: hardware counters stopped\n");
+
+ clean_symbols();
+ }
+}
+
+static int read(int **buffer)
+{
+ int cnt;
+ int len = 0;
+ u32 value = 0;
+ mali_bool success;
+
+ struct timespec current_time;
+ static u32 prev_time_s = 0;
+ static s32 next_read_time_ns = 0;
+
+ if (!on_primary_core()) {
+ return 0;
+ }
+
+ getnstimeofday(&current_time);
+
+ /*
+ * Discard reads unless a respectable time has passed. This reduces the load on the GPU without sacrificing
+ * accuracy on the Streamline display.
+ */
+ if (!is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns)) {
+ return 0;
+ }
+
+ /*
+ * Report the HW counters
+ * Only process hardware counters if at least one of the hardware counters is enabled.
+ */
+ if (num_hardware_counters_enabled > 0) {
+ const unsigned int vithar_blocks[] = {
+ 0x700, /* VITHAR_JOB_MANAGER, Block 0 */
+ 0x400, /* VITHAR_TILER, Block 1 */
+ 0x000, /* VITHAR_SHADER_CORE, Block 2 */
+ 0x500 /* VITHAR_MEMORY_SYSTEM, Block 3 */
+ };
+
+ if (!kbcontext) {
+ return -1;
+ }
+
+ /* Mali symbols can be called safely since a kbcontext is valid */
+ if (kbase_instr_hwcnt_dump_complete_symbol(kbcontext, &success) == MALI_TRUE) {
+ kbase_device_busy = false;
+
+ if (success == MALI_TRUE) {
+ /* Cycle through hardware counters and accumulate totals */
+ for (cnt = 0; cnt < NUMBER_OF_HARDWARE_COUNTERS; cnt++) {
+ const mali_counter *counter = &counters[cnt];
+ if (counter->enabled) {
+ const int block = GET_HW_BLOCK(cnt);
+ const int counter_offset = GET_COUNTER_OFFSET(cnt);
+
+ const char* block_base_address = (char*)kernel_dump_buffer + vithar_blocks[block];
+
+ /* If counter belongs to shader block need to take into account all cores */
+ if (block == SHADER_BLOCK) {
+ int i = 0;
+ int shader_core_count = 0;
+ value = 0;
+
+ for (i = 0; i < 4; i++) {
+ if ((shader_present_low >> i) & 1) {
+ value += *((u32*) (block_base_address + (0x100 * i)) + counter_offset);
+ shader_core_count++;
+ }
+ }
+
+ for (i = 0; i < 4; i++) {
+ if((shader_present_low >> (i+4)) & 1) {
+ value += *((u32*)(block_base_address + (0x100 * i) + 0x800) + counter_offset);
+ shader_core_count++;
+ }
+ }
+
+ /* Need to total by number of cores to produce an average */
+ if (shader_core_count != 0) {
+ value /= shader_core_count;
+ }
+ } else {
+ value = *((u32*)block_base_address + counter_offset);
+ }
+
+ counter_dump[len++] = counter->key;
+ counter_dump[len++] = value;
+ }
+ }
+ }
+ }
+
+ if (!kbase_device_busy) {
+ kbase_device_busy = true;
+ kbase_instr_hwcnt_dump_irq_symbol(kbcontext);
+ }
+ }
+
+ /* Update the buffer */
+ if (buffer) {
+ *buffer = (int *)counter_dump;
+ }
+
+ return len;
+}
+
+static int create_files(struct super_block *sb, struct dentry *root)
+{
+ unsigned int event;
+ /*
+ * Create the filesystem for all events
+ */
+ int counter_index = 0;
+ const char *mali_name = gator_mali_get_mali_name();
+
+ for (event = 0; event < NUMBER_OF_HARDWARE_COUNTERS; event++) {
+ if (gator_mali_create_file_system(mali_name, hardware_counter_names[counter_index], sb, root, &counters[event], NULL) != 0)
+ return -1;
+ counter_index++;
+ }
+
+ return 0;
+}
+
+static struct gator_interface gator_events_mali_t6xx_interface = {
+ .create_files = create_files,
+ .start = start,
+ .stop = stop,
+ .read = read
+};
+
+int gator_events_mali_t6xx_hw_init(void)
+{
+ pr_debug("gator: Mali-T6xx: sw_counters init\n");
+
+#if GATOR_TEST
+ test_all_is_read_scheduled();
+#endif
+
+ gator_mali_initialise_counters(counters, NUMBER_OF_HARDWARE_COUNTERS);
+
+ return gator_events_install(&gator_events_mali_t6xx_interface);
+}
diff --git a/drivers/gator/gator_events_mali_t6xx_hw_test.c b/drivers/gator/gator_events_mali_t6xx_hw_test.c
new file mode 100644
index 0000000..efb32dd
--- /dev/null
+++ b/drivers/gator/gator_events_mali_t6xx_hw_test.c
@@ -0,0 +1,55 @@
+/**
+ * Copyright (C) ARM Limited 2012-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/**
+ * Test functions for mali_t600_hw code.
+ */
+
+static int is_read_scheduled(const struct timespec *current_time, u32 *prev_time_s, s32 *next_read_time_ns);
+
+static int test_is_read_scheduled(u32 s, u32 ns, u32 prev_s, s32 next_ns, int expected_result, s32 expected_next_ns)
+{
+ struct timespec current_time;
+ u32 prev_time_s = prev_s;
+ s32 next_read_time_ns = next_ns;
+
+ current_time.tv_sec = s;
+ current_time.tv_nsec = ns;
+
+ if (is_read_scheduled(&current_time, &prev_time_s, &next_read_time_ns) != expected_result) {
+ printk("Failed do_read(%u, %u, %u, %d): expected %d\n", s, ns, prev_s, next_ns, expected_result);
+ return 0;
+ }
+
+ if (next_read_time_ns != expected_next_ns) {
+ printk("Failed: next_read_ns expected=%d, actual=%d\n", expected_next_ns, next_read_time_ns);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void test_all_is_read_scheduled(void)
+{
+ const int HIGHEST_NS = 999999999;
+ int n_tests_passed = 0;
+
+ printk("gator: running tests on %s\n", __FILE__);
+
+ n_tests_passed += test_is_read_scheduled(0, 0, 0, 0, 1, READ_INTERVAL_NSEC); /* Null time */
+ n_tests_passed += test_is_read_scheduled(100, 1000, 0, 0, 1, READ_INTERVAL_NSEC + 1000); /* Initial values */
+
+ n_tests_passed += test_is_read_scheduled(100, HIGHEST_NS, 100, HIGHEST_NS + 500, 0, HIGHEST_NS + 500);
+ n_tests_passed += test_is_read_scheduled(101, 0001, 100, HIGHEST_NS + 500, 0, HIGHEST_NS + 500 - NSEC_PER_SEC);
+ n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500 - NSEC_PER_SEC, 1, 600 + READ_INTERVAL_NSEC);
+
+ n_tests_passed += test_is_read_scheduled(101, 600, 100, HIGHEST_NS + 500, 1, 600 + READ_INTERVAL_NSEC);
+
+ printk("gator: %d tests passed\n", n_tests_passed);
+}
diff --git a/drivers/gator/gator_events_meminfo.c b/drivers/gator/gator_events_meminfo.c
new file mode 100644
index 0000000..451290d
--- /dev/null
+++ b/drivers/gator/gator_events_meminfo.c
@@ -0,0 +1,387 @@
+/**
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include "gator.h"
+
+#include <linux/hardirq.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <trace/events/kmem.h>
+
+enum {
+ MEMINFO_MEMFREE,
+ MEMINFO_MEMUSED,
+ MEMINFO_BUFFERRAM,
+ MEMINFO_TOTAL,
+};
+
+enum {
+ PROC_SIZE,
+ PROC_SHARE,
+ PROC_TEXT,
+ PROC_DATA,
+ PROC_COUNT,
+};
+
+static const char * const meminfo_names[] = {
+ "Linux_meminfo_memfree",
+ "Linux_meminfo_memused",
+ "Linux_meminfo_bufferram",
+};
+
+static const char * const proc_names[] = {
+ "Linux_proc_statm_size",
+ "Linux_proc_statm_share",
+ "Linux_proc_statm_text",
+ "Linux_proc_statm_data",
+};
+
+static bool meminfo_global_enabled;
+static ulong meminfo_enabled[MEMINFO_TOTAL];
+static ulong meminfo_keys[MEMINFO_TOTAL];
+static long long meminfo_buffer[2 * (MEMINFO_TOTAL + 2)];
+static int meminfo_length = 0;
+static bool new_data_avail;
+
+static bool proc_global_enabled;
+static ulong proc_enabled[PROC_COUNT];
+static ulong proc_keys[PROC_COUNT];
+static DEFINE_PER_CPU(long long, proc_buffer[2 * (PROC_COUNT + 3)]);
+
+static int gator_meminfo_func(void *data);
+static bool gator_meminfo_run;
+// Initialize semaphore unlocked to initialize memory values
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36)
+static DECLARE_MUTEX(gator_meminfo_sem);
+#else
+static DEFINE_SEMAPHORE(gator_meminfo_sem);
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+GATOR_DEFINE_PROBE(mm_page_free_direct, TP_PROTO(struct page *page, unsigned int order))
+#else
+GATOR_DEFINE_PROBE(mm_page_free, TP_PROTO(struct page *page, unsigned int order))
+#endif
+{
+ up(&gator_meminfo_sem);
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+GATOR_DEFINE_PROBE(mm_pagevec_free, TP_PROTO(struct page *page, int cold))
+#else
+GATOR_DEFINE_PROBE(mm_page_free_batched, TP_PROTO(struct page *page, int cold))
+#endif
+{
+ up(&gator_meminfo_sem);
+}
+
+GATOR_DEFINE_PROBE(mm_page_alloc, TP_PROTO(struct page *page, unsigned int order, gfp_t gfp_flags, int migratetype))
+{
+ up(&gator_meminfo_sem);
+}
+
+static int gator_events_meminfo_create_files(struct super_block *sb, struct dentry *root)
+{
+ struct dentry *dir;
+ int i;
+
+ for (i = 0; i < MEMINFO_TOTAL; i++) {
+ dir = gatorfs_mkdir(sb, root, meminfo_names[i]);
+ if (!dir) {
+ return -1;
+ }
+ gatorfs_create_ulong(sb, dir, "enabled", &meminfo_enabled[i]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &meminfo_keys[i]);
+ }
+
+ for (i = 0; i < PROC_COUNT; ++i) {
+ dir = gatorfs_mkdir(sb, root, proc_names[i]);
+ if (!dir) {
+ return -1;
+ }
+ gatorfs_create_ulong(sb, dir, "enabled", &proc_enabled[i]);
+ gatorfs_create_ro_ulong(sb, dir, "key", &proc_keys[i]);
+ }
+
+ return 0;
+}
+
+static int gator_events_meminfo_start(void)
+{
+ int i;
+
+ new_data_avail = false;
+ meminfo_global_enabled = 0;
+ for (i = 0; i < MEMINFO_TOTAL; i++) {
+ if (meminfo_enabled[i]) {
+ meminfo_global_enabled = 1;
+ break;
+ }
+ }
+
+ proc_global_enabled = 0;
+ for (i = 0; i < PROC_COUNT; ++i) {
+ if (proc_enabled[i]) {
+ proc_global_enabled = 1;
+ break;
+ }
+ }
+ if (meminfo_enabled[MEMINFO_MEMUSED]) {
+ proc_global_enabled = 1;
+ }
+
+ if (meminfo_global_enabled == 0)
+ return 0;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+ if (GATOR_REGISTER_TRACE(mm_page_free_direct))
+#else
+ if (GATOR_REGISTER_TRACE(mm_page_free))
+#endif
+ goto mm_page_free_exit;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+ if (GATOR_REGISTER_TRACE(mm_pagevec_free))
+#else
+ if (GATOR_REGISTER_TRACE(mm_page_free_batched))
+#endif
+ goto mm_page_free_batched_exit;
+ if (GATOR_REGISTER_TRACE(mm_page_alloc))
+ goto mm_page_alloc_exit;
+
+ // Start worker thread
+ gator_meminfo_run = true;
+ // Since the mutex starts unlocked, memory values will be initialized
+ if (IS_ERR(kthread_run(gator_meminfo_func, NULL, "gator_meminfo")))
+ goto kthread_run_exit;
+
+ return 0;
+
+kthread_run_exit:
+ GATOR_UNREGISTER_TRACE(mm_page_alloc);
+mm_page_alloc_exit:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+ GATOR_UNREGISTER_TRACE(mm_pagevec_free);
+#else
+ GATOR_UNREGISTER_TRACE(mm_page_free_batched);
+#endif
+mm_page_free_batched_exit:
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+ GATOR_UNREGISTER_TRACE(mm_page_free_direct);
+#else
+ GATOR_UNREGISTER_TRACE(mm_page_free);
+#endif
+mm_page_free_exit:
+ return -1;
+}
+
+static void gator_events_meminfo_stop(void)
+{
+ if (meminfo_global_enabled) {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0)
+ GATOR_UNREGISTER_TRACE(mm_page_free_direct);
+ GATOR_UNREGISTER_TRACE(mm_pagevec_free);
+#else
+ GATOR_UNREGISTER_TRACE(mm_page_free);
+ GATOR_UNREGISTER_TRACE(mm_page_free_batched);
+#endif
+ GATOR_UNREGISTER_TRACE(mm_page_alloc);
+
+ // Stop worker thread
+ gator_meminfo_run = false;
+ up(&gator_meminfo_sem);
+ }
+}
+
+// Must be run in process context as the kernel function si_meminfo() can sleep
+static int gator_meminfo_func(void *data)
+{
+ struct sysinfo info;
+ int i, len;
+ unsigned long long value;
+
+ for (;;) {
+ if (down_killable(&gator_meminfo_sem)) {
+ break;
+ }
+
+ // Eat up any pending events
+ while (!down_trylock(&gator_meminfo_sem));
+
+ if (!gator_meminfo_run) {
+ break;
+ }
+
+ meminfo_length = len = 0;
+
+ si_meminfo(&info);
+ for (i = 0; i < MEMINFO_TOTAL; i++) {
+ if (meminfo_enabled[i]) {
+ switch (i) {
+ case MEMINFO_MEMFREE:
+ value = info.freeram * PAGE_SIZE;
+ break;
+ case MEMINFO_MEMUSED:
+ // pid -1 means system wide
+ meminfo_buffer[len++] = 1;
+ meminfo_buffer[len++] = -1;
+ // Emit value
+ meminfo_buffer[len++] = meminfo_keys[MEMINFO_MEMUSED];
+ meminfo_buffer[len++] = (info.totalram - info.freeram) * PAGE_SIZE;
+ // Clear pid
+ meminfo_buffer[len++] = 1;
+ meminfo_buffer[len++] = 0;
+ continue;
+ case MEMINFO_BUFFERRAM:
+ value = info.bufferram * PAGE_SIZE;
+ break;
+ default:
+ value = 0;
+ break;
+ }
+ meminfo_buffer[len++] = meminfo_keys[i];
+ meminfo_buffer[len++] = value;
+ }
+ }
+
+ meminfo_length = len;
+ new_data_avail = true;
+ }
+
+ return 0;
+}
+
+static int gator_events_meminfo_read(long long **buffer)
+{
+ if (!on_primary_core() || !meminfo_global_enabled)
+ return 0;
+
+ if (!new_data_avail)
+ return 0;
+
+ new_data_avail = false;
+
+ if (buffer)
+ *buffer = meminfo_buffer;
+
+ return meminfo_length;
+}
+
+static int gator_events_meminfo_read_proc(long long **buffer, struct task_struct *task)
+{
+ struct mm_struct *mm;
+ u64 share = 0;
+ int i;
+ long long value;
+ int len = 0;
+ int cpu = get_physical_cpu();
+ long long *buf = per_cpu(proc_buffer, cpu);
+
+ if (!proc_global_enabled) {
+ return 0;
+ }
+
+ // Collect the memory stats of the process instead of the thread
+ if (task->group_leader != NULL) {
+ task = task->group_leader;
+ }
+
+ // get_task_mm/mmput is not needed in this context because the task and it's mm are required as part of the sched_switch
+ mm = task->mm;
+ if (mm == NULL) {
+ return 0;
+ }
+
+ // Derived from task_statm in fs/proc/task_mmu.c
+ if (meminfo_enabled[MEMINFO_MEMUSED] || proc_enabled[PROC_SHARE]) {
+ share = get_mm_counter(mm,
+#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
+ file_rss
+#else
+ MM_FILEPAGES
+#endif
+ );
+ }
+
+ // key of 1 indicates a pid
+ buf[len++] = 1;
+ buf[len++] = task->pid;
+
+ for (i = 0; i < PROC_COUNT; ++i) {
+ if (proc_enabled[i]) {
+ switch (i) {
+ case PROC_SIZE:
+ value = mm->total_vm;
+ break;
+ case PROC_SHARE:
+ value = share;
+ break;
+ case PROC_TEXT:
+ value = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT;
+ break;
+ case PROC_DATA:
+ value = mm->total_vm - mm->shared_vm;
+ break;
+ }
+
+ buf[len++] = proc_keys[i];
+ buf[len++] = value * PAGE_SIZE;
+ }
+ }
+
+ if (meminfo_enabled[MEMINFO_MEMUSED]) {
+ value = share + get_mm_counter(mm,
+#if LINUX_VERSION_CODE == KERNEL_VERSION(2, 6, 32)
+ anon_rss
+#else
+ MM_ANONPAGES
+#endif
+ );
+ // Send resident for this pid
+ buf[len++] = meminfo_keys[MEMINFO_MEMUSED];
+ buf[len++] = value * PAGE_SIZE;
+ }
+
+ // Clear pid
+ buf[len++] = 1;
+ buf[len++] = 0;
+
+ if (buffer)
+ *buffer = buf;
+
+ return len;
+}
+
+static struct gator_interface gator_events_meminfo_interface = {
+ .create_files = gator_events_meminfo_create_files,
+ .start = gator_events_meminfo_start,
+ .stop = gator_events_meminfo_stop,
+ .read64 = gator_events_meminfo_read,
+ .read_proc = gator_events_meminfo_read_proc,
+};
+
+int gator_events_meminfo_init(void)
+{
+ int i;
+
+ meminfo_global_enabled = 0;
+ for (i = 0; i < MEMINFO_TOTAL; i++) {
+ meminfo_enabled[i] = 0;
+ meminfo_keys[i] = gator_events_get_key();
+ }
+
+ proc_global_enabled = 0;
+ for (i = 0; i < PROC_COUNT; ++i) {
+ proc_enabled[i] = 0;
+ proc_keys[i] = gator_events_get_key();
+ }
+
+ return gator_events_install(&gator_events_meminfo_interface);
+}
diff --git a/drivers/gator/gator_events_mmapped.c b/drivers/gator/gator_events_mmapped.c
new file mode 100644
index 0000000..f055e48
--- /dev/null
+++ b/drivers/gator/gator_events_mmapped.c
@@ -0,0 +1,209 @@
+/*
+ * Example events provider
+ *
+ * Copyright (C) ARM Limited 2010-2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Similar entries to those below must be present in the events.xml file.
+ * To add them to the events.xml, create an events-mmap.xml with the
+ * following contents and rebuild gatord:
+ *
+ * <counter_set name="mmapped_cnt" count="3"/>
+ * <category name="mmapped" counter_set="mmapped_cnt" per_cpu="no">
+ * <event event="0x0" title="Simulated1" name="Sine" display="maximum" average_selection="yes" description="Sort-of-sine"/>
+ * <event event="0x1" title="Simulated2" name="Triangle" display="maximum" average_selection="yes" description="Triangular wave"/>
+ * <event event="0x2" title="Simulated3" name="PWM" display="maximum" average_selection="yes" description="PWM Signal"/>
+ * </category>
+ *
+ * When adding custom events, be sure do the following
+ * - add any needed .c files to the gator driver Makefile
+ * - call gator_events_install in the events init function
+ * - add the init function to GATOR_EVENTS_LIST in gator_main.c
+ * - add a new events-*.xml file to the gator daemon and rebuild
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/ratelimit.h>
+
+#include "gator.h"
+
+#define MMAPPED_COUNTERS_NUM 3
+
+static int mmapped_global_enabled;
+
+static struct {
+ unsigned long enabled;
+ unsigned long event;
+ unsigned long key;
+} mmapped_counters[MMAPPED_COUNTERS_NUM];
+
+static int mmapped_buffer[MMAPPED_COUNTERS_NUM * 2];
+
+static s64 prev_time;
+
+/* Adds mmapped_cntX directories and enabled, event, and key files to /dev/gator/events */
+static int gator_events_mmapped_create_files(struct super_block *sb,
+ struct dentry *root)
+{
+ int i;
+
+ for (i = 0; i < MMAPPED_COUNTERS_NUM; i++) {
+ char buf[16];
+ struct dentry *dir;
+
+ snprintf(buf, sizeof(buf), "mmapped_cnt%d", i);
+ dir = gatorfs_mkdir(sb, root, buf);
+ if (WARN_ON(!dir))
+ return -1;
+ gatorfs_create_ulong(sb, dir, "enabled",
+ &mmapped_counters[i].enabled);
+ gatorfs_create_ulong(sb, dir, "event",
+ &mmapped_counters[i].event);
+ gatorfs_create_ro_ulong(sb, dir, "key",
+ &mmapped_counters[i].key);
+ }
+
+ return 0;
+}
+
+static int gator_events_mmapped_start(void)
+{