diff options
author | Mark Brown <broonie@linaro.org> | 2014-07-22 23:58:00 +0100 |
---|---|---|
committer | Mark Brown <broonie@linaro.org> | 2014-07-22 23:58:00 +0100 |
commit | c05d3b7ddcf9658c919b22e6d0f0c10640e9a93a (patch) | |
tree | f23f8fd2013e7b5fa04bd55a3f2f0abcf59941c7 | |
parent | 8e8420b5bc8de83907f7473c239b6a2472f493b9 (diff) | |
parent | f396deae7a8c6cc72fffdf2623bd14cc8fc44b4b (diff) |
Merge branch 'linux-linaro-lsk-v3.14' into linux-linaro-lsk-v3.14-android
66 files changed, 7176 insertions, 89 deletions
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt index 3e1e498fea96..a2dbf67c1cb7 100644 --- a/Documentation/devicetree/bindings/arm/pmu.txt +++ b/Documentation/devicetree/bindings/arm/pmu.txt @@ -17,6 +17,9 @@ Required properties: "arm,arm1176-pmu" "arm,arm1136-pmu" - interrupts : 1 combined interrupt or 1 per core. +- cluster : a phandle to the cluster to which it belongs + If there are more than one cluster with same CPU type + then there should be separate PMU nodes per cluster. Example: diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 982d8ab9235a..fad06762f73c 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -312,7 +312,14 @@ dtb-$(CONFIG_ARCH_U300) += ste-u300.dtb dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2p-ca5s.dtb \ vexpress-v2p-ca9.dtb \ vexpress-v2p-ca15-tc1.dtb \ - vexpress-v2p-ca15_a7.dtb + vexpress-v2p-ca15_a7.dtb \ + rtsm_ve-cortex_a9x2.dtb \ + rtsm_ve-cortex_a9x4.dtb \ + rtsm_ve-cortex_a15x1.dtb \ + rtsm_ve-cortex_a15x2.dtb \ + rtsm_ve-cortex_a15x4.dtb \ + rtsm_ve-v2p-ca15x1-ca7x1.dtb \ + rtsm_ve-v2p-ca15x4-ca7x4.dtb dtb-$(CONFIG_ARCH_VIRT) += xenvm-4.2.dtb dtb-$(CONFIG_ARCH_VT8500) += vt8500-bv07.dtb \ wm8505-ref.dtb \ diff --git a/arch/arm/boot/dts/clcd-panels.dtsi b/arch/arm/boot/dts/clcd-panels.dtsi new file mode 100644 index 000000000000..0b0ff6ead4b2 --- /dev/null +++ b/arch/arm/boot/dts/clcd-panels.dtsi @@ -0,0 +1,52 @@ +/* + * ARM Ltd. Versatile Express + * + */ + +/ { + panels { + panel@0 { + compatible = "panel"; + mode = "VGA"; + refresh = <60>; + xres = <640>; + yres = <480>; + pixclock = <39721>; + left_margin = <40>; + right_margin = <24>; + upper_margin = <32>; + lower_margin = <11>; + hsync_len = <96>; + vsync_len = <2>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + + panel@1 { + compatible = "panel"; + mode = "XVGA"; + refresh = <60>; + xres = <1024>; + yres = <768>; + pixclock = <15748>; + left_margin = <152>; + right_margin = <48>; + upper_margin = <23>; + lower_margin = <3>; + hsync_len = <104>; + vsync_len = <4>; + sync = <0>; + vmode = "FB_VMODE_NONINTERLACED"; + + tim2 = "TIM2_BCD", "TIM2_IPC"; + cntl = "CNTL_LCDTFT", "CNTL_BGR", "CNTL_LCDVCOMP(1)"; + caps = "CLCD_CAP_5551", "CLCD_CAP_565", "CLCD_CAP_888"; + bpp = <16>; + }; + }; +}; diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts new file mode 100644 index 000000000000..c9eee916aa7e --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x1.dts @@ -0,0 +1,159 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x1CT + * + * RTSM_VE_Cortex_A15x1.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x1"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x1", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts new file mode 100644 index 000000000000..853a166e3c32 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x2.dts @@ -0,0 +1,165 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x2CT + * + * RTSM_VE_Cortex_A15x2.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x2"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x2", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts new file mode 100644 index 000000000000..c1947a3a5c88 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a15x4.dts @@ -0,0 +1,177 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * + * RTSM_VE_Cortex_A15x4.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA15x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <3>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts new file mode 100644 index 000000000000..fca6b2f79677 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x2.dts @@ -0,0 +1,171 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA9MPx2CT + * + * RTSM_VE_Cortex_A9x2.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA9x2"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a9x2", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <1>; + #size-cells = <1>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <1>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x80000000>; + }; + + scu@2c000000 { + compatible = "arm,cortex-a9-scu"; + reg = <0x2c000000 0x58>; + }; + + timer@2c000600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x2c000600 0x20>; + interrupts = <1 13 0xf04>; + }; + + watchdog@2c000620 { + compatible = "arm,cortex-a9-twd-wdt"; + reg = <0x2c000620 0x20>; + interrupts = <1 14 0xf04>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x2c001000 0x1000>, + <0x2c000100 0x100>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0x08000000 0x04000000>, + <1 0 0x14000000 0x04000000>, + <2 0 0x18000000 0x04000000>, + <3 0 0x1c000000 0x04000000>, + <4 0 0x0c000000 0x04000000>, + <5 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts new file mode 100644 index 000000000000..fd8a6ed97a04 --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-cortex_a9x4.dts @@ -0,0 +1,183 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA9MPx4CT + * + * RTSM_VE_Cortex_A9x4.lisa + */ + +/dts-v1/; + +/ { + model = "RTSM_VE_CortexA9x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a9x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <1>; + #size-cells = <1>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <1>; + }; + + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a9"; + reg = <3>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x80000000>; + }; + + scu@2c000000 { + compatible = "arm,cortex-a9-scu"; + reg = <0x2c000000 0x58>; + }; + + timer@2c000600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x2c000600 0x20>; + interrupts = <1 13 0xf04>; + }; + + watchdog@2c000620 { + compatible = "arm,cortex-a9-twd-wdt"; + reg = <0x2c000620 0x20>; + interrupts = <1 14 0xf04>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0x2c001000 0x1000>, + <0x2c000100 0x100>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0x08000000 0x04000000>, + <1 0 0x14000000 0x04000000>, + <2 0 0x18000000 0x04000000>, + <3 0 0x1c000000 0x04000000>, + <4 0 0x0c000000 0x04000000>, + <5 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi new file mode 100644 index 000000000000..a2d895ee5faa --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-motherboard.dtsi @@ -0,0 +1,231 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * Motherboard component + * + * VEMotherBoard.lisa + */ + + motherboard { + compatible = "arm,vexpress,v2m-p1", "simple-bus"; + arm,hbi = <0x190>; + arm,vexpress,site = <0>; + arm,v2m-memory-map = "rs1"; + #address-cells = <2>; /* SMB chipselect number and offset */ + #size-cells = <1>; + #interrupt-cells = <1>; + ranges; + + flash@0,00000000 { + compatible = "arm,vexpress-flash", "cfi-flash"; + reg = <0 0x00000000 0x04000000>, + <4 0x00000000 0x04000000>; + bank-width = <4>; + }; + + vram@2,00000000 { + compatible = "arm,vexpress-vram"; + reg = <2 0x00000000 0x00800000>; + }; + + ethernet@2,02000000 { + compatible = "smsc,lan91c111"; + reg = <2 0x02000000 0x10000>; + interrupts = <15>; + }; + + iofpga@3,00000000 { + compatible = "arm,amba-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 3 0 0x200000>; + + v2m_sysreg: sysreg@010000 { + compatible = "arm,vexpress-sysreg"; + reg = <0x010000 0x1000>; + gpio-controller; + #gpio-cells = <2>; + }; + + v2m_sysctl: sysctl@020000 { + compatible = "arm,sp810", "arm,primecell"; + reg = <0x020000 0x1000>; + clocks = <&v2m_refclk32khz>, <&v2m_refclk1mhz>, <&smbclk>; + clock-names = "refclk", "timclk", "apb_pclk"; + #clock-cells = <1>; + clock-output-names = "timerclken0", "timerclken1", "timerclken2", "timerclken3"; + }; + + aaci@040000 { + compatible = "arm,pl041", "arm,primecell"; + reg = <0x040000 0x1000>; + interrupts = <11>; + clocks = <&smbclk>; + clock-names = "apb_pclk"; + }; + + mmci@050000 { + compatible = "arm,pl180", "arm,primecell"; + reg = <0x050000 0x1000>; + interrupts = <9 10>; + cd-gpios = <&v2m_sysreg 0 0>; + wp-gpios = <&v2m_sysreg 1 0>; + max-frequency = <12000000>; + vmmc-supply = <&v2m_fixed_3v3>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "mclk", "apb_pclk"; + }; + + kmi@060000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x060000 0x1000>; + interrupts = <12>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + kmi@070000 { + compatible = "arm,pl050", "arm,primecell"; + reg = <0x070000 0x1000>; + interrupts = <13>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "KMIREFCLK", "apb_pclk"; + }; + + v2m_serial0: uart@090000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x090000 0x1000>; + interrupts = <5>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial1: uart@0a0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0a0000 0x1000>; + interrupts = <6>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial2: uart@0b0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0b0000 0x1000>; + interrupts = <7>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + v2m_serial3: uart@0c0000 { + compatible = "arm,pl011", "arm,primecell"; + reg = <0x0c0000 0x1000>; + interrupts = <8>; + clocks = <&v2m_clk24mhz>, <&smbclk>; + clock-names = "uartclk", "apb_pclk"; + }; + + wdt@0f0000 { + compatible = "arm,sp805", "arm,primecell"; + reg = <0x0f0000 0x1000>; + interrupts = <0>; + clocks = <&v2m_refclk32khz>, <&smbclk>; + clock-names = "wdogclk", "apb_pclk"; + }; + + v2m_timer01: timer@110000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x110000 0x1000>; + interrupts = <2>; + clocks = <&v2m_sysctl 0>, <&v2m_sysctl 1>, <&smbclk>; + clock-names = "timclken1", "timclken2", "apb_pclk"; + }; + + v2m_timer23: timer@120000 { + compatible = "arm,sp804", "arm,primecell"; + reg = <0x120000 0x1000>; + interrupts = <3>; + clocks = <&v2m_sysctl 2>, <&v2m_sysctl 3>, <&smbclk>; + clock-names = "timclken1", "timclken2", "apb_pclk"; + }; + + rtc@170000 { + compatible = "arm,pl031", "arm,primecell"; + reg = <0x170000 0x1000>; + interrupts = <4>; + clocks = <&smbclk>; + clock-names = "apb_pclk"; + }; + + clcd@1f0000 { + compatible = "arm,pl111", "arm,primecell"; + reg = <0x1f0000 0x1000>; + interrupts = <14>; + clocks = <&v2m_oscclk1>, <&smbclk>; + clock-names = "v2m:oscclk1", "apb_pclk"; + mode = "VGA"; + use_dma = <0>; + framebuffer = <0x18000000 0x00180000>; + }; + + virtio_block@0130000 { + compatible = "virtio,mmio"; + reg = <0x130000 0x200>; + interrupts = <42>; + }; + + }; + + v2m_fixed_3v3: fixedregulator@0 { + compatible = "regulator-fixed"; + regulator-name = "3V3"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + v2m_clk24mhz: clk24mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <24000000>; + clock-output-names = "v2m:clk24mhz"; + }; + + v2m_refclk1mhz: refclk1mhz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <1000000>; + clock-output-names = "v2m:refclk1mhz"; + }; + + v2m_refclk32khz: refclk32khz { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <32768>; + clock-output-names = "v2m:refclk32khz"; + }; + + mcc { + compatible = "simple-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + v2m_oscclk1: osc@1 { + /* CLCD clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <23750000 63500000>; + #clock-cells = <0>; + clock-output-names = "v2m:oscclk1"; + }; + + muxfpga@0 { + compatible = "arm,vexpress-muxfpga"; + arm,vexpress-sysreg,func = <7 0>; + }; + + shutdown@0 { + compatible = "arm,vexpress-shutdown"; + arm,vexpress-sysreg,func = <8 0>; + }; + }; + }; diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts new file mode 100644 index 000000000000..c59f4b5cdf9e --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x1-ca7x1.dts @@ -0,0 +1,233 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * ARMCortexA7x4CT + * RTSM_VE_Cortex_A15x1_A7x1.lisa + */ + +/dts-v1/; + +/memreserve/ 0xff000000 0x01000000; + +/ { + model = "RTSM_VE_CortexA15x1-A7x1"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x1_a7x1", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + clusters { + #address-cells = <1>; + #size-cells = <0>; + + cluster0: cluster@0 { + reg = <0>; +// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + }; + }; + + cluster1: cluster@1 { + reg = <1>; +// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core1: core@0 { + reg = <0>; + }; + + }; + }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cluster = <&cluster0>; + core = <&core0>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + cluster = <&cluster1>; + core = <&core1>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + cci@2c090000 { + compatible = "arm,cci-400", "arm,cci"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0 0x2c090000 0 0x1000>; + ranges = <0x0 0x0 0x2c090000 0x10000>; + + cci_control1: slave-if@4000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x4000 0x1000>; + }; + + cci_control2: slave-if@5000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x5000 0x1000>; + }; + }; + + dcscb@60000000 { + compatible = "arm,rtsm,dcscb"; + reg = <0 0x60000000 0 0x1000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts new file mode 100644 index 000000000000..5f1e727be28c --- /dev/null +++ b/arch/arm/boot/dts/rtsm_ve-v2p-ca15x4-ca7x4.dts @@ -0,0 +1,317 @@ +/* + * ARM Ltd. Fast Models + * + * Versatile Express (VE) system model + * ARMCortexA15x4CT + * ARMCortexA7x4CT + * RTSM_VE_Cortex_A15x4_A7x4.lisa + */ + +/dts-v1/; + +/memreserve/ 0xff000000 0x01000000; + +/ { + model = "RTSM_VE_CortexA15x4-A7x4"; + arm,vexpress,site = <0xf>; + compatible = "arm,rtsm_ve,cortex_a15x4_a7x4", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + }; + + clusters { + #address-cells = <1>; + #size-cells = <0>; + + cluster0: cluster@0 { + reg = <0>; +// freqs = <500000000 600000000 700000000 800000000 900000000 1000000000 1100000000 1200000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + core1: core@1 { + reg = <1>; + }; + + core2: core@2 { + reg = <2>; + }; + + core3: core@3 { + reg = <3>; + }; + + }; + }; + + cluster1: cluster@1 { + reg = <1>; +// freqs = <350000000 400000000 500000000 600000000 700000000 800000000 900000000 1000000000>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core4: core@0 { + reg = <0>; + }; + + core5: core@1 { + reg = <1>; + }; + + core6: core@2 { + reg = <2>; + }; + + core7: core@3 { + reg = <3>; + }; + + }; + }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cluster = <&cluster0>; + core = <&core0>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + cluster = <&cluster0>; + core = <&core1>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu2: cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <2>; + cluster = <&cluster0>; + core = <&core2>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu3: cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <3>; + cluster = <&cluster0>; + core = <&core3>; +// clock-frequency = <1000000000>; + cci-control-port = <&cci_control1>; + }; + + cpu4: cpu@4 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x100>; + cluster = <&cluster1>; + core = <&core4>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu5: cpu@5 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x101>; + cluster = <&cluster1>; + core = <&core5>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu6: cpu@6 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x102>; + cluster = <&cluster1>; + core = <&core6>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + + cpu7: cpu@7 { + device_type = "cpu"; + compatible = "arm,cortex-a7"; + reg = <0x103>; + cluster = <&cluster1>; + core = <&core7>; +// clock-frequency = <800000000>; + cci-control-port = <&cci_control2>; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; + }; + + cci@2c090000 { + compatible = "arm,cci-400", "arm,cci"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0 0x2c090000 0 0x1000>; + ranges = <0x0 0x0 0x2c090000 0x10000>; + + cci_control1: slave-if@4000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x4000 0x1000>; + }; + + cci_control2: slave-if@5000 { + compatible = "arm,cci-400-ctrl-if"; + interface-type = "ace"; + reg = <0x5000 0x1000>; + }; + }; + + dcscb@60000000 { + compatible = "arm,rtsm,dcscb"; + reg = <0 0x60000000 0 0x1000>; + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,cortex-a15-gic", "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x1000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = <1 9 0xf04>; + }; + + timer { + compatible = "arm,armv7-timer"; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + osc@0 { + /* ACLK clock to the AXI master port on the test chip */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 0>; + freq-range = <30000000 50000000>; + #clock-cells = <0>; + clock-output-names = "extsaxiclk"; + }; + + oscclk1: osc@1 { + /* Reference clock for the CLCD */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 1>; + freq-range = <10000000 80000000>; + #clock-cells = <0>; + clock-output-names = "clcdclk"; + }; + + smbclk: oscclk2: osc@2 { + /* Reference clock for the test chip internal PLLs */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 2>; + freq-range = <33000000 100000000>; + #clock-cells = <0>; + clock-output-names = "tcrefclk"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic 0 0 4>, + <0 0 1 &gic 0 1 4>, + <0 0 2 &gic 0 2 4>, + <0 0 3 &gic 0 3 4>, + <0 0 4 &gic 0 4 4>, + <0 0 5 &gic 0 5 4>, + <0 0 6 &gic 0 6 4>, + <0 0 7 &gic 0 7 4>, + <0 0 8 &gic 0 8 4>, + <0 0 9 &gic 0 9 4>, + <0 0 10 &gic 0 10 4>, + <0 0 11 &gic 0 11 4>, + <0 0 12 &gic 0 12 4>, + <0 0 13 &gic 0 13 4>, + <0 0 14 &gic 0 14 4>, + <0 0 15 &gic 0 15 4>, + <0 0 16 &gic 0 16 4>, + <0 0 17 &gic 0 17 4>, + <0 0 18 &gic 0 18 4>, + <0 0 19 &gic 0 19 4>, + <0 0 20 &gic 0 20 4>, + <0 0 21 &gic 0 21 4>, + <0 0 22 &gic 0 22 4>, + <0 0 23 &gic 0 23 4>, + <0 0 24 &gic 0 24 4>, + <0 0 25 &gic 0 25 4>, + <0 0 26 &gic 0 26 4>, + <0 0 27 &gic 0 27 4>, + <0 0 28 &gic 0 28 4>, + <0 0 29 &gic 0 29 4>, + <0 0 30 &gic 0 30 4>, + <0 0 31 &gic 0 31 4>, + <0 0 32 &gic 0 32 4>, + <0 0 33 &gic 0 33 4>, + <0 0 34 &gic 0 34 4>, + <0 0 35 &gic 0 35 4>, + <0 0 36 &gic 0 36 4>, + <0 0 37 &gic 0 37 4>, + <0 0 38 &gic 0 38 4>, + <0 0 39 &gic 0 39 4>, + <0 0 40 &gic 0 40 4>, + <0 0 41 &gic 0 41 4>, + <0 0 42 &gic 0 42 4>; + + /include/ "rtsm_ve-motherboard.dtsi" + }; +}; + +/include/ "clcd-panels.dtsi" diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi index ac870fb3fa0d..9584232ee6b6 100644 --- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi @@ -228,6 +228,7 @@ }; clcd@1f0000 { + status = "disabled"; compatible = "arm,pl111", "arm,primecell"; reg = <0x1f0000 0x1000>; interrupts = <14>; diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi b/arch/arm/boot/dts/vexpress-v2m.dtsi index f1420368355b..6593398c11ae 100644 --- a/arch/arm/boot/dts/vexpress-v2m.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m.dtsi @@ -227,6 +227,7 @@ }; clcd@1f000 { + status = "disabled"; compatible = "arm,pl111", "arm,primecell"; reg = <0x1f000 0x1000>; interrupts = <14>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts index 9420053acc14..7cbe38cecf6c 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15-tc1.dts @@ -9,6 +9,8 @@ /dts-v1/; +/memreserve/ 0xbf000000 0x01000000; + / { model = "V2P-CA15"; arm,hbi = <0x237>; @@ -57,6 +59,8 @@ interrupts = <0 85 4>; clocks = <&oscclk5>; clock-names = "pxlclk"; + mode = "1024x768-16@60"; + framebuffer = <0 0xbf000000 0 0x01000000>; }; memory-controller@2b0a0000 { diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index 15f98cbcb75a..e56356a249cd 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -9,6 +9,8 @@ /dts-v1/; +/memreserve/ 0xff000000 0x01000000; + / { model = "V2P-CA15_CA7"; arm,hbi = <0x249>; @@ -29,29 +31,60 @@ i2c1 = &v2m_i2c_pcie; }; - cpus { + clusters { #address-cells = <1>; #size-cells = <0>; - cpu0: cpu@0 { - device_type = "cpu"; - compatible = "arm,cortex-a15"; + cluster0: cluster@0 { reg = <0>; - cci-control-port = <&cci_control1>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core0: core@0 { + reg = <0>; + }; + + core1: core@1 { + reg = <1>; + }; + + }; }; - cpu1: cpu@1 { - device_type = "cpu"; - compatible = "arm,cortex-a15"; + cluster1: cluster@1 { reg = <1>; - cci-control-port = <&cci_control1>; + cores { + #address-cells = <1>; + #size-cells = <0>; + + core2: core@0 { + reg = <0>; + }; + + core3: core@1 { + reg = <1>; + }; + + core4: core@2 { + reg = <2>; + }; + }; }; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; cpu2: cpu@2 { device_type = "cpu"; compatible = "arm,cortex-a7"; reg = <0x100>; cci-control-port = <&cci_control2>; + cluster = <&cluster1>; + core = <&core2>; + clock-frequency = <800000000>; }; cpu3: cpu@3 { @@ -59,6 +92,9 @@ compatible = "arm,cortex-a7"; reg = <0x101>; cci-control-port = <&cci_control2>; + cluster = <&cluster1>; + core = <&core3>; + clock-frequency = <800000000>; }; cpu4: cpu@4 { @@ -66,12 +102,35 @@ compatible = "arm,cortex-a7"; reg = <0x102>; cci-control-port = <&cci_control2>; + cluster = <&cluster1>; + core = <&core4>; + clock-frequency = <800000000>; + }; + + cpu0: cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <0>; + cci-control-port = <&cci_control1>; + cluster = <&cluster0>; + core = <&core0>; + clock-frequency = <1000000000>; + }; + + cpu1: cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a15"; + reg = <1>; + cci-control-port = <&cci_control1>; + cluster = <&cluster0>; + core = <&core1>; + clock-frequency = <1000000000>; }; }; memory@80000000 { device_type = "memory"; - reg = <0 0x80000000 0 0x40000000>; + reg = <0 0x80000000 0 0x80000000>; }; wdt@2a490000 { @@ -86,6 +145,8 @@ compatible = "arm,hdlcd"; reg = <0 0x2b000000 0 0x1000>; interrupts = <0 85 4>; + mode = "1024x768-16@60"; + framebuffer = <0 0xff000000 0 0x01000000>; clocks = <&oscclk5>; clock-names = "pxlclk"; }; @@ -127,6 +188,16 @@ interface-type = "ace"; reg = <0x5000 0x1000>; }; + + pmu@9000 { + compatible = "arm,cci-400-pmu"; + reg = <0x9000 0x5000>; + interrupts = <0 101 4>, + <0 102 4>, + <0 103 4>, + <0 104 4>, + <0 105 4>; + }; }; memory-controller@7ffd0000 { @@ -164,12 +235,21 @@ <1 10 0xf08>; }; - pmu { + pmu_a15 { compatible = "arm,cortex-a15-pmu"; + cluster = <&cluster0>; interrupts = <0 68 4>, <0 69 4>; }; + pmu_a7 { + compatible = "arm,cortex-a7-pmu"; + cluster = <&cluster1>; + interrupts = <0 128 4>, + <0 129 4>, + <0 130 4>; + }; + oscclk6a: oscclk6a { /* Reference 24MHz clock */ compatible = "fixed-clock"; @@ -178,6 +258,19 @@ clock-output-names = "oscclk6a"; }; +/* PSCI requires support from firmware and is not present in the normal TC2 + * distribution, so this node is commented out by default... + + psci { + compatible = "arm,psci"; + method = "smc"; + cpu_suspend = <0x80100001>; + cpu_off = <0x80100002>; + cpu_on = <0x80100003>; + migrate = <0x80100004>; + }; +*/ + dcc { compatible = "arm,vexpress,config-bus"; arm,vexpress,config-bridge = <&v2m_sysreg>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts index c544a5504591..cf633ed6a1b4 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca5s.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca5s.dts @@ -9,6 +9,8 @@ /dts-v1/; +/memreserve/ 0xbf000000 0x01000000; + / { model = "V2P-CA5s"; arm,hbi = <0x225>; @@ -59,6 +61,8 @@ interrupts = <0 85 4>; clocks = <&oscclk3>; clock-names = "pxlclk"; + mode = "640x480-16@60"; + framebuffer = <0xbf000000 0x01000000>; }; memory-controller@2a150000 { diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts index 62d9b225dcce..f83706bd3f9a 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts @@ -9,6 +9,8 @@ /dts-v1/; +/include/ "clcd-panels.dtsi" + / { model = "V2P-CA9"; arm,hbi = <0x191>; @@ -73,6 +75,8 @@ interrupts = <0 44 4>; clocks = <&oscclk1>, <&oscclk2>; clock-names = "clcdclk", "apb_pclk"; + mode = "XVGA"; + use_dma = <1>; }; memory-controller@100e0000 { diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 4ff5d4c226bc..14e592312033 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_SHARP_SCOOP) += scoop.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o +CFLAGS_REMOVE_mcpm_entry.o = -pg AFLAGS_mcpm_head.o := -march=armv7-a AFLAGS_vlock.o := -march=armv7-a obj-$(CONFIG_TI_PRIV_EDMA) += edma.o diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 0704e0cf5571..be467f3780dd 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -107,7 +107,6 @@ static inline void arch_counter_set_user_access(void) /* Also disable virtual event stream */ cntkctl &= ~(ARCH_TIMER_USR_PT_ACCESS_EN | ARCH_TIMER_USR_VT_ACCESS_EN - | ARCH_TIMER_VIRT_EVT_EN | ARCH_TIMER_USR_VCT_ACCESS_EN | ARCH_TIMER_USR_PCT_ACCESS_EN); arch_timer_set_cntkctl(cntkctl); diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index f24edad26c70..0cd7824ca762 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -62,9 +62,19 @@ struct pmu_hw_events { raw_spinlock_t pmu_lock; }; +struct cpupmu_regs { + u32 pmc; + u32 pmcntenset; + u32 pmuseren; + u32 pmintenset; + u32 pmxevttype[8]; + u32 pmxevtcnt[8]; +}; + struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; + cpumask_t valid_cpus; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct perf_event *event); @@ -81,6 +91,8 @@ struct arm_pmu { int (*request_irq)(struct arm_pmu *, irq_handler_t handler); void (*free_irq)(struct arm_pmu *); int (*map_event)(struct perf_event *event); + void (*save_regs)(struct arm_pmu *, struct cpupmu_regs *); + void (*restore_regs)(struct arm_pmu *, struct cpupmu_regs *); int num_events; atomic_t active_events; struct mutex reserve_mutex; diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index c4ae171850f8..eb628a6679e8 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -16,6 +16,10 @@ #define PSCI_POWER_STATE_TYPE_STANDBY 0 #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 +#define PSCI_POWER_STATE_AFFINITY_LEVEL0 0 +#define PSCI_POWER_STATE_AFFINITY_LEVEL1 1 +#define PSCI_POWER_STATE_AFFINITY_LEVEL2 2 +#define PSCI_POWER_STATE_AFFINITY_LEVEL3 3 struct psci_power_state { u16 id; @@ -42,4 +46,12 @@ static inline void psci_init(void) { } static inline bool psci_smp_available(void) { return false; } #endif +#ifdef CONFIG_ARM_PSCI +extern int psci_probe(void); +#else +static inline int psci_probe(void) +{ + return -ENODEV; +} +#endif #endif /* __ASM_ARM_PSCI_H */ diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 58b8b84adcd2..611edefaeaf1 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -26,11 +26,14 @@ extern struct cputopo_arm cpu_topology[NR_CPUS]; void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask); #else static inline void init_cpu_topology(void) { } static inline void store_cpu_topology(unsigned int cpuid) { } +static inline int cluster_to_logical_mask(unsigned int socket_id, + cpumask_t *cluster_mask) { return -EINVAL; } #endif diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 789d846a9184..42059a54a9c0 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -12,6 +12,7 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt +#include <linux/cpumask.h> #include <linux/kernel.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> @@ -86,6 +87,9 @@ armpmu_map_event(struct perf_event *event, return armpmu_map_cache_event(cache_map, config); case PERF_TYPE_RAW: return armpmu_map_raw_event(raw_event_mask, config); + default: + if (event->attr.type >= PERF_TYPE_MAX) + return armpmu_map_raw_event(raw_event_mask, config); } return -ENOENT; @@ -159,6 +163,8 @@ armpmu_stop(struct perf_event *event, int flags) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; /* * ARM pmu always has to update the counter, so ignore * PERF_EF_UPDATE, see comments in armpmu_start(). @@ -175,6 +181,8 @@ static void armpmu_start(struct perf_event *event, int flags) struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; /* * ARM pmu always has to reprogram the period, so ignore * PERF_EF_RELOAD, see the comment below. @@ -202,6 +210,9 @@ armpmu_del(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return; + armpmu_stop(event, PERF_EF_UPDATE); hw_events->events[idx] = NULL; clear_bit(idx, hw_events->used_mask); @@ -218,6 +229,10 @@ armpmu_add(struct perf_event *event, int flags) int idx; int err = 0; + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->valid_cpus)) + return 0; + perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ @@ -419,6 +434,10 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->valid_cpus)) + return -ENOENT; + /* does not support taken branch sampling */ if (has_branch_stack(event)) return -EOPNOTSUPP; diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index 20d553c9f5e2..31c5cdefae3f 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -19,6 +19,7 @@ #define pr_fmt(fmt) "CPU PMU: " fmt #include <linux/bitmap.h> +#include <linux/cpu_pm.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/of.h> @@ -31,33 +32,36 @@ #include <asm/pmu.h> /* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *cpu_pmu; +static DEFINE_PER_CPU(struct arm_pmu *, cpu_pmu); static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); +static DEFINE_PER_CPU(struct cpupmu_regs, cpu_pmu_regs); + /* * Despite the names, these two functions are CPU-specific and are used * by the OProfile/perf code. */ const char *perf_pmu_name(void) { - if (!cpu_pmu) + struct arm_pmu *pmu = per_cpu(cpu_pmu, 0); + if (!pmu) return NULL; - return cpu_pmu->name; + return pmu->name; } EXPORT_SYMBOL_GPL(perf_pmu_name); int perf_num_counters(void) { - int max_events = 0; + struct arm_pmu *pmu = per_cpu(cpu_pmu, 0); - if (cpu_pmu != NULL) - max_events = cpu_pmu->num_events; + if (!pmu) + return 0; - return max_events; + return pmu->num_events; } EXPORT_SYMBOL_GPL(perf_num_counters); @@ -75,11 +79,13 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) { int i, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; + int cpu = -1; irqs = min(pmu_device->num_resources, num_possible_cpus()); for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) + cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus); + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) continue; irq = platform_get_irq(pmu_device, i); if (irq >= 0) @@ -91,6 +97,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) { int i, err, irq, irqs; struct platform_device *pmu_device = cpu_pmu->plat_device; + int cpu = -1; if (!pmu_device) return -ENODEV; @@ -103,6 +110,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) for (i = 0; i < irqs; ++i) { err = 0; + cpu = cpumask_next(cpu, &cpu_pmu->valid_cpus); irq = platform_get_irq(pmu_device, i); if (irq < 0) continue; @@ -112,7 +120,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) * assume that we're running on a uniprocessor machine and * continue. Otherwise, continue without this interrupt. */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", irq, i); continue; @@ -127,7 +135,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) return err; } - cpumask_set_cpu(i, &cpu_pmu->active_irqs); + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); } return 0; @@ -136,7 +144,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) static void cpu_pmu_init(struct arm_pmu *cpu_pmu) { int cpu; - for_each_possible_cpu(cpu) { + for_each_cpu_mask(cpu, cpu_pmu->valid_cpus) { struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); events->events = per_cpu(hw_events, cpu); events->used_mask = per_cpu(used_mask, cpu); @@ -149,7 +157,7 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) /* Ensure the PMU has sane values out of reset. */ if (cpu_pmu->reset) - on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); + on_each_cpu_mask(&cpu_pmu->valid_cpus, cpu_pmu->reset, cpu_pmu, 1); } /* @@ -161,21 +169,46 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu) static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, void *hcpu) { + struct arm_pmu *pmu = per_cpu(cpu_pmu, (long)hcpu); + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) return NOTIFY_DONE; - if (cpu_pmu && cpu_pmu->reset) - cpu_pmu->reset(cpu_pmu); + if (pmu && pmu->reset) + pmu->reset(pmu); else return NOTIFY_DONE; return NOTIFY_OK; } +static int cpu_pmu_pm_notify(struct notifier_block *b, + unsigned long action, void *hcpu) +{ + int cpu = smp_processor_id(); + struct arm_pmu *pmu = per_cpu(cpu_pmu, cpu); + struct cpupmu_regs *pmuregs = &per_cpu(cpu_pmu_regs, cpu); + + if (!pmu) + return NOTIFY_DONE; + + if (action == CPU_PM_ENTER && pmu->save_regs) { + pmu->save_regs(pmu, pmuregs); + } else if (action == CPU_PM_EXIT && pmu->restore_regs) { + pmu->restore_regs(pmu, pmuregs); + } + + return NOTIFY_OK; +} + static struct notifier_block cpu_pmu_hotplug_notifier = { .notifier_call = cpu_pmu_notify, }; +static struct notifier_block cpu_pmu_pm_notifier = { + .notifier_call = cpu_pmu_pm_notify, +}; + /* * PMU platform driver and devicetree bindings. */ @@ -247,6 +280,9 @@ static int probe_current_pmu(struct arm_pmu *pmu) } } + /* assume PMU support all the CPUs in this case */ + cpumask_setall(&pmu->valid_cpus); + put_cpu(); return ret; } @@ -254,15 +290,10 @@ static int probe_current_pmu(struct arm_pmu *pmu) static int cpu_pmu_device_probe(struct platform_device *pdev) { const struct of_device_id *of_id; - const int (*init_fn)(struct arm_pmu *); struct device_node *node = pdev->dev.of_node; struct arm_pmu *pmu; - int ret = -ENODEV; - - if (cpu_pmu) { - pr_info("attempt to register multiple PMU devices!"); - return -ENOSPC; - } + int ret = 0; + int cpu; pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); if (!pmu) { @@ -271,8 +302,28 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) } if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { - init_fn = of_id->data; - ret = init_fn(pmu); + smp_call_func_t init_fn = (smp_call_func_t)of_id->data; + struct device_node *ncluster; + int cluster = -1; + cpumask_t sibling_mask; + + ncluster = of_parse_phandle(node, "cluster", 0); + if (ncluster) { + int len; + const u32 *hwid; + hwid = of_get_property(ncluster, "reg", &len); + if (hwid && len == 4) + cluster = be32_to_cpup(hwid); + } + /* set sibling mask to all cpu mask if socket is not specified */ + if (cluster == -1 || + cluster_to_logical_mask(cluster, &sibling_mask)) + cpumask_setall(&sibling_mask); + + smp_call_function_any(&sibling_mask, init_fn, pmu, 1); + + /* now set the valid_cpus after init */ + cpumask_copy(&pmu->valid_cpus, &sibling_mask); } else { ret = probe_current_pmu(pmu); } @@ -282,10 +333,12 @@ static int cpu_pmu_device_probe(struct platform_device *pdev) goto out_free; } - cpu_pmu = pmu; - cpu_pmu->plat_device = pdev; - cpu_pmu_init(cpu_pmu); - ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW); + for_each_cpu_mask(cpu, pmu->valid_cpus) + per_cpu(cpu_pmu, cpu) = pmu; + + pmu->plat_device = pdev; + cpu_pmu_init(pmu); + ret = armpmu_register(pmu, -1); if (!ret) return 0; @@ -314,9 +367,17 @@ static int __init register_pmu_driver(void) if (err) return err; + err = cpu_pm_register_notifier(&cpu_pmu_pm_notifier); + if (err) { + unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); + return err; + } + err = platform_driver_register(&cpu_pmu_driver); - if (err) + if (err) { + cpu_pm_unregister_notifier(&cpu_pmu_pm_notifier); unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); + } return err; } diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 039cffb053a7..654db5030c31 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -950,6 +950,51 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) } #endif +static void armv7pmu_save_regs(struct arm_pmu *cpu_pmu, + struct cpupmu_regs *regs) +{ + unsigned int cnt; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (regs->pmc)); + if (!(regs->pmc & ARMV7_PMNC_E)) + return; + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (regs->pmcntenset)); + asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r" (regs->pmuseren)); + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (regs->pmintenset)); + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (regs->pmxevtcnt[0])); + for (cnt = ARMV7_IDX_COUNTER0; + cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 1" + : "=r"(regs->pmxevttype[cnt])); + asm volatile("mrc p15, 0, %0, c9, c13, 2" + : "=r"(regs->pmxevtcnt[cnt])); + } + return; +} + +static void armv7pmu_restore_regs(struct arm_pmu *cpu_pmu, + struct cpupmu_regs *regs) +{ + unsigned int cnt; + if (!(regs->pmc & ARMV7_PMNC_E)) + return; + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (regs->pmcntenset)); + asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r" (regs->pmuseren)); + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (regs->pmintenset)); + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (regs->pmxevtcnt[0])); + for (cnt = ARMV7_IDX_COUNTER0; + cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mcr p15, 0, %0, c9, c13, 1" + : : "r"(regs->pmxevttype[cnt])); + asm volatile("mcr p15, 0, %0, c9, c13, 2" + : : "r"(regs->pmxevtcnt[cnt])); + } + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (regs->pmc)); +} + static void armv7pmu_enable_event(struct perf_event *event) { unsigned long flags; @@ -1223,6 +1268,8 @@ static void armv7pmu_init(struct arm_pmu *cpu_pmu) cpu_pmu->start = armv7pmu_start; cpu_pmu->stop = armv7pmu_stop; cpu_pmu->reset = armv7pmu_reset; + cpu_pmu->save_regs = armv7pmu_save_regs; + cpu_pmu->restore_regs = armv7pmu_restore_regs; cpu_pmu->max_period = (1LLU << 32) - 1; }; @@ -1240,7 +1287,7 @@ static u32 armv7_read_num_pmnc_events(void) static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A8"; + cpu_pmu->name = "ARMv7_Cortex_A8"; cpu_pmu->map_event = armv7_a8_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1249,7 +1296,7 @@ static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A9"; + cpu_pmu->name = "ARMv7_Cortex_A9"; cpu_pmu->map_event = armv7_a9_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1258,7 +1305,7 @@ static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A5"; + cpu_pmu->name = "ARMv7_Cortex_A5"; cpu_pmu->map_event = armv7_a5_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); return 0; @@ -1267,7 +1314,7 @@ static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A15"; + cpu_pmu->name = "ARMv7_Cortex_A15"; cpu_pmu->map_event = armv7_a15_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; @@ -1277,7 +1324,7 @@ static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) { armv7pmu_init(cpu_pmu); - cpu_pmu->name = "ARMv7 Cortex-A7"; + cpu_pmu->name = "ARMv7_Cortex_A7"; cpu_pmu->map_event = armv7_a7_map_event; cpu_pmu->num_events = armv7_read_num_pmnc_events(); cpu_pmu->set_event_filter = armv7pmu_set_event_filter; diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c index 46931880093d..e2c3fa8632e8 100644 --- a/arch/arm/kernel/psci.c +++ b/arch/arm/kernel/psci.c @@ -42,6 +42,7 @@ static u32 psci_function_id[PSCI_FN_MAX]; #define PSCI_RET_EOPNOTSUPP -1 #define PSCI_RET_EINVAL -2 #define PSCI_RET_EPERM -3 +#define PSCI_RET_EALREADYON -4 static int psci_to_linux_errno(int errno) { @@ -54,6 +55,8 @@ static int psci_to_linux_errno(int errno) return -EINVAL; case PSCI_RET_EPERM: return -EPERM; + case PSCI_RET_EALREADYON: + return -EAGAIN; }; return -EINVAL; @@ -153,7 +156,7 @@ static int psci_migrate(unsigned long cpuid) return psci_to_linux_errno(err); } -static const struct of_device_id psci_of_match[] __initconst = { +static const struct of_device_id psci_of_match[] = { { .compatible = "arm,psci", }, {}, }; @@ -208,3 +211,16 @@ out_put_node: of_node_put(np); return; } + +int psci_probe(void) +{ + struct device_node *np; + int ret = -ENODEV; + + np = of_find_matching_node(NULL, psci_of_match); + if (np) + ret = 0; + + of_node_put(np); + return ret; +} diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 1e8b030dbefd..4a5e9430f16b 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -273,6 +273,19 @@ static int cpu_has_aliasing_icache(unsigned int arch) int aliasing_icache; unsigned int id_reg, num_sets, line_size; +#ifdef CONFIG_BIG_LITTLE + /* + * We expect a combination of Cortex-A15 and Cortex-A7 cores. + * A7 = VIPT aliasing I-cache + * A15 = PIPT (non-aliasing) I-cache + * To cater for this discrepancy, let's assume aliasing I-cache + * all the time. This means unneeded extra work on the A15 but + * only ptrace is affected which is not performance critical. + */ + if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc0f0) + return 1; +#endif + /* PIPT caches never alias. */ if (icache_is_pipt()) return 0; diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 0bc94b1fd1ae..d325410e02c8 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -267,6 +267,33 @@ void store_cpu_topology(unsigned int cpuid) } /* + * cluster_to_logical_mask - return cpu logical mask of CPUs in a cluster + * @socket_id: cluster HW identifier + * @cluster_mask: the cpumask location to be initialized, modified by the + * function only if return value == 0 + * + * Return: + * + * 0 on success + * -EINVAL if cluster_mask is NULL or there is no record matching socket_id + */ +int cluster_to_logical_mask(unsigned int socket_id, cpumask_t *cluster_mask) +{ + int cpu; + + if (!cluster_mask) + return -EINVAL; + + for_each_online_cpu(cpu) + if (socket_id == topology_physical_package_id(cpu)) { + cpumask_copy(cluster_mask, topology_core_cpumask(cpu)); + return 0; + } + + return -EINVAL; +} + +/* * init_cpu_topology is called at boot when only one cpu is running * which prevent simultaneous write access to cpu_topology array */ diff --git a/arch/arm/mach-vexpress/Kconfig b/arch/arm/mach-vexpress/Kconfig index 4a70be485ff8..556b01d305a8 100644 --- a/arch/arm/mach-vexpress/Kconfig +++ b/arch/arm/mach-vexpress/Kconfig @@ -55,6 +55,7 @@ config ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA config ARCH_VEXPRESS_CA9X4 bool "Versatile Express Cortex-A9x4 tile" + select ARM_ERRATA_643719 config ARCH_VEXPRESS_DCSCB bool "Dual Cluster System Control Block (DCSCB) support" diff --git a/arch/arm/mach-vexpress/Makefile b/arch/arm/mach-vexpress/Makefile index 0997e0b7494c..191f2b591adc 100644 --- a/arch/arm/mach-vexpress/Makefile +++ b/arch/arm/mach-vexpress/Makefile @@ -8,8 +8,15 @@ obj-y := v2m.o obj-$(CONFIG_ARCH_VEXPRESS_CA9X4) += ct-ca9x4.o obj-$(CONFIG_ARCH_VEXPRESS_DCSCB) += dcscb.o dcscb_setup.o CFLAGS_dcscb.o += -march=armv7-a +CFLAGS_REMOVE_dcscb.o = -pg obj-$(CONFIG_ARCH_VEXPRESS_SPC) += spc.o +CFLAGS_REMOVE_spc.o = -pg obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM) += tc2_pm.o CFLAGS_tc2_pm.o += -march=armv7-a +CFLAGS_REMOVE_tc2_pm.o = -pg +ifeq ($(CONFIG_ARCH_VEXPRESS_TC2_PM),y) +obj-$(CONFIG_ARM_PSCI) += tc2_pm_psci.o +CFLAGS_REMOVE_tc2_pm_psci.o = -pg +endif obj-$(CONFIG_SMP) += platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index 14d499688736..b35700f8e01f 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -23,6 +23,7 @@ #include <asm/cacheflush.h> #include <asm/cputype.h> #include <asm/cp15.h> +#include <asm/psci.h> #define RST_HOLD0 0x0 @@ -193,6 +194,12 @@ static int __init dcscb_init(void) unsigned int cfg; int ret; + ret = psci_probe(); + if (!ret) { + pr_debug("psci found. Aborting native init\n"); + return -ENODEV; + } + if (!cci_probed()) return -ENODEV; diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index 29e7785a54bc..a32a179080d5 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -27,6 +27,7 @@ #include <asm/cacheflush.h> #include <asm/cputype.h> #include <asm/cp15.h> +#include <asm/psci.h> #include <linux/arm-cci.h> @@ -329,6 +330,12 @@ static int __init tc2_pm_init(void) u32 a15_cluster_id, a7_cluster_id, sys_info; struct device_node *np; + ret = psci_probe(); + if (!ret) { + pr_debug("psci found. Aborting native init\n"); + return -ENODEV; + } + /* * The power management-related features are hidden behind * SCC registers. We need to extract runtime information like diff --git a/arch/arm/mach-vexpress/tc2_pm_psci.c b/arch/arm/mach-vexpress/tc2_pm_psci.c new file mode 100644 index 000000000000..7cb2148caebc --- /dev/null +++ b/arch/arm/mach-vexpress/tc2_pm_psci.c @@ -0,0 +1,173 @@ +/* + * arch/arm/mach-vexpress/tc2_pm_psci.c - TC2 PSCI support + * + * Created by: Achin Gupta, December 2012 + * Copyright: (C) 2012 ARM Limited + * + * Some portions of this file were originally written by Nicolas Pitre + * Copyright: (C) 2012 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/spinlock.h> +#include <linux/errno.h> + +#include <asm/mcpm.h> +#include <asm/proc-fns.h> +#include <asm/cacheflush.h> +#include <asm/psci.h> +#include <asm/atomic.h> +#include <asm/cputype.h> +#include <asm/cp15.h> + +#include <mach/motherboard.h> + +#include <linux/vexpress.h> + +/* + * Platform specific state id understood by the firmware and used to + * program the power controller + */ +#define PSCI_POWER_STATE_ID 0 + +#define TC2_CLUSTERS 2 +#define TC2_MAX_CPUS_PER_CLUSTER 3 + +static atomic_t tc2_pm_use_count[TC2_MAX_CPUS_PER_CLUSTER][TC2_CLUSTERS]; + +static int tc2_pm_psci_power_up(unsigned int cpu, unsigned int cluster) +{ + unsigned int mpidr = (cluster << 8) | cpu; + int ret = 0; + + BUG_ON(!psci_ops.cpu_on); + + switch (atomic_inc_return(&tc2_pm_use_count[cpu][cluster])) { + case 1: + /* + * This is a request to power up a cpu that linux thinks has + * been powered down. Retries are needed if the firmware has + * seen the power down request as yet. + */ + do + ret = psci_ops.cpu_on(mpidr, + virt_to_phys(mcpm_entry_point)); + while (ret == -EAGAIN); + + return ret; + case 2: + /* This power up request has overtaken a power down request */ + return ret; + default: + /* Any other value is a bug */ + BUG(); + } +} + +static void tc2_pm_psci_power_down(void) +{ + struct psci_power_state power_state; + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + BUG_ON(!psci_ops.cpu_off); + + switch (atomic_dec_return(&tc2_pm_use_count[cpu][cluster])) { + case 1: + /* + * Overtaken by a power up. Flush caches, exit coherency, + * return & fake a reset + */ + set_cr(get_cr() & ~CR_C); + + flush_cache_louis(); + + asm volatile ("clrex"); + set_auxcr(get_auxcr() & ~(1 << 6)); + + return; + case 0: + /* A normal request to possibly power down the cluster */ + power_state.id = PSCI_POWER_STATE_ID; + power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN; + power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1; + + psci_ops.cpu_off(power_state); + + /* On success this function never returns */ + default: + /* Any other value is a bug */ + BUG(); + } +} + +static void tc2_pm_psci_suspend(u64 unused) +{ + struct psci_power_state power_state; + + BUG_ON(!psci_ops.cpu_suspend); + + /* On TC2 always attempt to power down the cluster */ + power_state.id = PSCI_POWER_STATE_ID; + power_state.type = PSCI_POWER_STATE_TYPE_POWER_DOWN; + power_state.affinity_level = PSCI_POWER_STATE_AFFINITY_LEVEL1; + + psci_ops.cpu_suspend(power_state, virt_to_phys(mcpm_entry_point)); + + /* On success this function never returns */ + BUG(); +} + +static const struct mcpm_platform_ops tc2_pm_power_ops = { + .power_up = tc2_pm_psci_power_up, + .power_down = tc2_pm_psci_power_down, + .suspend = tc2_pm_psci_suspend, +}; + +static void __init tc2_pm_usage_count_init(void) +{ + unsigned int mpidr, cpu, cluster; + + mpidr = read_cpuid_mpidr(); + cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster); + BUG_ON(cluster >= TC2_CLUSTERS || cpu >= TC2_MAX_CPUS_PER_CLUSTER); + + atomic_set(&tc2_pm_use_count[cpu][cluster], 1); +} + +static int __init tc2_pm_psci_init(void) +{ + int ret; + + ret = psci_probe(); + if (ret) { + pr_debug("psci not found. Aborting psci init\n"); + return -ENODEV; + } + + if (!of_machine_is_compatible("arm,vexpress,v2p-ca15_a7")) + return -ENODEV; + + tc2_pm_usage_count_init(); + + ret = mcpm_platform_register(&tc2_pm_power_ops); + if (!ret) + ret = mcpm_sync_init(NULL); + if (!ret) + pr_info("TC2 power management using PSCI initialized\n"); + return ret; +} + +early_initcall(tc2_pm_psci_init); diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 4f8b8cb17ff5..ffa9b73b7d34 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -7,6 +7,7 @@ #include <linux/io.h> #include <linux/smp.h> #include <linux/init.h> +#include <linux/memblock.h> #include <linux/of_address.h> #include <linux/of_fdt.h> #include <linux/of_irq.h> @@ -369,6 +370,31 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express") .init_machine = v2m_init, MACHINE_END +static void __init v2m_dt_hdlcd_init(void) +{ + struct device_node *node; + int len, na, ns; + const __be32 *prop; + phys_addr_t fb_base, fb_size; + + node = of_find_compatible_node(NULL, NULL, "arm,hdlcd"); + if (!node) + return; + + na = of_n_addr_cells(node); + ns = of_n_size_cells(node); + + prop = of_get_property(node, "framebuffer", &len); + if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop))) + return; + + fb_base = of_read_number(prop, na); + fb_size = of_read_number(prop + na, ns); + + if (WARN_ON(memblock_remove(fb_base, fb_size))) + return; +}; + static struct map_desc v2m_rs1_io_desc __initdata = { .virtual = V2M_PERIPH, .pfn = __phys_to_pfn(0x1c000000), @@ -421,6 +447,8 @@ void __init v2m_dt_init_early(void) } versatile_sched_clock_init(vexpress_get_24mhz_clock_base(), 24000000); + + v2m_dt_hdlcd_init(); } static const struct of_device_id v2m_dt_bus_match[] __initconst = { diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index b784579545e8..3bbfd5dbe741 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -449,8 +449,16 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (pud_none(*pud_k)) goto bad_area; - if (!pud_present(*pud)) + if (!pud_present(*pud)) { set_pud(pud, *pud_k); + /* + * There is a small window during free_pgtables() where the + * user *pud entry is 0 but the TLB has not been invalidated + * and we get a level 2 (pmd) translation fault caused by the + * intermediate TLB caching of the old level 1 (pud) entry. + */ + flush_tlb_kernel_page(addr); + } pmd = pmd_offset(pud, addr); pmd_k = pmd_offset(pud_k, addr); @@ -473,8 +481,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, #endif if (pmd_none(pmd_k[index])) goto bad_area; + if (!pmd_present(pmd[index])) + copy_pmd(pmd, pmd_k); - copy_pmd(pmd, pmd_k); return 0; bad_area: diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bede54fbcac3..845e7f157867 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -455,5 +455,8 @@ source "arch/arm64/Kconfig.debug" source "security/Kconfig" source "crypto/Kconfig" +if CRYPTO +source "arch/arm64/crypto/Kconfig" +endif source "lib/Kconfig" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2fceb71ac3b7..8185a913c5ed 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -45,6 +45,7 @@ export TEXT_OFFSET GZFLAGS core-y += arch/arm64/kernel/ arch/arm64/mm/ core-$(CONFIG_KVM) += arch/arm64/kvm/ core-$(CONFIG_XEN) += arch/arm64/xen/ +core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ libs-y := arch/arm64/lib/ $(libs-y) libs-y += $(LIBGCC) diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig new file mode 100644 index 000000000000..5562652c5316 --- /dev/null +++ b/arch/arm64/crypto/Kconfig @@ -0,0 +1,53 @@ + +menuconfig ARM64_CRYPTO + bool "ARM64 Accelerated Cryptographic Algorithms" + depends on ARM64 + help + Say Y here to choose from a selection of cryptographic algorithms + implemented using ARM64 specific CPU features or instructions. + +if ARM64_CRYPTO + +config CRYPTO_SHA1_ARM64_CE + tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + +config CRYPTO_SHA2_ARM64_CE + tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + +config CRYPTO_GHASH_ARM64_CE + tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_HASH + +config CRYPTO_AES_ARM64_CE + tristate "AES core cipher using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES + +config CRYPTO_AES_ARM64_CE_CCM + tristate "AES in CCM mode using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AES + select CRYPTO_AEAD + +config CRYPTO_AES_ARM64_CE_BLK + tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_ABLK_HELPER + +config CRYPTO_AES_ARM64_NEON_BLK + tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" + depends on ARM64 && KERNEL_MODE_NEON + select CRYPTO_BLKCIPHER + select CRYPTO_AES + select CRYPTO_ABLK_HELPER + +endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile new file mode 100644 index 000000000000..2070a56ecc46 --- /dev/null +++ b/arch/arm64/crypto/Makefile @@ -0,0 +1,38 @@ +# +# linux/arch/arm64/crypto/Makefile +# +# Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o +sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o + +obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o +sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o + +obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o +ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o +CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o +aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_CE_BLK) += aes-ce-blk.o +aes-ce-blk-y := aes-glue-ce.o aes-ce.o + +obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o +aes-neon-blk-y := aes-glue-neon.o aes-neon.o + +AFLAGS_aes-ce.o := -DINTERLEAVE=2 -DINTERLEAVE_INLINE +AFLAGS_aes-neon.o := -DINTERLEAVE=4 + +CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS + +$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE + $(call if_changed_dep,cc_o_c) diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S new file mode 100644 index 000000000000..432e4841cd81 --- /dev/null +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -0,0 +1,222 @@ +/* + * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + + .text + .arch armv8-a+crypto + + /* + * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, + * u32 *macp, u8 const rk[], u32 rounds); + */ +ENTRY(ce_aes_ccm_auth_data) + ldr w8, [x3] /* leftover from prev round? */ + ld1 {v0.2d}, [x0] /* load mac */ + cbz w8, 1f + sub w8, w8, #16 + eor v1.16b, v1.16b, v1.16b +0: ldrb w7, [x1], #1 /* get 1 byte of input */ + subs w2, w2, #1 + add w8, w8, #1 + ins v1.b[0], w7 + ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ + beq 8f /* out of input? */ + cbnz w8, 0b + eor v0.16b, v0.16b, v1.16b +1: ld1 {v3.2d}, [x4] /* load first round key */ + prfm pldl1strm, [x1] + cmp w5, #12 /* which key size? */ + add x6, x4, #16 + sub w7, w5, #2 /* modified # of rounds */ + bmi 2f + bne 5f + mov v5.16b, v3.16b + b 4f +2: mov v4.16b, v3.16b + ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ +3: aese v0.16b, v4.16b + aesmc v0.16b, v0.16b +4: ld1 {v3.2d}, [x6], #16 /* load next round key */ + aese v0.16b, v5.16b + aesmc v0.16b, v0.16b +5: ld1 {v4.2d}, [x6], #16 /* load next round key */ + subs w7, w7, #3 + aese v0.16b, v3.16b + aesmc v0.16b, v0.16b + ld1 {v5.2d}, [x6], #16 /* load next round key */ + bpl 3b + aese v0.16b, v4.16b + subs w2, w2, #16 /* last data? */ + eor v0.16b, v0.16b, v5.16b /* final round */ + bmi 6f + ld1 {v1.16b}, [x1], #16 /* load next input block */ + eor v0.16b, v0.16b, v1.16b /* xor with mac */ + bne 1b +6: st1 {v0.2d}, [x0] /* store mac */ + beq 10f + adds w2, w2, #16 + beq 10f + mov w8, w2 +7: ldrb w7, [x1], #1 + umov w6, v0.b[0] + eor w6, w6, w7 + strb w6, [x0], #1 + subs w2, w2, #1 + beq 10f + ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ + b 7b +8: mov w7, w8 + add w8, w8, #16 +9: ext v1.16b, v1.16b, v1.16b, #1 + adds w7, w7, #1 + bne 9b + eor v0.16b, v0.16b, v1.16b + st1 {v0.2d}, [x0] +10: str w8, [x3] + ret +ENDPROC(ce_aes_ccm_auth_data) + + /* + * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[], + * u32 rounds); + */ +ENTRY(ce_aes_ccm_final) + ld1 {v3.2d}, [x2], #16 /* load first round key */ + ld1 {v0.2d}, [x0] /* load mac */ + cmp w3, #12 /* which key size? */ + sub w3, w3, #2 /* modified # of rounds */ + ld1 {v1.2d}, [x1] /* load 1st ctriv */ + bmi 0f + bne 3f + mov v5.16b, v3.16b + b 2f +0: mov v4.16b, v3.16b +1: ld1 {v5.2d}, [x2], #16 /* load next round key */ + aese v0.16b, v4.16b + aese v1.16b, v4.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +2: ld1 {v3.2d}, [x2], #16 /* load next round key */ + aese v0.16b, v5.16b + aese v1.16b, v5.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +3: ld1 {v4.2d}, [x2], #16 /* load next round key */ + subs w3, w3, #3 + aese v0.16b, v3.16b + aese v1.16b, v3.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b + bpl 1b + aese v0.16b, v4.16b + aese v1.16b, v4.16b + /* final round key cancels out */ + eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ + st1 {v0.2d}, [x0] /* store result */ + ret +ENDPROC(ce_aes_ccm_final) + + .macro aes_ccm_do_crypt,enc + ldr x8, [x6, #8] /* load lower ctr */ + ld1 {v0.2d}, [x5] /* load mac */ + rev x8, x8 /* keep swabbed ctr in reg */ +0: /* outer loop */ + ld1 {v1.1d}, [x6] /* load upper ctr */ + prfm pldl1strm, [x1] + add x8, x8, #1 + rev x9, x8 + cmp w4, #12 /* which key size? */ + sub w7, w4, #2 /* get modified # of rounds */ + ins v1.d[1], x9 /* no carry in lower ctr */ + ld1 {v3.2d}, [x3] /* load first round key */ + add x10, x3, #16 + bmi 1f + bne 4f + mov v5.16b, v3.16b + b 3f +1: mov v4.16b, v3.16b + ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ +2: /* inner loop: 3 rounds, 2x interleaved */ + aese v0.16b, v4.16b + aese v1.16b, v4.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +3: ld1 {v3.2d}, [x10], #16 /* load next round key */ + aese v0.16b, v5.16b + aese v1.16b, v5.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b +4: ld1 {v4.2d}, [x10], #16 /* load next round key */ + subs w7, w7, #3 + aese v0.16b, v3.16b + aese v1.16b, v3.16b + aesmc v0.16b, v0.16b + aesmc v1.16b, v1.16b + ld1 {v5.2d}, [x10], #16 /* load next round key */ + bpl 2b + aese v0.16b, v4.16b + aese v1.16b, v4.16b + subs w2, w2, #16 + bmi 6f /* partial block? */ + ld1 {v2.16b}, [x1], #16 /* load next input block */ + .if \enc == 1 + eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ + eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ + .else + eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */ + eor v1.16b, v2.16b, v5.16b /* final round enc */ + .endif + eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ + st1 {v1.16b}, [x0], #16 /* write output block */ + bne 0b + rev x8, x8 + st1 {v0.2d}, [x5] /* store mac */ + str x8, [x6, #8] /* store lsb end of ctr (BE) */ +5: ret + +6: eor v0.16b, v0.16b, v5.16b /* final round mac */ + eor v1.16b, v1.16b, v5.16b /* final round enc */ + st1 {v0.2d}, [x5] /* store mac */ + add w2, w2, #16 /* process partial tail block */ +7: ldrb w9, [x1], #1 /* get 1 byte of input */ + umov w6, v1.b[0] /* get top crypted ctr byte */ + umov w7, v0.b[0] /* get top mac byte */ + .if \enc == 1 + eor w7, w7, w9 + eor w9, w9, w6 + .else + eor w9, w9, w6 + eor w7, w7, w9 + .endif + strb w9, [x0], #1 /* store out byte */ + strb w7, [x5], #1 /* store mac byte */ + subs w2, w2, #1 + beq 5b + ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ + ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ + b 7b + .endm + + /* + * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, + * u8 const rk[], u32 rounds, u8 mac[], + * u8 ctr[]); + * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, + * u8 const rk[], u32 rounds, u8 mac[], + * u8 ctr[]); + */ +ENTRY(ce_aes_ccm_encrypt) + aes_ccm_do_crypt 1 +ENDPROC(ce_aes_ccm_encrypt) + +ENTRY(ce_aes_ccm_decrypt) + aes_ccm_do_crypt 0 +ENDPROC(ce_aes_ccm_decrypt) diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c new file mode 100644 index 000000000000..9e6cdde9b43d --- /dev/null +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -0,0 +1,297 @@ +/* + * aes-ccm-glue.c - AES-CCM transform for ARMv8 with Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <asm/unaligned.h> +#include <crypto/aes.h> +#include <crypto/algapi.h> +#include <crypto/scatterwalk.h> +#include <linux/crypto.h> +#include <linux/module.h> + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +asmlinkage void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes, + u32 *macp, u32 const rk[], u32 rounds); + +asmlinkage void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes, + u32 const rk[], u32 rounds, u8 mac[], + u8 ctr[]); + +asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes, + u32 const rk[], u32 rounds, u8 mac[], + u8 ctr[]); + +asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[], + u32 rounds); + +static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm); + int ret; + + ret = crypto_aes_expand_key(ctx, in_key, key_len); + if (!ret) + return 0; + + tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + if ((authsize & 1) || authsize < 4) + return -EINVAL; + return 0; +} + +static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + __be32 *n = (__be32 *)&maciv[AES_BLOCK_SIZE - 8]; + u32 l = req->iv[0] + 1; + + /* verify that CCM dimension 'L' is set correctly in the IV */ + if (l < 2 || l > 8) + return -EINVAL; + + /* verify that msglen can in fact be represented in L bytes */ + if (l < 4 && msglen >> (8 * l)) + return -EOVERFLOW; + + /* + * Even if the CCM spec allows L values of up to 8, the Linux cryptoapi + * uses a u32 type to represent msglen so the top 4 bytes are always 0. + */ + n[0] = 0; + n[1] = cpu_to_be32(msglen); + + memcpy(maciv, req->iv, AES_BLOCK_SIZE - l); + + /* + * Meaning of byte 0 according to CCM spec (RFC 3610/NIST 800-38C) + * - bits 0..2 : max # of bytes required to represent msglen, minus 1 + * (already set by caller) + * - bits 3..5 : size of auth tag (1 => 4 bytes, 2 => 6 bytes, etc) + * - bit 6 : indicates presence of authenticate-only data + */ + maciv[0] |= (crypto_aead_authsize(aead) - 2) << 2; + if (req->assoclen) + maciv[0] |= 0x40; + + memset(&req->iv[AES_BLOCK_SIZE - l], 0, l); + return 0; +} + +static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + struct __packed { __be16 l; __be32 h; u16 len; } ltag; + struct scatter_walk walk; + u32 len = req->assoclen; + u32 macp = 0; + + /* prepend the AAD with a length tag */ + if (len < 0xff00) { + ltag.l = cpu_to_be16(len); + ltag.len = 2; + } else { + ltag.l = cpu_to_be16(0xfffe); + put_unaligned_be32(len, <ag.h); + ltag.len = 6; + } + + ce_aes_ccm_auth_data(mac, (u8 *)<ag, ltag.len, &macp, ctx->key_enc, + num_rounds(ctx)); + scatterwalk_start(&walk, req->assoc); + + do { + u32 n = scatterwalk_clamp(&walk, len); + u8 *p; + + if (!n) { + scatterwalk_start(&walk, sg_next(walk.sg)); + n = scatterwalk_clamp(&walk, len); + } + p = scatterwalk_map(&walk); + ce_aes_ccm_auth_data(mac, p, n, &macp, ctx->key_enc, + num_rounds(ctx)); + len -= n; + + scatterwalk_unmap(p); + scatterwalk_advance(&walk, n); + scatterwalk_done(&walk, 0, len); + } while (len); +} + +static int ccm_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + struct blkcipher_desc desc = { .info = req->iv }; + struct blkcipher_walk walk; + u8 __aligned(8) mac[AES_BLOCK_SIZE]; + u8 buf[AES_BLOCK_SIZE]; + u32 len = req->cryptlen; + int err; + + err = ccm_init_mac(req, mac, len); + if (err) + return err; + + kernel_neon_begin_partial(6); + + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); + + /* preserve the original iv for the final round */ + memcpy(buf, req->iv, AES_BLOCK_SIZE); + + blkcipher_walk_init(&walk, req->dst, req->src, len); + err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, + AES_BLOCK_SIZE); + + while (walk.nbytes) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + + if (walk.nbytes == len) + tail = 0; + + ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); + + len -= walk.nbytes - tail; + err = blkcipher_walk_done(&desc, &walk, tail); + } + if (!err) + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + + kernel_neon_end(); + + if (err) + return err; + + /* copy authtag to end of dst */ + scatterwalk_map_and_copy(mac, req->dst, req->cryptlen, + crypto_aead_authsize(aead), 1); + + return 0; +} + +static int ccm_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); + unsigned int authsize = crypto_aead_authsize(aead); + struct blkcipher_desc desc = { .info = req->iv }; + struct blkcipher_walk walk; + u8 __aligned(8) mac[AES_BLOCK_SIZE]; + u8 buf[AES_BLOCK_SIZE]; + u32 len = req->cryptlen - authsize; + int err; + + err = ccm_init_mac(req, mac, len); + if (err) + return err; + + kernel_neon_begin_partial(6); + + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); + + /* preserve the original iv for the final round */ + memcpy(buf, req->iv, AES_BLOCK_SIZE); + + blkcipher_walk_init(&walk, req->dst, req->src, len); + err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, + AES_BLOCK_SIZE); + + while (walk.nbytes) { + u32 tail = walk.nbytes % AES_BLOCK_SIZE; + + if (walk.nbytes == len) + tail = 0; + + ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes - tail, ctx->key_enc, + num_rounds(ctx), mac, walk.iv); + + len -= walk.nbytes - tail; + err = blkcipher_walk_done(&desc, &walk, tail); + } + if (!err) + ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); + + kernel_neon_end(); + + if (err) + return err; + + /* compare calculated auth tag with the stored one */ + scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize, + authsize, 0); + + if (memcmp(mac, buf, authsize)) + return -EBADMSG; + return 0; +} + +static struct crypto_alg ccm_aes_alg = { + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AEAD, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_aead_type, + .cra_module = THIS_MODULE, + .cra_aead = { + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .setkey = ccm_setkey, + .setauthsize = ccm_setauthsize, + .encrypt = ccm_encrypt, + .decrypt = ccm_decrypt, + } +}; + +static int __init aes_mod_init(void) +{ + if (!(elf_hwcap & HWCAP_AES)) + return -ENODEV; + return crypto_register_alg(&ccm_aes_alg); +} + +static void __exit aes_mod_exit(void) +{ + crypto_unregister_alg(&ccm_aes_alg); +} + +module_init(aes_mod_init); +module_exit(aes_mod_exit); + +MODULE_DESCRIPTION("Synchronous AES in CCM mode using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("ccm(aes)"); diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c new file mode 100644 index 000000000000..2075e1acae6b --- /dev/null +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -0,0 +1,155 @@ +/* + * aes-ce-cipher.c - core AES cipher using ARMv8 Crypto Extensions + * + * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <crypto/aes.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/module.h> + +MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +struct aes_block { + u8 b[AES_BLOCK_SIZE]; +}; + +static int num_rounds(struct crypto_aes_ctx *ctx) +{ + /* + * # of rounds specified by AES: + * 128 bit key 10 rounds + * 192 bit key 12 rounds + * 256 bit key 14 rounds + * => n byte key => 6 + (n/4) rounds + */ + return 6 + ctx->key_length / 4; +} + +static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct aes_block *out = (struct aes_block *)dst; + struct aes_block const *in = (struct aes_block *)src; + void *dummy0; + int dummy1; + + kernel_neon_begin_partial(4); + + __asm__(" ld1 {v0.16b}, %[in] ;" + " ld1 {v1.2d}, [%[key]], #16 ;" + " cmp %w[rounds], #10 ;" + " bmi 0f ;" + " bne 3f ;" + " mov v3.16b, v1.16b ;" + " b 2f ;" + "0: mov v2.16b, v1.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + "1: aese v0.16b, v2.16b ;" + " aesmc v0.16b, v0.16b ;" + "2: ld1 {v1.2d}, [%[key]], #16 ;" + " aese v0.16b, v3.16b ;" + " aesmc v0.16b, v0.16b ;" + "3: ld1 {v2.2d}, [%[key]], #16 ;" + " subs %w[rounds], %w[rounds], #3 ;" + " aese v0.16b, v1.16b ;" + " aesmc v0.16b, v0.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + " bpl 1b ;" + " aese v0.16b, v2.16b ;" + " eor v0.16b, v0.16b, v3.16b ;" + " st1 {v0.16b}, %[out] ;" + + : [out] "=Q"(*out), + [key] "=r"(dummy0), + [rounds] "=r"(dummy1) + : [in] "Q"(*in), + "1"(ctx->key_enc), + "2"(num_rounds(ctx) - 2) + : "cc"); + + kernel_neon_end(); +} + +static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) +{ + struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct aes_block *out = (struct aes_block *)dst; + struct aes_block const *in = (struct aes_block *)src; + void *dummy0; + int dummy1; + + kernel_neon_begin_partial(4); + + __asm__(" ld1 {v0.16b}, %[in] ;" + " ld1 {v1.2d}, [%[key]], #16 ;" + " cmp %w[rounds], #10 ;" + " bmi 0f ;" + " bne 3f ;" + " mov v3.16b, v1.16b ;" + " b 2f ;" + "0: mov v2.16b, v1.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + "1: aesd v0.16b, v2.16b ;" + " aesimc v0.16b, v0.16b ;" + "2: ld1 {v1.2d}, [%[key]], #16 ;" + " aesd v0.16b, v3.16b ;" + " aesimc v0.16b, v0.16b ;" + "3: ld1 {v2.2d}, [%[key]], #16 ;" + " subs %w[rounds], %w[rounds], #3 ;" + " aesd v0.16b, v1.16b ;" + " aesimc v0.16b, v0.16b ;" + " ld1 {v3.2d}, [%[key]], #16 ;" + " bpl 1b ;" + " aesd v0.16b, v2.16b ;" + " eor v0.16b, v0.16b, v3.16b ;" + " st1 {v0.16b}, %[out] ;" + + : [out] "=Q"(*out), + [key] "=r"(dummy0), + [rounds] "=r"(dummy1) + : [in] "Q"(*in), + "1"(ctx->key_dec), + "2"(num_rounds(ctx) - 2) + : "cc"); + + kernel_neon_end(); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + .cra_cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = crypto_aes_set_key, + .cia_encrypt = aes_cipher_encrypt, + .cia_decrypt = aes_cipher_decrypt + } +}; + +static int __init aes_mod_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_mod_exit(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_cpu_feature_match(AES, aes_mod_init); +module_exit(aes_mod_exit); diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S new file mode 100644 index 000000000000..685a18f731eb --- /dev/null +++ b/arch/arm64/crypto/aes-ce.S @@ -0,0 +1,133 @@ +/* + * linux/arch/arm64/crypto/aes-ce.S - AES cipher for ARMv8 with + * Crypto Extensions + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + +#define AES_ENTRY(func) ENTRY(ce_ ## func) +#define AES_ENDPROC(func) ENDPROC(ce_ ## func) + + .arch armv8-a+crypto + + /* preload all round keys */ + .macro load_round_keys, rounds, rk + cmp \rounds, #12 + blo 2222f /* 128 bits */ + beq 1111f /* 192 bits */ + ld1 {v17.16b-v18.16b}, [\rk], #32 +1111: ld1 {v19.16b-v20.16b}, [\rk], #32 +2222: ld1 {v21.16b-v24.16b}, [\rk], #64 + ld1 {v25.16b-v28.16b}, [\rk], #64 + ld1 {v29.16b-v31.16b}, [\rk] + .endm + + /* prepare for encryption with key in rk[] */ + .macro enc_prepare, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + /* prepare for encryption (again) but with new key in rk[] */ + .macro enc_switch_key, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + /* prepare for decryption with key in rk[] */ + .macro dec_prepare, rounds, rk, ignore + load_round_keys \rounds, \rk + .endm + + .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 + aes\de \i0\().16b, \k\().16b + .ifnb \i1 + aes\de \i1\().16b, \k\().16b + .ifnb \i3 + aes\de \i2\().16b, \k\().16b + aes\de \i3\().16b, \k\().16b + .endif + .endif + aes\mc \i0\().16b, \i0\().16b + .ifnb \i1 + aes\mc \i1\().16b, \i1\().16b + .ifnb \i3 + aes\mc \i2\().16b, \i2\().16b + aes\mc \i3\().16b, \i3\().16b + .endif + .endif + .endm + + /* up to 4 interleaved encryption rounds with the same round key */ + .macro round_Nx, enc, k, i0, i1, i2, i3 + .ifc \enc, e + do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3 + .else + do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3 + .endif + .endm + + /* up to 4 interleaved final rounds */ + .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3 + aes\de \i0\().16b, \k\().16b + .ifnb \i1 + aes\de \i1\().16b, \k\().16b + .ifnb \i3 + aes\de \i2\().16b, \k\().16b + aes\de \i3\().16b, \k\().16b + .endif + .endif + eor \i0\().16b, \i0\().16b, \k2\().16b + .ifnb \i1 + eor \i1\().16b, \i1\().16b, \k2\().16b + .ifnb \i3 + eor \i2\().16b, \i2\().16b, \k2\().16b + eor \i3\().16b, \i3\().16b, \k2\().16b + .endif + .endif + .endm + + /* up to 4 interleaved blocks */ + .macro do_block_Nx, enc, rounds, i0, i1, i2, i3 + cmp \rounds, #12 + blo 2222f /* 128 bits */ + beq 1111f /* 192 bits */ + round_Nx \enc, v17, \i0, \i1, \i2, \i3 + round_Nx \enc, v18, \i0, \i1, \i2, \i3 +1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3 + round_Nx \enc, v20, \i0, \i1, \i2, \i3 +2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 + round_Nx \enc, \key, \i0, \i1, \i2, \i3 + .endr + fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3 + .endm + + .macro encrypt_block, in, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \in + .endm + + .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \i0, \i1 + .endm + + .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \i0, \i1, \i2, \i3 + .endm + + .macro decrypt_block, in, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \in + .endm + + .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \i0, \i1 + .endm + + .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \i0, \i1, \i2, \i3 + .endm + +#include "aes-modes.S" diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c new file mode 100644 index 000000000000..60f2f4c12256 --- /dev/null +++ b/arch/arm64/crypto/aes-glue.c @@ -0,0 +1,446 @@ +/* + * linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <asm/hwcap.h> +#include <crypto/aes.h> +#include <crypto/ablk_helper.h> +#include <crypto/algapi.h> +#include <linux/module.h> +#include <linux/cpufeature.h> + +#ifdef USE_V8_CRYPTO_EXTENSIONS +#define MODE "ce" +#define PRIO 300 +#define aes_ecb_encrypt ce_aes_ecb_encrypt +#define aes_ecb_decrypt ce_aes_ecb_decrypt +#define aes_cbc_encrypt ce_aes_cbc_encrypt +#define aes_cbc_decrypt ce_aes_cbc_decrypt +#define aes_ctr_encrypt ce_aes_ctr_encrypt +#define aes_xts_encrypt ce_aes_xts_encrypt +#define aes_xts_decrypt ce_aes_xts_decrypt +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); +#else +#define MODE "neon" +#define PRIO 200 +#define aes_ecb_encrypt neon_aes_ecb_encrypt +#define aes_ecb_decrypt neon_aes_ecb_decrypt +#define aes_cbc_encrypt neon_aes_cbc_encrypt +#define aes_cbc_decrypt neon_aes_cbc_decrypt +#define aes_ctr_encrypt neon_aes_ctr_encrypt +#define aes_xts_encrypt neon_aes_xts_encrypt +#define aes_xts_decrypt neon_aes_xts_decrypt +MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); +MODULE_ALIAS("ecb(aes)"); +MODULE_ALIAS("cbc(aes)"); +MODULE_ALIAS("ctr(aes)"); +MODULE_ALIAS("xts(aes)"); +#endif + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +/* defined in aes-modes.S */ +asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, int first); +asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, int first); + +asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], int first); +asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 iv[], int first); + +asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], + int rounds, int blocks, u8 ctr[], int first); + +asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 const rk2[], u8 iv[], + int first); +asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], + int rounds, int blocks, u8 const rk2[], u8 iv[], + int first); + +struct crypto_aes_xts_ctx { + struct crypto_aes_ctx key1; + struct crypto_aes_ctx __aligned(8) key2; +}; + +static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + + ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2); + if (!ret) + ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2], + key_len / 2); + if (!ret) + return 0; + + tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, rounds, blocks, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, walk.iv, + first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_dec, rounds, blocks, walk.iv, + first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + return err; +} + +static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key_length / 4; + struct blkcipher_walk walk; + int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + + first = 1; + kernel_neon_begin(); + while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { + aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key_enc, rounds, blocks, walk.iv, + first); + first = 0; + nbytes -= blocks * AES_BLOCK_SIZE; + if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) + break; + err = blkcipher_walk_done(desc, &walk, + walk.nbytes % AES_BLOCK_SIZE); + } + if (nbytes) { + u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; + u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + u8 __aligned(8) tail[AES_BLOCK_SIZE]; + + /* + * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need + * to tell aes_ctr_encrypt() to only read half a block. + */ + blocks = (nbytes <= 8) ? -1 : 1; + + aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds, + blocks, walk.iv, first); + memcpy(tdst, tail, nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key1.key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_enc, rounds, blocks, + (u8 *)ctx->key2.key_enc, walk.iv, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + int err, first, rounds = 6 + ctx->key1.key_length / 4; + struct blkcipher_walk walk; + unsigned int blocks; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_neon_begin(); + for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { + aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, + (u8 *)ctx->key1.key_dec, rounds, blocks, + (u8 *)ctx->key2.key_enc, walk.iv, first); + err = blkcipher_walk_done(desc, &walk, 0); + } + kernel_neon_end(); + + return err; +} + +static struct crypto_alg aes_algs[] = { { + .cra_name = "__ecb-aes-" MODE, + .cra_driver_name = "__driver-ecb-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, +}, { + .cra_name = "__cbc-aes-" MODE, + .cra_driver_name = "__driver-cbc-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, +}, { + .cra_name = "__ctr-aes-" MODE, + .cra_driver_name = "__driver-ctr-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = crypto_aes_set_key, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, + }, +}, { + .cra_name = "__xts-aes-" MODE, + .cra_driver_name = "__driver-xts-aes-" MODE, + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, +}, { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +}, { + .cra_name = "xts(aes)", + .cra_driver_name = "xts-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 7, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + } +} }; + +static int __init aes_init(void) +{ + return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +static void __exit aes_exit(void) +{ + crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); +} + +#ifdef USE_V8_CRYPTO_EXTENSIONS +module_cpu_feature_match(AES, aes_init); +#else +module_init(aes_init); +#endif +module_exit(aes_exit); diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S new file mode 100644 index 000000000000..f6e372c528eb --- /dev/null +++ b/arch/arm64/crypto/aes-modes.S @@ -0,0 +1,532 @@ +/* + * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* included by aes-ce.S and aes-neon.S */ + + .text + .align 4 + +/* + * There are several ways to instantiate this code: + * - no interleave, all inline + * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2) + * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE) + * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4) + * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE) + * + * Macros imported by this code: + * - enc_prepare - setup NEON registers for encryption + * - dec_prepare - setup NEON registers for decryption + * - enc_switch_key - change to new key after having prepared for encryption + * - encrypt_block - encrypt a single block + * - decrypt block - decrypt a single block + * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2) + * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2) + * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4) + * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4) + */ + +#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE) +#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp +#define FRAME_POP ldp x29, x30, [sp],#16 + +#if INTERLEAVE == 2 + +aes_encrypt_block2x: + encrypt_block2x v0, v1, w3, x2, x6, w7 + ret +ENDPROC(aes_encrypt_block2x) + +aes_decrypt_block2x: + decrypt_block2x v0, v1, w3, x2, x6, w7 + ret +ENDPROC(aes_decrypt_block2x) + +#elif INTERLEAVE == 4 + +aes_encrypt_block4x: + encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + ret +ENDPROC(aes_encrypt_block4x) + +aes_decrypt_block4x: + decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + ret +ENDPROC(aes_decrypt_block4x) + +#else +#error INTERLEAVE should equal 2 or 4 +#endif + + .macro do_encrypt_block2x + bl aes_encrypt_block2x + .endm + + .macro do_decrypt_block2x + bl aes_decrypt_block2x + .endm + + .macro do_encrypt_block4x + bl aes_encrypt_block4x + .endm + + .macro do_decrypt_block4x + bl aes_decrypt_block4x + .endm + +#else +#define FRAME_PUSH +#define FRAME_POP + + .macro do_encrypt_block2x + encrypt_block2x v0, v1, w3, x2, x6, w7 + .endm + + .macro do_decrypt_block2x + decrypt_block2x v0, v1, w3, x2, x6, w7 + .endm + + .macro do_encrypt_block4x + encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + .endm + + .macro do_decrypt_block4x + decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7 + .endm + +#endif + + /* + * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, int first) + * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, int first) + */ + +AES_ENTRY(aes_ecb_encrypt) + FRAME_PUSH + cbz w5, .LecbencloopNx + + enc_prepare w3, x2, x5 + +.LecbencloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lecbenc1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */ + do_encrypt_block2x + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + do_encrypt_block4x + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LecbencloopNx +.Lecbenc1x: + adds w4, w4, #INTERLEAVE + beq .Lecbencout +#endif +.Lecbencloop: + ld1 {v0.16b}, [x1], #16 /* get next pt block */ + encrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lecbencloop +.Lecbencout: + FRAME_POP + ret +AES_ENDPROC(aes_ecb_encrypt) + + +AES_ENTRY(aes_ecb_decrypt) + FRAME_PUSH + cbz w5, .LecbdecloopNx + + dec_prepare w3, x2, x5 + +.LecbdecloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lecbdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + do_decrypt_block2x + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + do_decrypt_block4x + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LecbdecloopNx +.Lecbdec1x: + adds w4, w4, #INTERLEAVE + beq .Lecbdecout +#endif +.Lecbdecloop: + ld1 {v0.16b}, [x1], #16 /* get next ct block */ + decrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lecbdecloop +.Lecbdecout: + FRAME_POP + ret +AES_ENDPROC(aes_ecb_decrypt) + + + /* + * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[], int first) + * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 iv[], int first) + */ + +AES_ENTRY(aes_cbc_encrypt) + cbz w6, .Lcbcencloop + + ld1 {v0.16b}, [x5] /* get iv */ + enc_prepare w3, x2, x5 + +.Lcbcencloop: + ld1 {v1.16b}, [x1], #16 /* get next pt block */ + eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ + encrypt_block v0, w3, x2, x5, w6 + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lcbcencloop + ret +AES_ENDPROC(aes_cbc_encrypt) + + +AES_ENTRY(aes_cbc_decrypt) + FRAME_PUSH + cbz w6, .LcbcdecloopNx + + ld1 {v7.16b}, [x5] /* get iv */ + dec_prepare w3, x2, x5 + +.LcbcdecloopNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lcbcdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + mov v2.16b, v0.16b + mov v3.16b, v1.16b + do_decrypt_block2x + eor v0.16b, v0.16b, v7.16b + eor v1.16b, v1.16b, v2.16b + mov v7.16b, v3.16b + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + mov v4.16b, v0.16b + mov v5.16b, v1.16b + mov v6.16b, v2.16b + do_decrypt_block4x + sub x1, x1, #16 + eor v0.16b, v0.16b, v7.16b + eor v1.16b, v1.16b, v4.16b + ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */ + eor v2.16b, v2.16b, v5.16b + eor v3.16b, v3.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 +#endif + b .LcbcdecloopNx +.Lcbcdec1x: + adds w4, w4, #INTERLEAVE + beq .Lcbcdecout +#endif +.Lcbcdecloop: + ld1 {v1.16b}, [x1], #16 /* get next ct block */ + mov v0.16b, v1.16b /* ...and copy to v0 */ + decrypt_block v0, w3, x2, x5, w6 + eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ + mov v7.16b, v1.16b /* ct is next iv */ + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + bne .Lcbcdecloop +.Lcbcdecout: + FRAME_POP + ret +AES_ENDPROC(aes_cbc_decrypt) + + + /* + * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, + * int blocks, u8 ctr[], int first) + */ + +AES_ENTRY(aes_ctr_encrypt) + FRAME_PUSH + cbnz w6, .Lctrfirst /* 1st time around? */ + umov x5, v4.d[1] /* keep swabbed ctr in reg */ + rev x5, x5 +#if INTERLEAVE >= 2 + cmn w5, w4 /* 32 bit overflow? */ + bcs .Lctrinc + add x5, x5, #1 /* increment BE ctr */ + b .LctrincNx +#else + b .Lctrinc +#endif +.Lctrfirst: + enc_prepare w3, x2, x6 + ld1 {v4.16b}, [x5] + umov x5, v4.d[1] /* keep swabbed ctr in reg */ + rev x5, x5 +#if INTERLEAVE >= 2 + cmn w5, w4 /* 32 bit overflow? */ + bcs .Lctrloop +.LctrloopNx: + subs w4, w4, #INTERLEAVE + bmi .Lctr1x +#if INTERLEAVE == 2 + mov v0.8b, v4.8b + mov v1.8b, v4.8b + rev x7, x5 + add x5, x5, #1 + ins v0.d[1], x7 + rev x7, x5 + add x5, x5, #1 + ins v1.d[1], x7 + ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ + do_encrypt_block2x + eor v0.16b, v0.16b, v2.16b + eor v1.16b, v1.16b, v3.16b + st1 {v0.16b-v1.16b}, [x0], #32 +#else + ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ + dup v7.4s, w5 + mov v0.16b, v4.16b + add v7.4s, v7.4s, v8.4s + mov v1.16b, v4.16b + rev32 v8.16b, v7.16b + mov v2.16b, v4.16b + mov v3.16b, v4.16b + mov v1.s[3], v8.s[0] + mov v2.s[3], v8.s[1] + mov v3.s[3], v8.s[2] + ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ + do_encrypt_block4x + eor v0.16b, v5.16b, v0.16b + ld1 {v5.16b}, [x1], #16 /* get 1 input block */ + eor v1.16b, v6.16b, v1.16b + eor v2.16b, v7.16b, v2.16b + eor v3.16b, v5.16b, v3.16b + st1 {v0.16b-v3.16b}, [x0], #64 + add x5, x5, #INTERLEAVE +#endif + cbz w4, .LctroutNx +.LctrincNx: + rev x7, x5 + ins v4.d[1], x7 + b .LctrloopNx +.LctroutNx: + sub x5, x5, #1 + rev x7, x5 + ins v4.d[1], x7 + b .Lctrout +.Lctr1x: + adds w4, w4, #INTERLEAVE + beq .Lctrout +#endif +.Lctrloop: + mov v0.16b, v4.16b + encrypt_block v0, w3, x2, x6, w7 + subs w4, w4, #1 + bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ + ld1 {v3.16b}, [x1], #16 + eor v3.16b, v0.16b, v3.16b + st1 {v3.16b}, [x0], #16 + beq .Lctrout +.Lctrinc: + adds x5, x5, #1 /* increment BE ctr */ + rev x7, x5 + ins v4.d[1], x7 + bcc .Lctrloop /* no overflow? */ + umov x7, v4.d[0] /* load upper word of ctr */ + rev x7, x7 /* ... to handle the carry */ + add x7, x7, #1 + rev x7, x7 + ins v4.d[0], x7 + b .Lctrloop +.Lctrhalfblock: + ld1 {v3.8b}, [x1] + eor v3.8b, v0.8b, v3.8b + st1 {v3.8b}, [x0] +.Lctrout: + FRAME_POP + ret +AES_ENDPROC(aes_ctr_encrypt) + .ltorg + + + /* + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 const rk2[], u8 iv[], int first) + * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, + * int blocks, u8 const rk2[], u8 iv[], int first) + */ + + .macro next_tweak, out, in, const, tmp + sshr \tmp\().2d, \in\().2d, #63 + and \tmp\().16b, \tmp\().16b, \const\().16b + add \out\().2d, \in\().2d, \in\().2d + ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 + eor \out\().16b, \out\().16b, \tmp\().16b + .endm + +.Lxts_mul_x: + .word 1, 0, 0x87, 0 + +AES_ENTRY(aes_xts_encrypt) + FRAME_PUSH + cbz w7, .LxtsencloopNx + + ld1 {v4.16b}, [x6] + enc_prepare w3, x5, x6 + encrypt_block v4, w3, x5, x6, w7 /* first tweak */ + enc_switch_key w3, x2, x6 + ldr q7, .Lxts_mul_x + b .LxtsencNx + +.LxtsencloopNx: + ldr q7, .Lxts_mul_x + next_tweak v4, v4, v7, v8 +.LxtsencNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lxtsenc1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + do_encrypt_block2x + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + st1 {v0.16b-v1.16b}, [x0], #32 + cbz w4, .LxtsencoutNx + next_tweak v4, v5, v7, v8 + b .LxtsencNx +.LxtsencoutNx: + mov v4.16b, v5.16b + b .Lxtsencout +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + next_tweak v6, v5, v7, v8 + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + next_tweak v7, v6, v7, v8 + eor v3.16b, v3.16b, v7.16b + do_encrypt_block4x + eor v3.16b, v3.16b, v7.16b + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 + mov v4.16b, v7.16b + cbz w4, .Lxtsencout + b .LxtsencloopNx +#endif +.Lxtsenc1x: + adds w4, w4, #INTERLEAVE + beq .Lxtsencout +#endif +.Lxtsencloop: + ld1 {v1.16b}, [x1], #16 + eor v0.16b, v1.16b, v4.16b + encrypt_block v0, w3, x2, x6, w7 + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + beq .Lxtsencout + next_tweak v4, v4, v7, v8 + b .Lxtsencloop +.Lxtsencout: + FRAME_POP + ret +AES_ENDPROC(aes_xts_encrypt) + + +AES_ENTRY(aes_xts_decrypt) + FRAME_PUSH + cbz w7, .LxtsdecloopNx + + ld1 {v4.16b}, [x6] + enc_prepare w3, x5, x6 + encrypt_block v4, w3, x5, x6, w7 /* first tweak */ + dec_prepare w3, x2, x6 + ldr q7, .Lxts_mul_x + b .LxtsdecNx + +.LxtsdecloopNx: + ldr q7, .Lxts_mul_x + next_tweak v4, v4, v7, v8 +.LxtsdecNx: +#if INTERLEAVE >= 2 + subs w4, w4, #INTERLEAVE + bmi .Lxtsdec1x +#if INTERLEAVE == 2 + ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + do_decrypt_block2x + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + st1 {v0.16b-v1.16b}, [x0], #32 + cbz w4, .LxtsdecoutNx + next_tweak v4, v5, v7, v8 + b .LxtsdecNx +.LxtsdecoutNx: + mov v4.16b, v5.16b + b .Lxtsdecout +#else + ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + next_tweak v5, v4, v7, v8 + eor v0.16b, v0.16b, v4.16b + next_tweak v6, v5, v7, v8 + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + next_tweak v7, v6, v7, v8 + eor v3.16b, v3.16b, v7.16b + do_decrypt_block4x + eor v3.16b, v3.16b, v7.16b + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + st1 {v0.16b-v3.16b}, [x0], #64 + mov v4.16b, v7.16b + cbz w4, .Lxtsdecout + b .LxtsdecloopNx +#endif +.Lxtsdec1x: + adds w4, w4, #INTERLEAVE + beq .Lxtsdecout +#endif +.Lxtsdecloop: + ld1 {v1.16b}, [x1], #16 + eor v0.16b, v1.16b, v4.16b + decrypt_block v0, w3, x2, x6, w7 + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x0], #16 + subs w4, w4, #1 + beq .Lxtsdecout + next_tweak v4, v4, v7, v8 + b .Lxtsdecloop +.Lxtsdecout: + FRAME_POP + ret +AES_ENDPROC(aes_xts_decrypt) diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S new file mode 100644 index 000000000000..b93170e1cc93 --- /dev/null +++ b/arch/arm64/crypto/aes-neon.S @@ -0,0 +1,382 @@ +/* + * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> + +#define AES_ENTRY(func) ENTRY(neon_ ## func) +#define AES_ENDPROC(func) ENDPROC(neon_ ## func) + + /* multiply by polynomial 'x' in GF(2^8) */ + .macro mul_by_x, out, in, temp, const + sshr \temp, \in, #7 + add \out, \in, \in + and \temp, \temp, \const + eor \out, \out, \temp + .endm + + /* preload the entire Sbox */ + .macro prepare, sbox, shiftrows, temp + adr \temp, \sbox + movi v12.16b, #0x40 + ldr q13, \shiftrows + movi v14.16b, #0x1b + ld1 {v16.16b-v19.16b}, [\temp], #64 + ld1 {v20.16b-v23.16b}, [\temp], #64 + ld1 {v24.16b-v27.16b}, [\temp], #64 + ld1 {v28.16b-v31.16b}, [\temp] + .endm + + /* do preload for encryption */ + .macro enc_prepare, ignore0, ignore1, temp + prepare .LForward_Sbox, .LForward_ShiftRows, \temp + .endm + + .macro enc_switch_key, ignore0, ignore1, temp + /* do nothing */ + .endm + + /* do preload for decryption */ + .macro dec_prepare, ignore0, ignore1, temp + prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp + .endm + + /* apply SubBytes transformation using the the preloaded Sbox */ + .macro sub_bytes, in + sub v9.16b, \in\().16b, v12.16b + tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b + sub v10.16b, v9.16b, v12.16b + tbx \in\().16b, {v20.16b-v23.16b}, v9.16b + sub v11.16b, v10.16b, v12.16b + tbx \in\().16b, {v24.16b-v27.16b}, v10.16b + tbx \in\().16b, {v28.16b-v31.16b}, v11.16b + .endm + + /* apply MixColumns transformation */ + .macro mix_columns, in + mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b + rev32 v8.8h, \in\().8h + eor \in\().16b, v10.16b, \in\().16b + shl v9.4s, v8.4s, #24 + shl v11.4s, \in\().4s, #24 + sri v9.4s, v8.4s, #8 + sri v11.4s, \in\().4s, #8 + eor v9.16b, v9.16b, v8.16b + eor v10.16b, v10.16b, v9.16b + eor \in\().16b, v10.16b, v11.16b + .endm + + /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */ + .macro inv_mix_columns, in + mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b + mul_by_x v11.16b, v11.16b, v10.16b, v14.16b + eor \in\().16b, \in\().16b, v11.16b + rev32 v11.8h, v11.8h + eor \in\().16b, \in\().16b, v11.16b + mix_columns \in + .endm + + .macro do_block, enc, in, rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ + sub_bytes \in + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns \in + .else + inv_mix_columns \in + .endif + b 1111b +2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ + .endm + + .macro encrypt_block, in, rounds, rk, rkp, i + do_block 1, \in, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block, in, rounds, rk, rkp, i + do_block 0, \in, \rounds, \rk, \rkp, \i + .endm + + /* + * Interleaved versions: functionally equivalent to the + * ones above, but applied to 2 or 4 AES states in parallel. + */ + + .macro sub_bytes_2x, in0, in1 + sub v8.16b, \in0\().16b, v12.16b + sub v9.16b, \in1\().16b, v12.16b + tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b + tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b + sub v10.16b, v8.16b, v12.16b + sub v11.16b, v9.16b, v12.16b + tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b + tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b + sub v8.16b, v10.16b, v12.16b + sub v9.16b, v11.16b, v12.16b + tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b + tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b + tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b + tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b + .endm + + .macro sub_bytes_4x, in0, in1, in2, in3 + sub v8.16b, \in0\().16b, v12.16b + tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b + sub v9.16b, \in1\().16b, v12.16b + tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b + sub v10.16b, \in2\().16b, v12.16b + tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b + sub v11.16b, \in3\().16b, v12.16b + tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b + tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b + tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b + sub v8.16b, v8.16b, v12.16b + tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b + sub v9.16b, v9.16b, v12.16b + tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b + sub v10.16b, v10.16b, v12.16b + tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b + sub v11.16b, v11.16b, v12.16b + tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b + sub v8.16b, v8.16b, v12.16b + tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b + sub v9.16b, v9.16b, v12.16b + tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b + sub v10.16b, v10.16b, v12.16b + tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b + sub v11.16b, v11.16b, v12.16b + tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b + tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b + tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b + .endm + + .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const + sshr \tmp0\().16b, \in0\().16b, #7 + add \out0\().16b, \in0\().16b, \in0\().16b + sshr \tmp1\().16b, \in1\().16b, #7 + and \tmp0\().16b, \tmp0\().16b, \const\().16b + add \out1\().16b, \in1\().16b, \in1\().16b + and \tmp1\().16b, \tmp1\().16b, \const\().16b + eor \out0\().16b, \out0\().16b, \tmp0\().16b + eor \out1\().16b, \out1\().16b, \tmp1\().16b + .endm + + .macro mix_columns_2x, in0, in1 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + rev32 v10.8h, \in0\().8h + rev32 v11.8h, \in1\().8h + eor \in0\().16b, v8.16b, \in0\().16b + eor \in1\().16b, v9.16b, \in1\().16b + shl v12.4s, v10.4s, #24 + shl v13.4s, v11.4s, #24 + eor v8.16b, v8.16b, v10.16b + sri v12.4s, v10.4s, #8 + shl v10.4s, \in0\().4s, #24 + eor v9.16b, v9.16b, v11.16b + sri v13.4s, v11.4s, #8 + shl v11.4s, \in1\().4s, #24 + sri v10.4s, \in0\().4s, #8 + eor \in0\().16b, v8.16b, v12.16b + sri v11.4s, \in1\().4s, #8 + eor \in1\().16b, v9.16b, v13.16b + eor \in0\().16b, v10.16b, \in0\().16b + eor \in1\().16b, v11.16b, \in1\().16b + .endm + + .macro inv_mix_cols_2x, in0, in1 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + mul_by_x_2x v8, v9, v8, v9, v10, v11, v14 + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + rev32 v8.8h, v8.8h + rev32 v9.8h, v9.8h + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + mix_columns_2x \in0, \in1 + .endm + + .macro inv_mix_cols_4x, in0, in1, in2, in3 + mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14 + mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14 + mul_by_x_2x v8, v9, v8, v9, v12, v13, v14 + mul_by_x_2x v10, v11, v10, v11, v12, v13, v14 + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + eor \in2\().16b, \in2\().16b, v10.16b + eor \in3\().16b, \in3\().16b, v11.16b + rev32 v8.8h, v8.8h + rev32 v9.8h, v9.8h + rev32 v10.8h, v10.8h + rev32 v11.8h, v11.8h + eor \in0\().16b, \in0\().16b, v8.16b + eor \in1\().16b, \in1\().16b, v9.16b + eor \in2\().16b, \in2\().16b, v10.16b + eor \in3\().16b, \in3\().16b, v11.16b + mix_columns_2x \in0, \in1 + mix_columns_2x \in2, \in3 + .endm + + .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + sub_bytes_2x \in0, \in1 + tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ + tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns_2x \in0, \in1 + ldr q13, .LForward_ShiftRows + .else + inv_mix_cols_2x \in0, \in1 + ldr q13, .LReverse_ShiftRows + .endif + movi v12.16b, #0x40 + b 1111b +2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + .endm + + .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i + ld1 {v15.16b}, [\rk] + add \rkp, \rk, #16 + mov \i, \rounds +1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ + eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ + sub_bytes_4x \in0, \in1, \in2, \in3 + tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ + tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ + tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ + tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ + ld1 {v15.16b}, [\rkp], #16 + subs \i, \i, #1 + beq 2222f + .if \enc == 1 + mix_columns_2x \in0, \in1 + mix_columns_2x \in2, \in3 + ldr q13, .LForward_ShiftRows + .else + inv_mix_cols_4x \in0, \in1, \in2, \in3 + ldr q13, .LReverse_ShiftRows + .endif + movi v12.16b, #0x40 + b 1111b +2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ + eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ + eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */ + eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ + .endm + + .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i + do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i + do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i + .endm + + .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i + do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i + .endm + + .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i + do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i + .endm + +#include "aes-modes.S" + + .text + .align 4 +.LForward_ShiftRows: + .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 + .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb + +.LReverse_ShiftRows: + .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb + .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 + +.LForward_Sbox: + .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 + .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 + .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 + .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 + .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc + .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 + .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a + .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 + .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 + .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 + .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b + .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf + .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 + .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 + .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 + .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 + .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 + .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 + .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 + .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb + .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c + .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 + .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 + .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 + .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 + .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a + .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e + .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e + .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 + .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf + .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 + .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + +.LReverse_Sbox: + .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 + .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 + .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d + .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 + .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 + .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 + .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda + .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a + .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 + .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea + .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 + .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 + .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 + .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 + .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d + .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 + .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 + .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S new file mode 100644 index 000000000000..dc457015884e --- /dev/null +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -0,0 +1,79 @@ +/* + * Accelerated GHASH implementation with ARMv8 PMULL instructions. + * + * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + SHASH .req v0 + SHASH2 .req v1 + T1 .req v2 + T2 .req v3 + MASK .req v4 + XL .req v5 + XM .req v6 + XH .req v7 + IN1 .req v7 + + .text + .arch armv8-a+crypto + + /* + * void pmull_ghash_update(int blocks, u64 dg[], const char *src, + * struct ghash_key const *k, const char *head) + */ +ENTRY(pmull_ghash_update) + ld1 {SHASH.16b}, [x3] + ld1 {XL.16b}, [x1] + movi MASK.16b, #0xe1 + ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 + shl MASK.2d, MASK.2d, #57 + eor SHASH2.16b, SHASH2.16b, SHASH.16b + + /* do the head block first, if supplied */ + cbz x4, 0f + ld1 {T1.2d}, [x4] + b 1f + +0: ld1 {T1.2d}, [x2], #16 + sub w0, w0, #1 + +1: /* multiply XL by SHASH in GF(2^128) */ +CPU_LE( rev64 T1.16b, T1.16b ) + + ext T2.16b, XL.16b, XL.16b, #8 + ext IN1.16b, T1.16b, T1.16b, #8 + eor T1.16b, T1.16b, T2.16b + eor XL.16b, XL.16b, IN1.16b + + pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1 + eor T1.16b, T1.16b, XL.16b + pmull XL.1q, SHASH.1d, XL.1d // a0 * b0 + pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) + + ext T1.16b, XL.16b, XH.16b, #8 + eor T2.16b, XL.16b, XH.16b + eor XM.16b, XM.16b, T1.16b + eor XM.16b, XM.16b, T2.16b + pmull T2.1q, XL.1d, MASK.1d + + mov XH.d[0], XM.d[1] + mov XM.d[1], XL.d[0] + + eor XL.16b, XM.16b, T2.16b + ext T2.16b, XL.16b, XL.16b, #8 + pmull XL.1q, XL.1d, MASK.1d + eor T2.16b, T2.16b, XH.16b + eor XL.16b, XL.16b, T2.16b + + cbnz w0, 0b + + st1 {XL.16b}, [x1] + ret +ENDPROC(pmull_ghash_update) diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c new file mode 100644 index 000000000000..833ec1e3f3e9 --- /dev/null +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -0,0 +1,156 @@ +/* + * Accelerated GHASH implementation with ARMv8 PMULL instructions. + * + * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <asm/unaligned.h> +#include <crypto/internal/hash.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/module.h> + +MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +struct ghash_key { + u64 a; + u64 b; +}; + +struct ghash_desc_ctx { + u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)]; + u8 buf[GHASH_BLOCK_SIZE]; + u32 count; +}; + +asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src, + struct ghash_key const *k, const char *head); + +static int ghash_init(struct shash_desc *desc) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + + *ctx = (struct ghash_desc_ctx){}; + return 0; +} + +static int ghash_update(struct shash_desc *desc, const u8 *src, + unsigned int len) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + + ctx->count += len; + + if ((partial + len) >= GHASH_BLOCK_SIZE) { + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + int blocks; + + if (partial) { + int p = GHASH_BLOCK_SIZE - partial; + + memcpy(ctx->buf + partial, src, p); + src += p; + len -= p; + } + + blocks = len / GHASH_BLOCK_SIZE; + len %= GHASH_BLOCK_SIZE; + + kernel_neon_begin_partial(8); + pmull_ghash_update(blocks, ctx->digest, src, key, + partial ? ctx->buf : NULL); + kernel_neon_end(); + src += blocks * GHASH_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(ctx->buf + partial, src, len); + return 0; +} + +static int ghash_final(struct shash_desc *desc, u8 *dst) +{ + struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; + + if (partial) { + struct ghash_key *key = crypto_shash_ctx(desc->tfm); + + memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); + + kernel_neon_begin_partial(8); + pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL); + kernel_neon_end(); + } + put_unaligned_be64(ctx->digest[1], dst); + put_unaligned_be64(ctx->digest[0], dst + 8); + + *ctx = (struct ghash_desc_ctx){}; + return 0; +} + +static int ghash_setkey(struct crypto_shash *tfm, + const u8 *inkey, unsigned int keylen) +{ + struct ghash_key *key = crypto_shash_ctx(tfm); + u64 a, b; + + if (keylen != GHASH_BLOCK_SIZE) { + crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + /* perform multiplication by 'x' in GF(2^128) */ + b = get_unaligned_be64(inkey); + a = get_unaligned_be64(inkey + 8); + + key->a = (a << 1) | (b >> 63); + key->b = (b << 1) | (a >> 63); + + if (b >> 63) + key->b ^= 0xc200000000000000UL; + + return 0; +} + +static struct shash_alg ghash_alg = { + .digestsize = GHASH_DIGEST_SIZE, + .init = ghash_init, + .update = ghash_update, + .final = ghash_final, + .setkey = ghash_setkey, + .descsize = sizeof(struct ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "ghash-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ghash_key), + .cra_module = THIS_MODULE, + }, +}; + +static int __init ghash_ce_mod_init(void) +{ + return crypto_register_shash(&ghash_alg); +} + +static void __exit ghash_ce_mod_exit(void) +{ + crypto_unregister_shash(&ghash_alg); +} + +module_cpu_feature_match(PMULL, ghash_ce_mod_init); +module_exit(ghash_ce_mod_exit); diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S new file mode 100644 index 000000000000..09d57d98609c --- /dev/null +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -0,0 +1,153 @@ +/* + * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .arch armv8-a+crypto + + k0 .req v0 + k1 .req v1 + k2 .req v2 + k3 .req v3 + + t0 .req v4 + t1 .req v5 + + dga .req q6 + dgav .req v6 + dgb .req s7 + dgbv .req v7 + + dg0q .req q12 + dg0s .req s12 + dg0v .req v12 + dg1s .req s13 + dg1v .req v13 + dg2s .req s14 + + .macro add_only, op, ev, rc, s0, dg1 + .ifc \ev, ev + add t1.4s, v\s0\().4s, \rc\().4s + sha1h dg2s, dg0s + .ifnb \dg1 + sha1\op dg0q, \dg1, t0.4s + .else + sha1\op dg0q, dg1s, t0.4s + .endif + .else + .ifnb \s0 + add t0.4s, v\s0\().4s, \rc\().4s + .endif + sha1h dg1s, dg0s + sha1\op dg0q, dg2s, t1.4s + .endif + .endm + + .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1 + sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s + add_only \op, \ev, \rc, \s1, \dg1 + sha1su1 v\s0\().4s, v\s3\().4s + .endm + + /* + * The SHA1 round constants + */ + .align 4 +.Lsha1_rcon: + .word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 + + /* + * void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + * u8 *head, long bytes) + */ +ENTRY(sha1_ce_transform) + /* load round constants */ + adr x6, .Lsha1_rcon + ld1r {k0.4s}, [x6], #4 + ld1r {k1.4s}, [x6], #4 + ld1r {k2.4s}, [x6], #4 + ld1r {k3.4s}, [x6] + + /* load state */ + ldr dga, [x2] + ldr dgb, [x2, #16] + + /* load partial state (if supplied) */ + cbz x3, 0f + ld1 {v8.4s-v11.4s}, [x3] + b 1f + + /* load input */ +0: ld1 {v8.4s-v11.4s}, [x1], #64 + sub w0, w0, #1 + +1: +CPU_LE( rev32 v8.16b, v8.16b ) +CPU_LE( rev32 v9.16b, v9.16b ) +CPU_LE( rev32 v10.16b, v10.16b ) +CPU_LE( rev32 v11.16b, v11.16b ) + +2: add t0.4s, v8.4s, k0.4s + mov dg0v.16b, dgav.16b + + add_update c, ev, k0, 8, 9, 10, 11, dgb + add_update c, od, k0, 9, 10, 11, 8 + add_update c, ev, k0, 10, 11, 8, 9 + add_update c, od, k0, 11, 8, 9, 10 + add_update c, ev, k1, 8, 9, 10, 11 + + add_update p, od, k1, 9, 10, 11, 8 + add_update p, ev, k1, 10, 11, 8, 9 + add_update p, od, k1, 11, 8, 9, 10 + add_update p, ev, k1, 8, 9, 10, 11 + add_update p, od, k2, 9, 10, 11, 8 + + add_update m, ev, k2, 10, 11, 8, 9 + add_update m, od, k2, 11, 8, 9, 10 + add_update m, ev, k2, 8, 9, 10, 11 + add_update m, od, k2, 9, 10, 11, 8 + add_update m, ev, k3, 10, 11, 8, 9 + + add_update p, od, k3, 11, 8, 9, 10 + add_only p, ev, k3, 9 + add_only p, od, k3, 10 + add_only p, ev, k3, 11 + add_only p, od + + /* update state */ + add dgbv.2s, dgbv.2s, dg1v.2s + add dgav.4s, dgav.4s, dg0v.4s + + cbnz w0, 0b + + /* + * Final block: add padding and total bit count. + * Skip if we have no total byte count in x4. In that case, the input + * size was not a round multiple of the block size, and the padding is + * handled by the C code. + */ + cbz x4, 3f + movi v9.2d, #0 + mov x8, #0x80000000 + movi v10.2d, #0 + ror x7, x4, #29 // ror(lsl(x4, 3), 32) + fmov d8, x8 + mov x4, #0 + mov v11.d[0], xzr + mov v11.d[1], x7 + b 2b + + /* store new state */ +3: str dga, [x2] + str dgb, [x2, #16] + ret +ENDPROC(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c new file mode 100644 index 000000000000..6fe83f37a750 --- /dev/null +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -0,0 +1,174 @@ +/* + * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <asm/unaligned.h> +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/module.h> + +MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state, + u8 *head, long bytes); + +static int sha1_init(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + return 0; +} + +static int sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + + sctx->count += len; + + if ((partial + len) >= SHA1_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA1_BLOCK_SIZE - partial; + + memcpy(sctx->buffer + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA1_BLOCK_SIZE; + len %= SHA1_BLOCK_SIZE; + + kernel_neon_begin_partial(16); + sha1_ce_transform(blocks, data, sctx->state, + partial ? sctx->buffer : NULL, 0); + kernel_neon_end(); + + data += blocks * SHA1_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx->buffer + partial, data, len); + return 0; +} + +static int sha1_final(struct shash_desc *desc, u8 *out) +{ + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + struct sha1_state *sctx = shash_desc_ctx(desc); + __be64 bits = cpu_to_be64(sctx->count << 3); + __be32 *dst = (__be32 *)out; + int i; + + u32 padlen = SHA1_BLOCK_SIZE + - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE); + + sha1_update(desc, padding, padlen); + sha1_update(desc, (const u8 *)&bits, sizeof(bits)); + + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha1_state){}; + return 0; +} + +static int sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int blocks; + int i; + + if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) { + sha1_update(desc, data, len); + return sha1_final(desc, out); + } + + /* + * Use a fast path if the input is a multiple of 64 bytes. In + * this case, there is no need to copy data around, and we can + * perform the entire digest calculation in a single invocation + * of sha1_ce_transform() + */ + blocks = len / SHA1_BLOCK_SIZE; + + kernel_neon_begin_partial(16); + sha1_ce_transform(blocks, data, sctx->state, NULL, len); + kernel_neon_end(); + + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha1_state){}; + return 0; +} + +static int sha1_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state *dst = out; + + *dst = *sctx; + return 0; +} + +static int sha1_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + struct sha1_state const *src = in; + + *sctx = *src; + return 0; +} + +static struct shash_alg alg = { + .init = sha1_init, + .update = sha1_update, + .final = sha1_final, + .finup = sha1_finup, + .export = sha1_export, + .import = sha1_import, + .descsize = sizeof(struct sha1_state), + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha1_ce_mod_init(void) +{ + return crypto_register_shash(&alg); +} + +static void __exit sha1_ce_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_cpu_feature_match(SHA1, sha1_ce_mod_init); +module_exit(sha1_ce_mod_fini); diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S new file mode 100644 index 000000000000..7f29fc031ea8 --- /dev/null +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -0,0 +1,156 @@ +/* + * sha2-ce-core.S - core SHA-224/SHA-256 transform using v8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .arch armv8-a+crypto + + dga .req q20 + dgav .req v20 + dgb .req q21 + dgbv .req v21 + + t0 .req v22 + t1 .req v23 + + dg0q .req q24 + dg0v .req v24 + dg1q .req q25 + dg1v .req v25 + dg2q .req q26 + dg2v .req v26 + + .macro add_only, ev, rc, s0 + mov dg2v.16b, dg0v.16b + .ifeq \ev + add t1.4s, v\s0\().4s, \rc\().4s + sha256h dg0q, dg1q, t0.4s + sha256h2 dg1q, dg2q, t0.4s + .else + .ifnb \s0 + add t0.4s, v\s0\().4s, \rc\().4s + .endif + sha256h dg0q, dg1q, t1.4s + sha256h2 dg1q, dg2q, t1.4s + .endif + .endm + + .macro add_update, ev, rc, s0, s1, s2, s3 + sha256su0 v\s0\().4s, v\s1\().4s + add_only \ev, \rc, \s1 + sha256su1 v\s0\().4s, v\s2\().4s, v\s3\().4s + .endm + + /* + * The SHA-256 round constants + */ + .align 4 +.Lsha2_rcon: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 + .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 + .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc + .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 + .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 + .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 + .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 + .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 + .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + + /* + * void sha2_ce_transform(int blocks, u8 const *src, u32 *state, + * u8 *head, long bytes) + */ +ENTRY(sha2_ce_transform) + /* load round constants */ + adr x8, .Lsha2_rcon + ld1 { v0.4s- v3.4s}, [x8], #64 + ld1 { v4.4s- v7.4s}, [x8], #64 + ld1 { v8.4s-v11.4s}, [x8], #64 + ld1 {v12.4s-v15.4s}, [x8] + + /* load state */ + ldp dga, dgb, [x2] + + /* load partial input (if supplied) */ + cbz x3, 0f + ld1 {v16.4s-v19.4s}, [x3] + b 1f + + /* load input */ +0: ld1 {v16.4s-v19.4s}, [x1], #64 + sub w0, w0, #1 + +1: +CPU_LE( rev32 v16.16b, v16.16b ) +CPU_LE( rev32 v17.16b, v17.16b ) +CPU_LE( rev32 v18.16b, v18.16b ) +CPU_LE( rev32 v19.16b, v19.16b ) + +2: add t0.4s, v16.4s, v0.4s + mov dg0v.16b, dgav.16b + mov dg1v.16b, dgbv.16b + + add_update 0, v1, 16, 17, 18, 19 + add_update 1, v2, 17, 18, 19, 16 + add_update 0, v3, 18, 19, 16, 17 + add_update 1, v4, 19, 16, 17, 18 + + add_update 0, v5, 16, 17, 18, 19 + add_update 1, v6, 17, 18, 19, 16 + add_update 0, v7, 18, 19, 16, 17 + add_update 1, v8, 19, 16, 17, 18 + + add_update 0, v9, 16, 17, 18, 19 + add_update 1, v10, 17, 18, 19, 16 + add_update 0, v11, 18, 19, 16, 17 + add_update 1, v12, 19, 16, 17, 18 + + add_only 0, v13, 17 + add_only 1, v14, 18 + add_only 0, v15, 19 + add_only 1 + + /* update state */ + add dgav.4s, dgav.4s, dg0v.4s + add dgbv.4s, dgbv.4s, dg1v.4s + + /* handled all input blocks? */ + cbnz w0, 0b + + /* + * Final block: add padding and total bit count. + * Skip if we have no total byte count in x4. In that case, the input + * size was not a round multiple of the block size, and the padding is + * handled by the C code. + */ + cbz x4, 3f + movi v17.2d, #0 + mov x8, #0x80000000 + movi v18.2d, #0 + ror x7, x4, #29 // ror(lsl(x4, 3), 32) + fmov d16, x8 + mov x4, #0 + mov v19.d[0], xzr + mov v19.d[1], x7 + b 2b + + /* store new state */ +3: stp dga, dgb, [x2] + ret +ENDPROC(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c new file mode 100644 index 000000000000..c294e67d3925 --- /dev/null +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -0,0 +1,255 @@ +/* + * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions + * + * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <asm/neon.h> +#include <asm/unaligned.h> +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/module.h> + +MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state, + u8 *head, long bytes); + +static int sha224_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, + } + }; + return 0; +} + +static int sha256_init(struct shash_desc *desc) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha256_state){ + .state = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, + } + }; + return 0; +} + +static int sha2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA256_BLOCK_SIZE; + + sctx->count += len; + + if ((partial + len) >= SHA256_BLOCK_SIZE) { + int blocks; + + if (partial) { + int p = SHA256_BLOCK_SIZE - partial; + + memcpy(sctx->buf + partial, data, p); + data += p; + len -= p; + } + + blocks = len / SHA256_BLOCK_SIZE; + len %= SHA256_BLOCK_SIZE; + + kernel_neon_begin_partial(28); + sha2_ce_transform(blocks, data, sctx->state, + partial ? sctx->buf : NULL, 0); + kernel_neon_end(); + + data += blocks * SHA256_BLOCK_SIZE; + partial = 0; + } + if (len) + memcpy(sctx->buf + partial, data, len); + return 0; +} + +static void sha2_final(struct shash_desc *desc) +{ + static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, }; + + struct sha256_state *sctx = shash_desc_ctx(desc); + __be64 bits = cpu_to_be64(sctx->count << 3); + u32 padlen = SHA256_BLOCK_SIZE + - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE); + + sha2_update(desc, padding, padlen); + sha2_update(desc, (const u8 *)&bits, sizeof(bits)); +} + +static int sha224_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_final(desc); + + for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha256_final(struct shash_desc *desc, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_final(desc); + + for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static void sha2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + int blocks; + + if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) { + sha2_update(desc, data, len); + sha2_final(desc); + return; + } + + /* + * Use a fast path if the input is a multiple of 64 bytes. In + * this case, there is no need to copy data around, and we can + * perform the entire digest calculation in a single invocation + * of sha2_ce_transform() + */ + blocks = len / SHA256_BLOCK_SIZE; + + kernel_neon_begin_partial(28); + sha2_ce_transform(blocks, data, sctx->state, NULL, len); + kernel_neon_end(); + data += blocks * SHA256_BLOCK_SIZE; +} + +static int sha224_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_finup(desc, data, len); + + for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + __be32 *dst = (__be32 *)out; + int i; + + sha2_finup(desc, data, len); + + for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++) + put_unaligned_be32(sctx->state[i], dst++); + + *sctx = (struct sha256_state){}; + return 0; +} + +static int sha2_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state *dst = out; + + *dst = *sctx; + return 0; +} + +static int sha2_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + struct sha256_state const *src = in; + + *sctx = *src; + return 0; +} + +static struct shash_alg algs[] = { { + .init = sha224_init, + .update = sha2_update, + .final = sha224_final, + .finup = sha224_finup, + .export = sha2_export, + .import = sha2_import, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .init = sha256_init, + .update = sha2_update, + .final = sha256_final, + .finup = sha256_finup, + .export = sha2_export, + .import = sha2_import, + .descsize = sizeof(struct sha256_state), + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha2_ce_mod_init(void) +{ + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +} + +static void __exit sha2_ce_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); +} + +module_cpu_feature_match(SHA2, sha2_ce_mod_init); +module_exit(sha2_ce_mod_fini); diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 71c53ecfcc3a..43a2c9d2d6f4 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -35,6 +35,7 @@ generic-y += segment.h generic-y += sembuf.h generic-y += serial.h generic-y += shmbuf.h +generic-y += simd.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index a79e7e9ab86e..0122bec38564 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -70,14 +70,12 @@ static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len) return max(start, end_page); } -static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm, - struct blkcipher_walk *walk, +static inline unsigned int blkcipher_done_slow(struct blkcipher_walk *walk, unsigned int bsize) { u8 *addr; - unsigned int alignmask = crypto_blkcipher_alignmask(tfm); - addr = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1); + addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); addr = blkcipher_get_spot(addr, bsize); scatterwalk_copychunks(addr, &walk->out, bsize, 1); return bsize; @@ -105,7 +103,6 @@ static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk, int blkcipher_walk_done(struct blkcipher_desc *desc, struct blkcipher_walk *walk, int err) { - struct crypto_blkcipher *tfm = desc->tfm; unsigned int nbytes = 0; if (likely(err >= 0)) { @@ -117,7 +114,7 @@ int blkcipher_walk_done(struct blkcipher_desc *desc, err = -EINVAL; goto err; } else - n = blkcipher_done_slow(tfm, walk, n); + n = blkcipher_done_slow(walk, n); nbytes = walk->total - n; err = 0; @@ -136,7 +133,7 @@ err: } if (walk->iv != desc->info) - memcpy(desc->info, walk->iv, crypto_blkcipher_ivsize(tfm)); + memcpy(desc->info, walk->iv, walk->ivsize); if (walk->buffer != walk->page) kfree(walk->buffer); if (walk->page) @@ -226,22 +223,20 @@ static inline int blkcipher_next_fast(struct blkcipher_desc *desc, static int blkcipher_walk_next(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { - struct crypto_blkcipher *tfm = desc->tfm; - unsigned int alignmask = crypto_blkcipher_alignmask(tfm); unsigned int bsize; unsigned int n; int err; n = walk->total; - if (unlikely(n < crypto_blkcipher_blocksize(tfm))) { + if (unlikely(n < walk->cipher_blocksize)) { desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; return blkcipher_walk_done(desc, walk, -EINVAL); } walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY | BLKCIPHER_WALK_DIFF); - if (!scatterwalk_aligned(&walk->in, alignmask) || - !scatterwalk_aligned(&walk->out, alignmask)) { + if (!scatterwalk_aligned(&walk->in, walk->alignmask) || + !scatterwalk_aligned(&walk->out, walk->alignmask)) { walk->flags |= BLKCIPHER_WALK_COPY; if (!walk->page) { walk->page = (void *)__get_free_page(GFP_ATOMIC); @@ -250,12 +245,12 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc, } } - bsize = min(walk->blocksize, n); + bsize = min(walk->walk_blocksize, n); n = scatterwalk_clamp(&walk->in, n); n = scatterwalk_clamp(&walk->out, n); if (unlikely(n < bsize)) { - err = blkcipher_next_slow(desc, walk, bsize, alignmask); + err = blkcipher_next_slow(desc, walk, bsize, walk->alignmask); goto set_phys_lowmem; } @@ -277,28 +272,26 @@ set_phys_lowmem: return err; } -static inline int blkcipher_copy_iv(struct blkcipher_walk *walk, - struct crypto_blkcipher *tfm, - unsigned int alignmask) +static inline int blkcipher_copy_iv(struct blkcipher_walk *walk) { - unsigned bs = walk->blocksize; - unsigned int ivsize = crypto_blkcipher_ivsize(tfm); - unsigned aligned_bs = ALIGN(bs, alignmask + 1); - unsigned int size = aligned_bs * 2 + ivsize + max(aligned_bs, ivsize) - - (alignmask + 1); + unsigned bs = walk->walk_blocksize; + unsigned aligned_bs = ALIGN(bs, walk->alignmask + 1); + unsigned int size = aligned_bs * 2 + + walk->ivsize + max(aligned_bs, walk->ivsize) - + (walk->alignmask + 1); u8 *iv; - size += alignmask & ~(crypto_tfm_ctx_alignment() - 1); + size += walk->alignmask & ~(crypto_tfm_ctx_alignment() - 1); walk->buffer = kmalloc(size, GFP_ATOMIC); if (!walk->buffer) return -ENOMEM; - iv = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1); + iv = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); iv = blkcipher_get_spot(iv, bs) + aligned_bs; iv = blkcipher_get_spot(iv, bs) + aligned_bs; - iv = blkcipher_get_spot(iv, ivsize); + iv = blkcipher_get_spot(iv, walk->ivsize); - walk->iv = memcpy(iv, walk->iv, ivsize); + walk->iv = memcpy(iv, walk->iv, walk->ivsize); return 0; } @@ -306,7 +299,10 @@ int blkcipher_walk_virt(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { walk->flags &= ~BLKCIPHER_WALK_PHYS; - walk->blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->cipher_blocksize = walk->walk_blocksize; + walk->ivsize = crypto_blkcipher_ivsize(desc->tfm); + walk->alignmask = crypto_blkcipher_alignmask(desc->tfm); return blkcipher_walk_first(desc, walk); } EXPORT_SYMBOL_GPL(blkcipher_walk_virt); @@ -315,7 +311,10 @@ int blkcipher_walk_phys(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { walk->flags |= BLKCIPHER_WALK_PHYS; - walk->blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->cipher_blocksize = walk->walk_blocksize; + walk->ivsize = crypto_blkcipher_ivsize(desc->tfm); + walk->alignmask = crypto_blkcipher_alignmask(desc->tfm); return blkcipher_walk_first(desc, walk); } EXPORT_SYMBOL_GPL(blkcipher_walk_phys); @@ -323,9 +322,6 @@ EXPORT_SYMBOL_GPL(blkcipher_walk_phys); static int blkcipher_walk_first(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { - struct crypto_blkcipher *tfm = desc->tfm; - unsigned int alignmask = crypto_blkcipher_alignmask(tfm); - if (WARN_ON_ONCE(in_irq())) return -EDEADLK; @@ -335,8 +331,8 @@ static int blkcipher_walk_first(struct blkcipher_desc *desc, walk->buffer = NULL; walk->iv = desc->info; - if (unlikely(((unsigned long)walk->iv & alignmask))) { - int err = blkcipher_copy_iv(walk, tfm, alignmask); + if (unlikely(((unsigned long)walk->iv & walk->alignmask))) { + int err = blkcipher_copy_iv(walk); if (err) return err; } @@ -353,11 +349,28 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc, unsigned int blocksize) { walk->flags &= ~BLKCIPHER_WALK_PHYS; - walk->blocksize = blocksize; + walk->walk_blocksize = blocksize; + walk->cipher_blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->ivsize = crypto_blkcipher_ivsize(desc->tfm); + walk->alignmask = crypto_blkcipher_alignmask(desc->tfm); return blkcipher_walk_first(desc, walk); } EXPORT_SYMBOL_GPL(blkcipher_walk_virt_block); +int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_aead *tfm, + unsigned int blocksize) +{ + walk->flags &= ~BLKCIPHER_WALK_PHYS; + walk->walk_blocksize = blocksize; + walk->cipher_blocksize = crypto_aead_blocksize(tfm); + walk->ivsize = crypto_aead_ivsize(tfm); + walk->alignmask = crypto_aead_alignmask(tfm); + return blkcipher_walk_first(desc, walk); +} +EXPORT_SYMBOL_GPL(blkcipher_aead_walk_virt_block); + static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 962fd35cbd8d..bc8e1af279c0 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -26,6 +26,7 @@ #include <asm/cacheflush.h> #include <asm/irq_regs.h> +#include <asm/psci.h> #include <asm/pmu.h> #include <asm/smp_plat.h> @@ -544,6 +545,7 @@ static int cci_pmu_init(struct arm_pmu *cci_pmu, struct platform_device *pdev) cci_pmu->plat_device = pdev; cci_pmu->num_events = pmu_get_max_counters(); + cpumask_setall(&cci_pmu->valid_cpus); return armpmu_register(cci_pmu, -1); } @@ -969,6 +971,11 @@ static int cci_probe(void) const char *match_str; bool is_ace; + if (psci_probe() == 0) { + pr_debug("psci found. Aborting cci probe\n"); + return -ENODEV; + } + np = of_find_matching_node(NULL, arm_cci_matches); if (!np) return -ENODEV; diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index e90061f56128..99e145ee0cef 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -39,6 +39,11 @@ config VIDEOMODE_HELPERS config HDMI bool +config VEXPRESS_DVI_CONTROL + bool "Versatile Express DVI control" + depends on FB && VEXPRESS_CONFIG + default y + menuconfig FB tristate "Support for frame buffer devices" ---help--- @@ -327,6 +332,21 @@ config FB_ARMCLCD here and read <file:Documentation/kbuild/modules.txt>. The module will be called amba-clcd. +config FB_ARMHDLCD + tristate "ARM High Definition LCD support" + depends on FB && ARM + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + This framebuffer device driver is for the ARM High Definition + Colour LCD controller. + + If you want to compile this as a module (=code which can be + inserted into and removed from the running kernel), say M + here and read <file:Documentation/kbuild/modules.txt>. The module + will be called arm-hdlcd. + config FB_ACORN bool "Acorn VIDC support" depends on (FB = y) && ARM && ARCH_ACORN diff --git a/drivers/video/Makefile b/drivers/video/Makefile index accf67ae97b8..8abfb4db9b71 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -100,6 +100,7 @@ obj-$(CONFIG_FB_ATMEL) += atmel_lcdfb.o obj-$(CONFIG_FB_PVR2) += pvr2fb.o obj-$(CONFIG_FB_VOODOO1) += sstfb.o obj-$(CONFIG_FB_ARMCLCD) += amba-clcd.o +obj-$(CONFIG_FB_ARMHDLCD) += arm-hdlcd.o obj-$(CONFIG_FB_GOLDFISH) += goldfishfb.o obj-$(CONFIG_FB_68328) += 68328fb.o obj-$(CONFIG_FB_GBE) += gbefb.o @@ -179,3 +180,6 @@ obj-$(CONFIG_VIDEOMODE_HELPERS) += display_timing.o videomode.o ifeq ($(CONFIG_OF),y) obj-$(CONFIG_VIDEOMODE_HELPERS) += of_display_timing.o of_videomode.o endif + +# platform specific output drivers +obj-$(CONFIG_VEXPRESS_DVI_CONTROL) += vexpress-dvi.o diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index 14d6b3793e0a..12f8c4640397 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -17,7 +17,10 @@ #include <linux/string.h> #include <linux/slab.h> #include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/memblock.h> #include <linux/mm.h> +#include <linux/of.h> #include <linux/fb.h> #include <linux/init.h> #include <linux/ioport.h> @@ -31,8 +34,20 @@ #define to_clcd(info) container_of(info, struct clcd_fb, fb) +#ifdef CONFIG_ARM +#define clcdfb_dma_alloc dma_alloc_writecombine +#define clcdfb_dma_free dma_free_writecombine +#define clcdfb_dma_mmap dma_mmap_writecombine +#else +#define clcdfb_dma_alloc dma_alloc_coherent +#define clcdfb_dma_free dma_free_coherent +#define clcdfb_dma_mmap dma_mmap_coherent +#endif + /* This is limited to 16 characters when displayed by X startup */ static const char *clcd_name = "CLCD FB"; +static char *def_mode; +module_param_named(mode, def_mode, charp, 0); /* * Unfortunately, the enable/disable functions may be called either from @@ -234,6 +249,17 @@ clcdfb_set_bitfields(struct clcd_fb *fb, struct fb_var_screeninfo *var) bgr = caps & CLCD_CAP_BGR && var->blue.offset == 0; rgb = caps & CLCD_CAP_RGB && var->red.offset == 0; + /* + * Seems that for 32-bit mode there is confusion about RGB + * ordering somewhere between user-side, kernel and hardware. + * The following hack seems get things working, at least on + * vexpress hardware and models... + */ + if (var->bits_per_pixel == 32) { + bgr = false; + rgb = true; + } + if (!bgr && !rgb) /* * The requested format was not possible, try just @@ -393,6 +419,44 @@ static int clcdfb_blank(int blank_mode, struct fb_info *info) return 0; } +int clcdfb_mmap_dma(struct clcd_fb *fb, struct vm_area_struct *vma) +{ + return clcdfb_dma_mmap(&fb->dev->dev, vma, + fb->fb.screen_base, + fb->fb.fix.smem_start, + fb->fb.fix.smem_len); +} + +int clcdfb_mmap_io(struct clcd_fb *fb, struct vm_area_struct *vma) +{ + unsigned long user_count, count, pfn, off; + + user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + count = PAGE_ALIGN(fb->fb.fix.smem_len) >> PAGE_SHIFT; + pfn = fb->fb.fix.smem_start >> PAGE_SHIFT; + off = vma->vm_pgoff; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (off < count && user_count <= (count - off)) + return remap_pfn_range(vma, vma->vm_start, pfn + off, + user_count << PAGE_SHIFT, + vma->vm_page_prot); + + return -ENXIO; +} + +void clcdfb_remove_dma(struct clcd_fb *fb) +{ + clcdfb_dma_free(&fb->dev->dev, fb->fb.fix.smem_len, + fb->fb.screen_base, fb->fb.fix.smem_start); +} + +void clcdfb_remove_io(struct clcd_fb *fb) +{ + iounmap(fb->fb.screen_base); +} + static int clcdfb_mmap(struct fb_info *info, struct vm_area_struct *vma) { @@ -543,14 +607,247 @@ static int clcdfb_register(struct clcd_fb *fb) return ret; } +struct string_lookup { + const char *string; + const u32 val; +}; + +static struct string_lookup vmode_lookups[] = { + { "FB_VMODE_NONINTERLACED", FB_VMODE_NONINTERLACED}, + { "FB_VMODE_INTERLACED", FB_VMODE_INTERLACED}, + { "FB_VMODE_DOUBLE", FB_VMODE_DOUBLE}, + { "FB_VMODE_ODD_FLD_FIRST", FB_VMODE_ODD_FLD_FIRST}, + { NULL, 0 }, +}; + +static struct string_lookup tim2_lookups[] = { + { "TIM2_CLKSEL", TIM2_CLKSEL}, + { "TIM2_IVS", TIM2_IVS}, + { "TIM2_IHS", TIM2_IHS}, + { "TIM2_IPC", TIM2_IPC}, + { "TIM2_IOE", TIM2_IOE}, + { "TIM2_BCD", TIM2_BCD}, + { NULL, 0}, +}; +static struct string_lookup cntl_lookups[] = { + {"CNTL_LCDEN", CNTL_LCDEN}, + {"CNTL_LCDBPP1", CNTL_LCDBPP1}, + {"CNTL_LCDBPP2", CNTL_LCDBPP2}, + {"CNTL_LCDBPP4", CNTL_LCDBPP4}, + {"CNTL_LCDBPP8", CNTL_LCDBPP8}, + {"CNTL_LCDBPP16", CNTL_LCDBPP16}, + {"CNTL_LCDBPP16_565", CNTL_LCDBPP16_565}, + {"CNTL_LCDBPP16_444", CNTL_LCDBPP16_444}, + {"CNTL_LCDBPP24", CNTL_LCDBPP24}, + {"CNTL_LCDBW", CNTL_LCDBW}, + {"CNTL_LCDTFT", CNTL_LCDTFT}, + {"CNTL_LCDMONO8", CNTL_LCDMONO8}, + {"CNTL_LCDDUAL", CNTL_LCDDUAL}, + {"CNTL_BGR", CNTL_BGR}, + {"CNTL_BEBO", CNTL_BEBO}, + {"CNTL_BEPO", CNTL_BEPO}, + {"CNTL_LCDPWR", CNTL_LCDPWR}, + {"CNTL_LCDVCOMP(1)", CNTL_LCDVCOMP(1)}, + {"CNTL_LCDVCOMP(2)", CNTL_LCDVCOMP(2)}, + {"CNTL_LCDVCOMP(3)", CNTL_LCDVCOMP(3)}, + {"CNTL_LCDVCOMP(4)", CNTL_LCDVCOMP(4)}, + {"CNTL_LCDVCOMP(5)", CNTL_LCDVCOMP(5)}, + {"CNTL_LCDVCOMP(6)", CNTL_LCDVCOMP(6)}, + {"CNTL_LCDVCOMP(7)", CNTL_LCDVCOMP(7)}, + {"CNTL_LDMAFIFOTIME", CNTL_LDMAFIFOTIME}, + {"CNTL_WATERMARK", CNTL_WATERMARK}, + { NULL, 0}, +}; +static struct string_lookup caps_lookups[] = { + {"CLCD_CAP_RGB444", CLCD_CAP_RGB444}, + {"CLCD_CAP_RGB5551", CLCD_CAP_RGB5551}, + {"CLCD_CAP_RGB565", CLCD_CAP_RGB565}, + {"CLCD_CAP_RGB888", CLCD_CAP_RGB888}, + {"CLCD_CAP_BGR444", CLCD_CAP_BGR444}, + {"CLCD_CAP_BGR5551", CLCD_CAP_BGR5551}, + {"CLCD_CAP_BGR565", CLCD_CAP_BGR565}, + {"CLCD_CAP_BGR888", CLCD_CAP_BGR888}, + {"CLCD_CAP_444", CLCD_CAP_444}, + {"CLCD_CAP_5551", CLCD_CAP_5551}, + {"CLCD_CAP_565", CLCD_CAP_565}, + {"CLCD_CAP_888", CLCD_CAP_888}, + {"CLCD_CAP_RGB", CLCD_CAP_RGB}, + {"CLCD_CAP_BGR", CLCD_CAP_BGR}, + {"CLCD_CAP_ALL", CLCD_CAP_ALL}, + { NULL, 0}, +}; + +u32 parse_setting(struct string_lookup *lookup, const char *name) +{ + int i = 0; + while (lookup[i].string != NULL) { + if (strcmp(lookup[i].string, name) == 0) + return lookup[i].val; + ++i; + } + return -EINVAL; +} + +u32 get_string_lookup(struct device_node *node, const char *name, + struct string_lookup *lookup) +{ + const char *string; + int count, i, ret = 0; + + count = of_property_count_strings(node, name); + if (count >= 0) + for (i = 0; i < count; i++) + if (of_property_read_string_index(node, name, i, + &string) == 0) + ret |= parse_setting(lookup, string); + return ret; +} + +int get_val(struct device_node *node, const char *string) +{ + u32 ret = 0; + + if (of_property_read_u32(node, string, &ret)) + ret = -1; + return ret; +} + +struct clcd_panel *getPanel(struct device_node *node) +{ + static struct clcd_panel panel; + + panel.mode.refresh = get_val(node, "refresh"); + panel.mode.xres = get_val(node, "xres"); + panel.mode.yres = get_val(node, "yres"); + panel.mode.pixclock = get_val(node, "pixclock"); + panel.mode.left_margin = get_val(node, "left_margin"); + panel.mode.right_margin = get_val(node, "right_margin"); + panel.mode.upper_margin = get_val(node, "upper_margin"); + panel.mode.lower_margin = get_val(node, "lower_margin"); + panel.mode.hsync_len = get_val(node, "hsync_len"); + panel.mode.vsync_len = get_val(node, "vsync_len"); + panel.mode.sync = get_val(node, "sync"); + panel.bpp = get_val(node, "bpp"); + panel.width = (signed short) get_val(node, "width"); + panel.height = (signed short) get_val(node, "height"); + + panel.mode.vmode = get_string_lookup(node, "vmode", vmode_lookups); + panel.tim2 = get_string_lookup(node, "tim2", tim2_lookups); + panel.cntl = get_string_lookup(node, "cntl", cntl_lookups); + panel.caps = get_string_lookup(node, "caps", caps_lookups); + + return &panel; +} + +struct clcd_panel *clcdfb_get_panel(const char *name) +{ + struct device_node *node = NULL; + const char *mode; + struct clcd_panel *panel = NULL; + + do { + node = of_find_compatible_node(node, NULL, "panel"); + if (node) + if (of_property_read_string(node, "mode", &mode) == 0) + if (strcmp(mode, name) == 0) { + panel = getPanel(node); + panel->mode.name = name; + } + } while (node != NULL); + + return panel; +} + +#ifdef CONFIG_OF +static int clcdfb_dt_init(struct clcd_fb *fb) +{ + int err = 0; + struct device_node *node; + const char *mode; + dma_addr_t dma; + u32 use_dma; + const __be32 *prop; + int len, na, ns; + phys_addr_t fb_base, fb_size; + + node = fb->dev->dev.of_node; + if (!node) + return -ENODEV; + + na = of_n_addr_cells(node); + ns = of_n_size_cells(node); + + if (def_mode && strlen(def_mode) > 0) { + fb->panel = clcdfb_get_panel(def_mode); + if (!fb->panel) + printk(KERN_ERR "CLCD: invalid mode specified on the command line (%s)\n", def_mode); + } + + if (!fb->panel) { + if (WARN_ON(of_property_read_string(node, "mode", &mode))) + return -ENODEV; + fb->panel = clcdfb_get_panel(mode); + } + + if (!fb->panel) + return -EINVAL; + fb->fb.fix.smem_len = fb->panel->mode.xres * fb->panel->mode.yres * 4; + + fb->board->name = "Device Tree CLCD PL111"; + fb->board->caps = CLCD_CAP_5551 | CLCD_CAP_565 | CLCD_CAP_888; + fb->board->check = clcdfb_check; + fb->board->decode = clcdfb_decode; + + if (of_property_read_u32(node, "use_dma", &use_dma)) + use_dma = 0; + + if (use_dma) { + fb->fb.screen_base = clcdfb_dma_alloc(&fb->dev->dev, + fb->fb.fix.smem_len, + &dma, GFP_KERNEL); + if (!fb->fb.screen_base) { + pr_err("CLCD: unable to map framebuffer\n"); + return -ENOMEM; + } + + fb->fb.fix.smem_start = dma; + fb->board->mmap = clcdfb_mmap_dma; + fb->board->remove = clcdfb_remove_dma; + } else { + prop = of_get_property(node, "framebuffer", &len); + if (WARN_ON(!prop || len < (na + ns) * sizeof(*prop))) + return -EINVAL; + + fb_base = of_read_number(prop, na); + fb_size = of_read_number(prop + na, ns); + + fb->fb.fix.smem_start = fb_base; + fb->fb.screen_base = ioremap_wc(fb_base, fb_size); + fb->board->mmap = clcdfb_mmap_io; + fb->board->remove = clcdfb_remove_io; + } + + return err; +} +#endif /* CONFIG_OF */ + static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id) { struct clcd_board *board = dev_get_platdata(&dev->dev); struct clcd_fb *fb; int ret; - if (!board) - return -EINVAL; + if (!board) { +#ifdef CONFIG_OF + if (dev->dev.of_node) { + board = kzalloc(sizeof(struct clcd_board), GFP_KERNEL); + if (!board) + return -ENOMEM; + board->setup = clcdfb_dt_init; + } else +#endif + return -EINVAL; + } ret = dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32)); if (ret) diff --git a/drivers/video/arm-hdlcd.c b/drivers/video/arm-hdlcd.c new file mode 100644 index 000000000000..cfd631e3dc52 --- /dev/null +++ b/drivers/video/arm-hdlcd.c @@ -0,0 +1,844 @@ +/* + * drivers/video/arm-hdlcd.c + * + * Copyright (C) 2011 ARM Limited + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + * + * ARM HDLCD Controller + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/mm.h> +#include <linux/delay.h> +#include <linux/of.h> +#include <linux/fb.h> +#include <linux/clk.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/dma-mapping.h> +#include <linux/platform_device.h> +#include <linux/memblock.h> +#include <linux/arm-hdlcd.h> +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#endif + +#include "edid.h" + +#ifdef CONFIG_SERIAL_AMBA_PCU_UART +int get_edid(u8 *msgbuf); +#else +#endif + +#define to_hdlcd_device(info) container_of(info, struct hdlcd_device, fb) + +static struct of_device_id hdlcd_of_matches[] = { + { .compatible = "arm,hdlcd" }, + {}, +}; + +/* Framebuffer size. */ +static unsigned long framebuffer_size; + +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS +static unsigned long buffer_underrun_events; +static DEFINE_SPINLOCK(hdlcd_underrun_lock); + +static void hdlcd_underrun_set(unsigned long val) +{ + spin_lock(&hdlcd_underrun_lock); + buffer_underrun_events = val; + spin_unlock(&hdlcd_underrun_lock); +} + +static unsigned long hdlcd_underrun_get(void) +{ + unsigned long val; + spin_lock(&hdlcd_underrun_lock); + val = buffer_underrun_events; + spin_unlock(&hdlcd_underrun_lock); + return val; +} + +#ifdef CONFIG_PROC_FS +static int hdlcd_underrun_show(struct seq_file *m, void *v) +{ + unsigned char underrun_string[32]; + snprintf(underrun_string, 32, "%lu\n", hdlcd_underrun_get()); + seq_puts(m, underrun_string); + return 0; +} + +static int proc_hdlcd_underrun_open(struct inode *inode, struct file *file) +{ + return single_open(file, hdlcd_underrun_show, NULL); +} + +static const struct file_operations proc_hdlcd_underrun_operations = { + .open = proc_hdlcd_underrun_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int hdlcd_underrun_init(void) +{ + hdlcd_underrun_set(0); + proc_create("hdlcd_underrun", 0, NULL, &proc_hdlcd_underrun_operations); + return 0; +} +static void hdlcd_underrun_close(void) +{ + remove_proc_entry("hdlcd_underrun", NULL); +} +#else +static int hdlcd_underrun_init(void) { return 0; } +static void hdlcd_underrun_close(void) { } +#endif +#endif + +static char *fb_mode = "1680x1050-32@60\0\0\0\0\0"; + +static struct fb_var_screeninfo cached_var_screeninfo; + +static struct fb_videomode hdlcd_default_mode = { + .refresh = 60, + .xres = 1680, + .yres = 1050, + .pixclock = 8403, + .left_margin = 80, + .right_margin = 48, + .upper_margin = 21, + .lower_margin = 3, + .hsync_len = 32, + .vsync_len = 6, + .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, + .vmode = FB_VMODE_NONINTERLACED +}; + +static inline void hdlcd_enable(struct hdlcd_device *hdlcd) +{ + dev_dbg(hdlcd->dev, "HDLCD: output enabled\n"); + writel(1, hdlcd->base + HDLCD_REG_COMMAND); +} + +static inline void hdlcd_disable(struct hdlcd_device *hdlcd) +{ + dev_dbg(hdlcd->dev, "HDLCD: output disabled\n"); + writel(0, hdlcd->base + HDLCD_REG_COMMAND); +} + +static int hdlcd_set_bitfields(struct hdlcd_device *hdlcd, + struct fb_var_screeninfo *var) +{ + int ret = 0; + + memset(&var->transp, 0, sizeof(var->transp)); + var->red.msb_right = 0; + var->green.msb_right = 0; + var->blue.msb_right = 0; + var->blue.offset = 0; + + switch (var->bits_per_pixel) { + case 8: + /* pseudocolor */ + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + break; + case 16: + /* 565 format */ + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + break; + case 32: + var->transp.length = 8; + case 24: + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + break; + default: + ret = -EINVAL; + break; + } + + if (!ret) { + if(var->bits_per_pixel != 32) + { + var->green.offset = var->blue.length; + var->red.offset = var->green.offset + var->green.length; + } + else + { + /* Previously, the byte ordering for 32-bit color was + * (msb)<alpha><red><green><blue>(lsb) + * but this does not match what android expects and + * the colors are odd. Instead, use + * <alpha><blue><green><red> + * Since we tell fb what we are doing, console + * , X and directfb access should work fine. + */ + var->green.offset = var->red.length; + var->blue.offset = var->green.offset + var->green.length; + var->transp.offset = var->blue.offset + var->blue.length; + } + } + + return ret; +} + +static int hdlcd_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + int bytes_per_pixel = var->bits_per_pixel / 8; + +#ifdef HDLCD_NO_VIRTUAL_SCREEN + var->yres_virtual = var->yres; +#else + var->yres_virtual = 2 * var->yres; +#endif + + if ((var->xres_virtual * bytes_per_pixel * var->yres_virtual) > hdlcd->fb.fix.smem_len) + return -ENOMEM; + + if (var->xres > HDLCD_MAX_XRES || var->yres > HDLCD_MAX_YRES) + return -EINVAL; + + /* make sure the bitfields are set appropriately */ + return hdlcd_set_bitfields(hdlcd, var); +} + +/* prototype */ +static int hdlcd_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info); + +#define WRITE_HDLCD_REG(reg, value) writel((value), hdlcd->base + (reg)) +#define READ_HDLCD_REG(reg) readl(hdlcd->base + (reg)) + +static int hdlcd_set_par(struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + int bytes_per_pixel = hdlcd->fb.var.bits_per_pixel / 8; + int polarities; + int old_yoffset; + + /* check for shortcuts */ + old_yoffset = cached_var_screeninfo.yoffset; + cached_var_screeninfo.yoffset = info->var.yoffset; + if (!memcmp(&info->var, &cached_var_screeninfo, + sizeof(struct fb_var_screeninfo))) { + if(old_yoffset != info->var.yoffset) { + /* we only changed yoffset, and we already + * already recorded it a couple lines up + */ + hdlcd_pan_display(&info->var, info); + } + /* or no change */ + return 0; + } + + hdlcd->fb.fix.line_length = hdlcd->fb.var.xres * bytes_per_pixel; + + if (hdlcd->fb.var.bits_per_pixel >= 16) + hdlcd->fb.fix.visual = FB_VISUAL_TRUECOLOR; + else + hdlcd->fb.fix.visual = FB_VISUAL_PSEUDOCOLOR; + + memcpy(&cached_var_screeninfo, &info->var, sizeof(struct fb_var_screeninfo)); + + polarities = HDLCD_POLARITY_DATAEN | +#ifndef CONFIG_ARCH_TUSCAN + HDLCD_POLARITY_PIXELCLK | +#endif + HDLCD_POLARITY_DATA; + polarities |= (hdlcd->fb.var.sync & FB_SYNC_HOR_HIGH_ACT) ? HDLCD_POLARITY_HSYNC : 0; + polarities |= (hdlcd->fb.var.sync & FB_SYNC_VERT_HIGH_ACT) ? HDLCD_POLARITY_VSYNC : 0; + + hdlcd_disable(hdlcd); + + WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_LENGTH, hdlcd->fb.var.xres * bytes_per_pixel); + WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_PITCH, hdlcd->fb.var.xres * bytes_per_pixel); + WRITE_HDLCD_REG(HDLCD_REG_FB_LINE_COUNT, hdlcd->fb.var.yres - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_SYNC, hdlcd->fb.var.vsync_len - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_BACK_PORCH, hdlcd->fb.var.upper_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_DATA, hdlcd->fb.var.yres - 1); + WRITE_HDLCD_REG(HDLCD_REG_V_FRONT_PORCH, hdlcd->fb.var.lower_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_SYNC, hdlcd->fb.var.hsync_len - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_BACK_PORCH, hdlcd->fb.var.left_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_DATA, hdlcd->fb.var.xres - 1); + WRITE_HDLCD_REG(HDLCD_REG_H_FRONT_PORCH, hdlcd->fb.var.right_margin - 1); + WRITE_HDLCD_REG(HDLCD_REG_POLARITIES, polarities); + WRITE_HDLCD_REG(HDLCD_REG_PIXEL_FORMAT, (bytes_per_pixel - 1) << 3); +#ifdef HDLCD_RED_DEFAULT_COLOUR + WRITE_HDLCD_REG(HDLCD_REG_RED_SELECT, (0x00ff0000 | (hdlcd->fb.var.red.length & 0xf) << 8) \ + | hdlcd->fb.var.red.offset); +#else + WRITE_HDLCD_REG(HDLCD_REG_RED_SELECT, ((hdlcd->fb.var.red.length & 0xf) << 8) | hdlcd->fb.var.red.offset); +#endif + WRITE_HDLCD_REG(HDLCD_REG_GREEN_SELECT, ((hdlcd->fb.var.green.length & 0xf) << 8) | hdlcd->fb.var.green.offset); + WRITE_HDLCD_REG(HDLCD_REG_BLUE_SELECT, ((hdlcd->fb.var.blue.length & 0xf) << 8) | hdlcd->fb.var.blue.offset); + + clk_set_rate(hdlcd->clk, (1000000000 / hdlcd->fb.var.pixclock) * 1000); + clk_enable(hdlcd->clk); + + hdlcd_enable(hdlcd); + + return 0; +} + +static int hdlcd_setcolreg(unsigned int regno, unsigned int red, unsigned int green, + unsigned int blue, unsigned int transp, struct fb_info *info) +{ + if (regno < 16) { + u32 *pal = info->pseudo_palette; + + pal[regno] = ((red >> 8) << info->var.red.offset) | + ((green >> 8) << info->var.green.offset) | + ((blue >> 8) << info->var.blue.offset); + } + + return 0; +} + +static irqreturn_t hdlcd_irq(int irq, void *data) +{ + struct hdlcd_device *hdlcd = data; + unsigned long irq_mask, irq_status; + + irq_mask = READ_HDLCD_REG(HDLCD_REG_INT_MASK); + irq_status = READ_HDLCD_REG(HDLCD_REG_INT_STATUS); + + /* acknowledge interrupt(s) */ + WRITE_HDLCD_REG(HDLCD_REG_INT_CLEAR, irq_status); +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS + if (irq_status & HDLCD_INTERRUPT_UNDERRUN) { + /* increment the count */ + hdlcd_underrun_set(hdlcd_underrun_get() + 1); + } +#endif + if (irq_status & HDLCD_INTERRUPT_VSYNC) { + /* disable future VSYNC interrupts */ + WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, irq_mask & ~HDLCD_INTERRUPT_VSYNC); + + complete(&hdlcd->vsync_completion); + } + + return IRQ_HANDLED; +} + +static int hdlcd_wait_for_vsync(struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + unsigned long irq_mask; + int err; + + /* enable VSYNC interrupt */ + irq_mask = READ_HDLCD_REG(HDLCD_REG_INT_MASK); + WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, irq_mask | HDLCD_INTERRUPT_VSYNC); + + err = wait_for_completion_interruptible_timeout(&hdlcd->vsync_completion, + msecs_to_jiffies(100)); + + if (!err) + return -ETIMEDOUT; + + return 0; +} + +static int hdlcd_blank(int blank_mode, struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + + switch (blank_mode) { + case FB_BLANK_POWERDOWN: + clk_disable(hdlcd->clk); + case FB_BLANK_NORMAL: + hdlcd_disable(hdlcd); + break; + case FB_BLANK_UNBLANK: + clk_enable(hdlcd->clk); + hdlcd_enable(hdlcd); + break; + case FB_BLANK_VSYNC_SUSPEND: + case FB_BLANK_HSYNC_SUSPEND: + default: + return 1; + } + + return 0; +} + +static void hdlcd_mmap_open(struct vm_area_struct *vma) +{ +} + +static void hdlcd_mmap_close(struct vm_area_struct *vma) +{ +} + +static struct vm_operations_struct hdlcd_mmap_ops = { + .open = hdlcd_mmap_open, + .close = hdlcd_mmap_close, +}; + +static int hdlcd_mmap(struct fb_info *info, struct vm_area_struct *vma) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + unsigned long off; + unsigned long start; + unsigned long len = hdlcd->fb.fix.smem_len; + + if (vma->vm_end - vma->vm_start == 0) + return 0; + if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) + return -EINVAL; + + off = vma->vm_pgoff << PAGE_SHIFT; + if ((off >= len) || (vma->vm_end - vma->vm_start + off) > len) + return -EINVAL; + + start = hdlcd->fb.fix.smem_start; + off += start; + + vma->vm_pgoff = off >> PAGE_SHIFT; + vma->vm_flags |= VM_IO; + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vma->vm_ops = &hdlcd_mmap_ops; + if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static int hdlcd_pan_display(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct hdlcd_device *hdlcd = to_hdlcd_device(info); + + hdlcd->fb.var.yoffset = var->yoffset; + WRITE_HDLCD_REG(HDLCD_REG_FB_BASE, hdlcd->fb.fix.smem_start + + (var->yoffset * hdlcd->fb.fix.line_length)); + + hdlcd_wait_for_vsync(info); + + return 0; +} + +static int hdlcd_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg) +{ + int err; + + switch (cmd) { + case FBIO_WAITFORVSYNC: + err = hdlcd_wait_for_vsync(info); + break; + default: + err = -ENOIOCTLCMD; + break; + } + + return err; +} + +static struct fb_ops hdlcd_ops = { + .owner = THIS_MODULE, + .fb_check_var = hdlcd_check_var, + .fb_set_par = hdlcd_set_par, + .fb_setcolreg = hdlcd_setcolreg, + .fb_blank = hdlcd_blank, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, + .fb_mmap = hdlcd_mmap, + .fb_pan_display = hdlcd_pan_display, + .fb_ioctl = hdlcd_ioctl, + .fb_compat_ioctl = hdlcd_ioctl +}; + +static int hdlcd_setup(struct hdlcd_device *hdlcd) +{ + u32 version; + int err = -EFAULT; + + hdlcd->fb.device = hdlcd->dev; + + hdlcd->clk = clk_get(hdlcd->dev, NULL); + if (IS_ERR(hdlcd->clk)) { + dev_err(hdlcd->dev, "HDLCD: unable to find clock data\n"); + return PTR_ERR(hdlcd->clk); + } + + err = clk_prepare(hdlcd->clk); + if (err) + goto clk_prepare_err; + + hdlcd->base = ioremap_nocache(hdlcd->fb.fix.mmio_start, hdlcd->fb.fix.mmio_len); + if (!hdlcd->base) { + dev_err(hdlcd->dev, "HDLCD: unable to map registers\n"); + goto remap_err; + } + + hdlcd->fb.pseudo_palette = kmalloc(sizeof(u32) * 16, GFP_KERNEL); + if (!hdlcd->fb.pseudo_palette) { + dev_err(hdlcd->dev, "HDLCD: unable to allocate pseudo_palette memory\n"); + err = -ENOMEM; + goto kmalloc_err; + } + + version = readl(hdlcd->base + HDLCD_REG_VERSION); + if ((version & HDLCD_PRODUCT_MASK) != HDLCD_PRODUCT_ID) { + dev_err(hdlcd->dev, "HDLCD: unknown product id: 0x%x\n", version); + err = -EINVAL; + goto kmalloc_err; + } + dev_info(hdlcd->dev, "HDLCD: found ARM HDLCD version r%dp%d\n", + (version & HDLCD_VERSION_MAJOR_MASK) >> 8, + version & HDLCD_VERSION_MINOR_MASK); + + strcpy(hdlcd->fb.fix.id, "hdlcd"); + hdlcd->fb.fbops = &hdlcd_ops; + hdlcd->fb.flags = FBINFO_FLAG_DEFAULT/* | FBINFO_VIRTFB*/; + + hdlcd->fb.fix.type = FB_TYPE_PACKED_PIXELS; + hdlcd->fb.fix.type_aux = 0; + hdlcd->fb.fix.xpanstep = 0; + hdlcd->fb.fix.ypanstep = 1; + hdlcd->fb.fix.ywrapstep = 0; + hdlcd->fb.fix.accel = FB_ACCEL_NONE; + + hdlcd->fb.var.nonstd = 0; + hdlcd->fb.var.activate = FB_ACTIVATE_NOW; + hdlcd->fb.var.height = -1; + hdlcd->fb.var.width = -1; + hdlcd->fb.var.accel_flags = 0; + + init_completion(&hdlcd->vsync_completion); + + if (hdlcd->edid) { + /* build modedb from EDID */ + fb_edid_to_monspecs(hdlcd->edid, &hdlcd->fb.monspecs); + fb_videomode_to_modelist(hdlcd->fb.monspecs.modedb, + hdlcd->fb.monspecs.modedb_len, + &hdlcd->fb.modelist); + fb_find_mode(&hdlcd->fb.var, &hdlcd->fb, fb_mode, + hdlcd->fb.monspecs.modedb, + hdlcd->fb.monspecs.modedb_len, + &hdlcd_default_mode, 32); + } else { + hdlcd->fb.monspecs.hfmin = 0; + hdlcd->fb.monspecs.hfmax = 100000; + hdlcd->fb.monspecs.vfmin = 0; + hdlcd->fb.monspecs.vfmax = 400; + hdlcd->fb.monspecs.dclkmin = 1000000; + hdlcd->fb.monspecs.dclkmax = 100000000; + fb_find_mode(&hdlcd->fb.var, &hdlcd->fb, fb_mode, NULL, 0, &hdlcd_default_mode, 32); + } + + dev_info(hdlcd->dev, "using %dx%d-%d@%d mode\n", hdlcd->fb.var.xres, + hdlcd->fb.var.yres, hdlcd->fb.var.bits_per_pixel, + hdlcd->fb.mode ? hdlcd->fb.mode->refresh : 60); + hdlcd->fb.var.xres_virtual = hdlcd->fb.var.xres; +#ifdef HDLCD_NO_VIRTUAL_SCREEN + hdlcd->fb.var.yres_virtual = hdlcd->fb.var.yres; +#else + hdlcd->fb.var.yres_virtual = hdlcd->fb.var.yres * 2; +#endif + + /* initialise and set the palette */ + if (fb_alloc_cmap(&hdlcd->fb.cmap, NR_PALETTE, 0)) { + dev_err(hdlcd->dev, "failed to allocate cmap memory\n"); + err = -ENOMEM; + goto setup_err; + } + fb_set_cmap(&hdlcd->fb.cmap, &hdlcd->fb); + + /* Allow max number of outstanding requests with the largest beat burst */ + WRITE_HDLCD_REG(HDLCD_REG_BUS_OPTIONS, HDLCD_BUS_MAX_OUTSTAND | HDLCD_BUS_BURST_16); + /* Set the framebuffer base to start of allocated memory */ + WRITE_HDLCD_REG(HDLCD_REG_FB_BASE, hdlcd->fb.fix.smem_start); +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS + /* turn on underrun interrupt for counting */ + WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, HDLCD_INTERRUPT_UNDERRUN); +#else + /* Ensure interrupts are disabled */ + WRITE_HDLCD_REG(HDLCD_REG_INT_MASK, 0); +#endif + fb_set_var(&hdlcd->fb, &hdlcd->fb.var); + + if (!register_framebuffer(&hdlcd->fb)) { + return 0; + } + + dev_err(hdlcd->dev, "HDLCD: cannot register framebuffer\n"); + + fb_dealloc_cmap(&hdlcd->fb.cmap); +setup_err: + iounmap(hdlcd->base); +kmalloc_err: + kfree(hdlcd->fb.pseudo_palette); +remap_err: + clk_unprepare(hdlcd->clk); +clk_prepare_err: + clk_put(hdlcd->clk); + return err; +} + +static inline unsigned char atohex(u8 data) +{ + if (!isxdigit(data)) + return 0; + /* truncate the upper nibble and add 9 to non-digit values */ + return (data > 0x39) ? ((data & 0xf) + 9) : (data & 0xf); +} + +/* EDID data is passed from devicetree in a literal string that can contain spaces and + the hexadecimal dump of the data */ +static int parse_edid_data(struct hdlcd_device *hdlcd, const u8 *edid_data, int data_len) +{ + int i, j; + + if (!edid_data) + return -EINVAL; + + hdlcd->edid = kzalloc(EDID_LENGTH, GFP_KERNEL); + if (!hdlcd->edid) + return -ENOMEM; + + for (i = 0, j = 0; i < data_len; i++) { + if (isspace(edid_data[i])) + continue; + hdlcd->edid[j++] = atohex(edid_data[i]); + if (j >= EDID_LENGTH) + break; + } + + if (j < EDID_LENGTH) { + kfree(hdlcd->edid); + hdlcd->edid = NULL; + return -EINVAL; + } + + return 0; +} + +static int hdlcd_probe(struct platform_device *pdev) +{ + int err = 0, i; + struct hdlcd_device *hdlcd; + struct resource *mem; +#ifdef CONFIG_OF + struct device_node *of_node; +#endif + + memset(&cached_var_screeninfo, 0, sizeof(struct fb_var_screeninfo)); + + dev_dbg(&pdev->dev, "HDLCD: probing\n"); + + hdlcd = kzalloc(sizeof(*hdlcd), GFP_KERNEL); + if (!hdlcd) + return -ENOMEM; + +#ifdef CONFIG_OF + of_node = pdev->dev.of_node; + if (of_node) { + int len; + const u8 *edid; + const __be32 *prop = of_get_property(of_node, "mode", &len); + if (prop) + strncpy(fb_mode, (char *)prop, len); + prop = of_get_property(of_node, "framebuffer", &len); + if (prop) { + hdlcd->fb.fix.smem_start = of_read_ulong(prop, + of_n_addr_cells(of_node)); + prop += of_n_addr_cells(of_node); + framebuffer_size = of_read_ulong(prop, + of_n_size_cells(of_node)); + if (framebuffer_size > HDLCD_MAX_FRAMEBUFFER_SIZE) + framebuffer_size = HDLCD_MAX_FRAMEBUFFER_SIZE; + dev_dbg(&pdev->dev, "HDLCD: phys_addr = 0x%lx, size = 0x%lx\n", + hdlcd->fb.fix.smem_start, framebuffer_size); + } + edid = of_get_property(of_node, "edid", &len); + if (edid) { + err = parse_edid_data(hdlcd, edid, len); +#ifdef CONFIG_SERIAL_AMBA_PCU_UART + } else { + /* ask the firmware to fetch the EDID */ + dev_dbg(&pdev->dev, "HDLCD: Requesting EDID data\n"); + hdlcd->edid = kzalloc(EDID_LENGTH, GFP_KERNEL); + if (!hdlcd->edid) + return -ENOMEM; + err = get_edid(hdlcd->edid); +#endif /* CONFIG_SERIAL_AMBA_PCU_UART */ + } + if (err) + dev_info(&pdev->dev, "HDLCD: Failed to parse EDID data\n"); + } +#endif /* CONFIG_OF */ + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(&pdev->dev, "HDLCD: cannot get platform resources\n"); + err = -EINVAL; + goto resource_err; + } + + i = platform_get_irq(pdev, 0); + if (i < 0) { + dev_err(&pdev->dev, "HDLCD: no irq defined for vsync\n"); + err = -ENOENT; + goto resource_err; + } else { + err = request_irq(i, hdlcd_irq, 0, dev_name(&pdev->dev), hdlcd); + if (err) { + dev_err(&pdev->dev, "HDLCD: unable to request irq\n"); + goto resource_err; + } + hdlcd->irq = i; + } + + if (!request_mem_region(mem->start, resource_size(mem), dev_name(&pdev->dev))) { + err = -ENXIO; + goto request_err; + } + + if (!hdlcd->fb.fix.smem_start) { + dev_err(&pdev->dev, "platform did not allocate frame buffer memory\n"); + err = -ENOMEM; + goto memalloc_err; + } + hdlcd->fb.screen_base = ioremap_wc(hdlcd->fb.fix.smem_start, framebuffer_size); + if (!hdlcd->fb.screen_base) { + dev_err(&pdev->dev, "unable to ioremap framebuffer\n"); + err = -ENOMEM; + goto probe_err; + } + + hdlcd->fb.screen_size = framebuffer_size; + hdlcd->fb.fix.smem_len = framebuffer_size; + hdlcd->fb.fix.mmio_start = mem->start; + hdlcd->fb.fix.mmio_len = resource_size(mem); + + /* Clear the framebuffer */ + memset(hdlcd->fb.screen_base, 0, framebuffer_size); + + hdlcd->dev = &pdev->dev; + + dev_dbg(&pdev->dev, "HDLCD: framebuffer virt base %p, phys base 0x%lX\n", + hdlcd->fb.screen_base, (unsigned long)hdlcd->fb.fix.smem_start); + + err = hdlcd_setup(hdlcd); + + if (err) + goto probe_err; + + platform_set_drvdata(pdev, hdlcd); + return 0; + +probe_err: + iounmap(hdlcd->fb.screen_base); + memblock_free(hdlcd->fb.fix.smem_start, hdlcd->fb.fix.smem_start); + +memalloc_err: + release_mem_region(mem->start, resource_size(mem)); + +request_err: + free_irq(hdlcd->irq, hdlcd); + +resource_err: + kfree(hdlcd); + + return err; +} + +static int hdlcd_remove(struct platform_device *pdev) +{ + struct hdlcd_device *hdlcd = platform_get_drvdata(pdev); + + clk_disable(hdlcd->clk); + clk_unprepare(hdlcd->clk); + clk_put(hdlcd->clk); + + /* unmap memory */ + iounmap(hdlcd->fb.screen_base); + iounmap(hdlcd->base); + + /* deallocate fb memory */ + fb_dealloc_cmap(&hdlcd->fb.cmap); + kfree(hdlcd->fb.pseudo_palette); + memblock_free(hdlcd->fb.fix.smem_start, hdlcd->fb.fix.smem_start); + release_mem_region(hdlcd->fb.fix.mmio_start, hdlcd->fb.fix.mmio_len); + + free_irq(hdlcd->irq, NULL); + kfree(hdlcd); + + return 0; +} + +#ifdef CONFIG_PM +static int hdlcd_suspend(struct platform_device *pdev, pm_message_t state) +{ + /* not implemented yet */ + return 0; +} + +static int hdlcd_resume(struct platform_device *pdev) +{ + /* not implemented yet */ + return 0; +} +#else +#define hdlcd_suspend NULL +#define hdlcd_resume NULL +#endif + +static struct platform_driver hdlcd_driver = { + .probe = hdlcd_probe, + .remove = hdlcd_remove, + .suspend = hdlcd_suspend, + .resume = hdlcd_resume, + .driver = { + .name = "hdlcd", + .owner = THIS_MODULE, + .of_match_table = hdlcd_of_matches, + }, +}; + +static int __init hdlcd_init(void) +{ +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS + int err = platform_driver_register(&hdlcd_driver); + if (!err) + hdlcd_underrun_init(); + return err; +#else + return platform_driver_register(&hdlcd_driver); +#endif +} + +void __exit hdlcd_exit(void) +{ +#ifdef HDLCD_COUNT_BUFFERUNDERRUNS + hdlcd_underrun_close(); +#endif + platform_driver_unregister(&hdlcd_driver); +} + +module_init(hdlcd_init); +module_exit(hdlcd_exit); + +MODULE_AUTHOR("Liviu Dudau"); +MODULE_DESCRIPTION("ARM HDLCD core driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/video/vexpress-dvi.c b/drivers/video/vexpress-dvi.c new file mode 100644 index 000000000000..f08753450ee4 --- /dev/null +++ b/drivers/video/vexpress-dvi.c @@ -0,0 +1,220 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2012 ARM Limited + */ + +#define pr_fmt(fmt) "vexpress-dvi: " fmt + +#include <linux/fb.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/vexpress.h> + + +static struct vexpress_config_func *vexpress_dvimode_func; + +static struct { + u32 xres, yres, mode; +} vexpress_dvi_dvimodes[] = { + { 640, 480, 0 }, /* VGA */ + { 800, 600, 1 }, /* SVGA */ + { 1024, 768, 2 }, /* XGA */ + { 1280, 1024, 3 }, /* SXGA */ + { 1600, 1200, 4 }, /* UXGA */ + { 1920, 1080, 5 }, /* HD1080 */ +}; + +static void vexpress_dvi_mode_set(struct fb_info *info, u32 xres, u32 yres) +{ + int err = -ENOENT; + int i; + + if (!vexpress_dvimode_func) + return; + + for (i = 0; i < ARRAY_SIZE(vexpress_dvi_dvimodes); i++) { + if (vexpress_dvi_dvimodes[i].xres == xres && + vexpress_dvi_dvimodes[i].yres == yres) { + pr_debug("mode: %ux%u = %d\n", xres, yres, + vexpress_dvi_dvimodes[i].mode); + err = vexpress_config_write(vexpress_dvimode_func, 0, + vexpress_dvi_dvimodes[i].mode); + break; + } + } + + if (err) + pr_warn("Failed to set %ux%u mode! (%d)\n", xres, yres, err); +} + + +static struct vexpress_config_func *vexpress_muxfpga_func; +static int vexpress_dvi_fb = -1; + +static int vexpress_dvi_mux_set(struct fb_info *info) +{ + int err; + u32 site = vexpress_get_site_by_dev(info->device); + + if (!vexpress_muxfpga_func) + return -ENXIO; + + err = vexpress_config_write(vexpress_muxfpga_func, 0, site); + if (!err) { + pr_debug("Selected MUXFPGA input %d (fb%d)\n", site, + info->node); + vexpress_dvi_fb = info->node; + vexpress_dvi_mode_set(info, info->var.xres, + info->var.yres); + } else { + pr_warn("Failed to select MUXFPGA input %d (fb%d)! (%d)\n", + site, info->node, err); + } + + return err; +} + +static int vexpress_dvi_fb_select(int fb) +{ + int err; + struct fb_info *info; + + /* fb0 is the default */ + if (fb < 0) + fb = 0; + + info = registered_fb[fb]; + if (!info || !lock_fb_info(info)) + return -ENODEV; + + err = vexpress_dvi_mux_set(info); + + unlock_fb_info(info); + + return err; +} + +static ssize_t vexpress_dvi_fb_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", vexpress_dvi_fb); +} + +static ssize_t vexpress_dvi_fb_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + long value; + int err = kstrtol(buf, 0, &value); + + if (!err) + err = vexpress_dvi_fb_select(value); + + return err ? err : count; +} + +DEVICE_ATTR(fb, S_IRUGO | S_IWUSR, vexpress_dvi_fb_show, + vexpress_dvi_fb_store); + + +static int vexpress_dvi_fb_event_notify(struct notifier_block *self, + unsigned long action, void *data) +{ + struct fb_event *event = data; + struct fb_info *info = event->info; + struct fb_videomode *mode = event->data; + + switch (action) { + case FB_EVENT_FB_REGISTERED: + if (vexpress_dvi_fb < 0) + vexpress_dvi_mux_set(info); + break; + case FB_EVENT_MODE_CHANGE: + case FB_EVENT_MODE_CHANGE_ALL: + if (info->node == vexpress_dvi_fb) + vexpress_dvi_mode_set(info, mode->xres, mode->yres); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block vexpress_dvi_fb_notifier = { + .notifier_call = vexpress_dvi_fb_event_notify, +}; +static bool vexpress_dvi_fb_notifier_registered; + + +enum vexpress_dvi_func { FUNC_MUXFPGA, FUNC_DVIMODE }; + +static struct of_device_id vexpress_dvi_of_match[] = { + { + .compatible = "arm,vexpress-muxfpga", + .data = (void *)FUNC_MUXFPGA, + }, { + .compatible = "arm,vexpress-dvimode", + .data = (void *)FUNC_DVIMODE, + }, + {} +}; + +static int vexpress_dvi_probe(struct platform_device *pdev) +{ + enum vexpress_dvi_func func; + const struct of_device_id *match = + of_match_device(vexpress_dvi_of_match, &pdev->dev); + + if (match) + func = (enum vexpress_dvi_func)match->data; + else + func = pdev->id_entry->driver_data; + + switch (func) { + case FUNC_MUXFPGA: + vexpress_muxfpga_func = + vexpress_config_func_get_by_dev(&pdev->dev); + device_create_file(&pdev->dev, &dev_attr_fb); + break; + case FUNC_DVIMODE: + vexpress_dvimode_func = + vexpress_config_func_get_by_dev(&pdev->dev); + break; + } + + if (!vexpress_dvi_fb_notifier_registered) { + fb_register_client(&vexpress_dvi_fb_notifier); + vexpress_dvi_fb_notifier_registered = true; + } + + vexpress_dvi_fb_select(vexpress_dvi_fb); + + return 0; +} + +static const struct platform_device_id vexpress_dvi_id_table[] = { + { .name = "vexpress-muxfpga", .driver_data = FUNC_MUXFPGA, }, + { .name = "vexpress-dvimode", .driver_data = FUNC_DVIMODE, }, + {} +}; + +static struct platform_driver vexpress_dvi_driver = { + .probe = vexpress_dvi_probe, + .driver = { + .name = "vexpress-dvi", + .of_match_table = vexpress_dvi_of_match, + }, + .id_table = vexpress_dvi_id_table, +}; + +static int __init vexpress_dvi_init(void) +{ + return platform_driver_register(&vexpress_dvi_driver); +} +device_initcall(vexpress_dvi_init); diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index a66768ebc8d1..3def5122904a 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -1,5 +1,6 @@ config BTRFS_FS tristate "Btrfs filesystem support" + select LIBCRC32C select CRYPTO select CRYPTO_CRC32C select ZLIB_INFLATE diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index e73c19e90e38..016c2f110f63 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -100,9 +100,12 @@ struct blkcipher_walk { void *page; u8 *buffer; u8 *iv; + unsigned int ivsize; int flags; - unsigned int blocksize; + unsigned int walk_blocksize; + unsigned int cipher_blocksize; + unsigned int alignmask; }; struct ablkcipher_walk { @@ -192,6 +195,10 @@ int blkcipher_walk_phys(struct blkcipher_desc *desc, int blkcipher_walk_virt_block(struct blkcipher_desc *desc, struct blkcipher_walk *walk, unsigned int blocksize); +int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_aead *tfm, + unsigned int blocksize); int ablkcipher_walk_done(struct ablkcipher_request *req, struct ablkcipher_walk *walk, int err); diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h index e82e3ee2c54a..4a53250f970e 100644 --- a/include/linux/amba/clcd.h +++ b/include/linux/amba/clcd.h @@ -243,6 +243,9 @@ static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) val |= CNTL_BGR; } + /* Reset the current colour depth */ + val &= ~CNTL_LCDBPP16_444; + switch (var->bits_per_pixel) { case 1: val |= CNTL_LCDBPP1; @@ -264,14 +267,15 @@ static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) */ if (amba_part(fb->dev) == 0x110 || var->green.length == 5) - val |= CNTL_LCDBPP16; + val |= CNTL_LCDBPP16 | CNTL_BGR; else if (var->green.length == 6) - val |= CNTL_LCDBPP16_565; + val |= CNTL_LCDBPP16_565 | CNTL_BGR; else - val |= CNTL_LCDBPP16_444; + val |= CNTL_LCDBPP16_444 | CNTL_BGR; break; case 32: val |= CNTL_LCDBPP24; + val &= ~CNTL_BGR; break; } diff --git a/include/linux/arm-hdlcd.h b/include/linux/arm-hdlcd.h new file mode 100644 index 000000000000..939f3a81d56b --- /dev/null +++ b/include/linux/arm-hdlcd.h @@ -0,0 +1,122 @@ +/* + * include/linux/arm-hdlcd.h + * + * Copyright (C) 2011 ARM Limited + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + * + * ARM HDLCD Controller register definition + */ + +#include <linux/fb.h> +#include <linux/completion.h> + +/* register offsets */ +#define HDLCD_REG_VERSION 0x0000 /* ro */ +#define HDLCD_REG_INT_RAWSTAT 0x0010 /* rw */ +#define HDLCD_REG_INT_CLEAR 0x0014 /* wo */ +#define HDLCD_REG_INT_MASK 0x0018 /* rw */ +#define HDLCD_REG_INT_STATUS 0x001c /* ro */ +#define HDLCD_REG_USER_OUT 0x0020 /* rw */ +#define HDLCD_REG_FB_BASE 0x0100 /* rw */ +#define HDLCD_REG_FB_LINE_LENGTH 0x0104 /* rw */ +#define HDLCD_REG_FB_LINE_COUNT 0x0108 /* rw */ +#define HDLCD_REG_FB_LINE_PITCH 0x010c /* rw */ +#define HDLCD_REG_BUS_OPTIONS 0x0110 /* rw */ +#define HDLCD_REG_V_SYNC 0x0200 /* rw */ +#define HDLCD_REG_V_BACK_PORCH 0x0204 /* rw */ +#define HDLCD_REG_V_DATA 0x0208 /* rw */ +#define HDLCD_REG_V_FRONT_PORCH 0x020c /* rw */ +#define HDLCD_REG_H_SYNC 0x0210 /* rw */ +#define HDLCD_REG_H_BACK_PORCH 0x0214 /* rw */ +#define HDLCD_REG_H_DATA 0x0218 /* rw */ +#define HDLCD_REG_H_FRONT_PORCH 0x021c /* rw */ +#define HDLCD_REG_POLARITIES 0x0220 /* rw */ +#define HDLCD_REG_COMMAND 0x0230 /* rw */ +#define HDLCD_REG_PIXEL_FORMAT 0x0240 /* rw */ +#define HDLCD_REG_BLUE_SELECT 0x0244 /* rw */ +#define HDLCD_REG_GREEN_SELECT 0x0248 /* rw */ +#define HDLCD_REG_RED_SELECT 0x024c /* rw */ + +/* version */ +#define HDLCD_PRODUCT_ID 0x1CDC0000 +#define HDLCD_PRODUCT_MASK 0xFFFF0000 +#define HDLCD_VERSION_MAJOR_MASK 0x0000FF00 +#define HDLCD_VERSION_MINOR_MASK 0x000000FF + +/* interrupts */ +#define HDLCD_INTERRUPT_DMA_END (1 << 0) +#define HDLCD_INTERRUPT_BUS_ERROR (1 << 1) +#define HDLCD_INTERRUPT_VSYNC (1 << 2) +#define HDLCD_INTERRUPT_UNDERRUN (1 << 3) + +/* polarity */ +#define HDLCD_POLARITY_VSYNC (1 << 0) +#define HDLCD_POLARITY_HSYNC (1 << 1) +#define HDLCD_POLARITY_DATAEN (1 << 2) +#define HDLCD_POLARITY_DATA (1 << 3) +#define HDLCD_POLARITY_PIXELCLK (1 << 4) + +/* commands */ +#define HDLCD_COMMAND_DISABLE (0 << 0) +#define HDLCD_COMMAND_ENABLE (1 << 0) + +/* pixel format */ +#define HDLCD_PIXEL_FMT_LITTLE_ENDIAN (0 << 31) +#define HDLCD_PIXEL_FMT_BIG_ENDIAN (1 << 31) +#define HDLCD_BYTES_PER_PIXEL_MASK (3 << 3) + +/* bus options */ +#define HDLCD_BUS_BURST_MASK 0x01f +#define HDLCD_BUS_MAX_OUTSTAND 0xf00 +#define HDLCD_BUS_BURST_NONE (0 << 0) +#define HDLCD_BUS_BURST_1 (1 << 0) +#define HDLCD_BUS_BURST_2 (1 << 1) +#define HDLCD_BUS_BURST_4 (1 << 2) +#define HDLCD_BUS_BURST_8 (1 << 3) +#define HDLCD_BUS_BURST_16 (1 << 4) + +/* Max resolution supported is 4096x4096, 8 bit per color component, + 8 bit alpha, but we are going to choose the usual hardware default + (2048x2048, 32 bpp) and enable double buffering */ +#define HDLCD_MAX_XRES 2048 +#define HDLCD_MAX_YRES 2048 +#define HDLCD_MAX_FRAMEBUFFER_SIZE (HDLCD_MAX_XRES * HDLCD_MAX_YRES << 2) + +#define HDLCD_MEM_BASE (CONFIG_PAGE_OFFSET - 0x1000000) + +#define NR_PALETTE 256 + +/* OEMs using HDLCD may wish to enable these settings if + * display disruption is apparent and you suspect HDLCD + * access to RAM may be starved. + */ +/* Turn HDLCD default color red instead of black so + * that it's easy to see pixel clock data underruns + * (compared to other visual disruption) + */ +//#define HDLCD_RED_DEFAULT_COLOUR +/* Add a counter in the IRQ handler to count buffer underruns + * and /proc/hdlcd_underrun to read the counter + */ +//#define HDLCD_COUNT_BUFFERUNDERRUNS +/* Restrict height to 1x screen size + * + */ +//#define HDLCD_NO_VIRTUAL_SCREEN + +#ifdef CONFIG_ANDROID +#define HDLCD_NO_VIRTUAL_SCREEN +#endif + +struct hdlcd_device { + struct fb_info fb; + struct device *dev; + struct clk *clk; + void __iomem *base; + int irq; + struct completion vsync_completion; + unsigned char *edid; +}; diff --git a/linaro/configs/vexpress-tuning.conf b/linaro/configs/vexpress-tuning.conf new file mode 100644 index 000000000000..adea6cc66ded --- /dev/null +++ b/linaro/configs/vexpress-tuning.conf @@ -0,0 +1 @@ +# CONFIG_PROVE_LOCKING is not set diff --git a/linaro/configs/vexpress.conf b/linaro/configs/vexpress.conf new file mode 100644 index 000000000000..0f02f3abd96f --- /dev/null +++ b/linaro/configs/vexpress.conf @@ -0,0 +1,64 @@ +CONFIG_ARCH_VEXPRESS=y +CONFIG_ARCH_VEXPRESS_CA9X4=y +CONFIG_HAVE_ARM_ARCH_TIMER=y +CONFIG_NR_CPUS=8 +CONFIG_HIGHMEM=y +CONFIG_HIGHPTE=y +CONFIG_ARM_PSCI=y +CONFIG_MCPM=y +CONFIG_ARCH_VEXPRESS_DCSCB=y +CONFIG_ARCH_VEXPRESS_TC2_PM=y +CONFIG_ARM_BIG_LITTLE_CPUIDLE=y +CONFIG_BIG_LITTLE=y +CONFIG_ARM_VEXPRESS_SPC_CPUFREQ=y +CONFIG_PM_OPP=y +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_CMDLINE="console=ttyAMA0,38400n8 root=/dev/mmcblk0p2 rootwait mmci.fmax=4000000" +CONFIG_VFP=y +CONFIG_NEON=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_SMSC911X=y +CONFIG_SMC91X=y +CONFIG_INPUT_EVDEV=y +CONFIG_SERIO_AMBAKMI=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_FB=y +CONFIG_FB_ARMCLCD=y +CONFIG_FB_ARMHDLCD=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_ARMAACI=y +CONFIG_USB=y +CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_PL031=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_VEXPRESS_CONFIG=y +CONFIG_SENSORS_VEXPRESS=y +CONFIG_REGULATOR=y +CONFIG_REGULATOR_VEXPRESS=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_LEDS_TRIGGER_CPU=y +CONFIG_VIRTIO=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y |