-rw-r--r-- Documentation/arm64/memory.txt | 7
-rw-r--r-- Documentation/virtual/kvm/api.txt | 186
-rw-r--r-- Documentation/virtual/kvm/devices/arm-vgic.txt | 83
-rw-r--r-- Documentation/virtual/kvm/devices/vfio.txt | 22
-rw-r--r-- Documentation/virtual/kvm/locking.txt | 8
-rw-r--r-- MAINTAINERS | 9
-rw-r--r-- arch/arm/include/asm/assembler.h | 12
-rw-r--r-- arch/arm/include/asm/barrier.h | 32
-rw-r--r-- arch/arm/include/asm/cputype.h | 35
-rw-r--r-- arch/arm/include/asm/kvm_arm.h | 12
-rw-r--r-- arch/arm/include/asm/kvm_asm.h | 26
-rw-r--r-- arch/arm/include/asm/kvm_emulate.h | 75
-rw-r--r-- arch/arm/include/asm/kvm_host.h | 50
-rw-r--r-- arch/arm/include/asm/kvm_mmu.h | 69
-rw-r--r-- arch/arm/include/asm/kvm_psci.h | 6
-rw-r--r-- arch/arm/include/asm/kvm_vgic.h | 220
-rw-r--r-- arch/arm/include/asm/memory.h | 16
-rw-r--r-- arch/arm/include/asm/pgtable-3level.h | 2
-rw-r--r-- arch/arm/include/asm/pgtable.h | 2
-rw-r--r-- arch/arm/include/asm/smp_scu.h | 2
-rw-r--r-- arch/arm/include/uapi/asm/kvm.h | 43
-rw-r--r-- arch/arm/kernel/asm-offsets.c | 15
-rw-r--r-- arch/arm/kernel/perf_event_cpu.c | 64
-rw-r--r-- arch/arm/kernel/smp.c | 4
-rw-r--r-- arch/arm/kvm/Kconfig | 9
-rw-r--r-- arch/arm/kvm/Makefile | 10
-rw-r--r-- arch/arm/kvm/arm.c | 145
-rw-r--r-- arch/arm/kvm/coproc.c | 292
-rw-r--r-- arch/arm/kvm/coproc.h | 14
-rw-r--r-- arch/arm/kvm/coproc_a15.c | 119
-rw-r--r-- arch/arm/kvm/coproc_a7.c | 54
-rw-r--r-- arch/arm/kvm/emulate.c | 2
-rw-r--r-- arch/arm/kvm/guest.c | 115
-rw-r--r-- arch/arm/kvm/handle_exit.c | 35
-rw-r--r-- arch/arm/kvm/init.S | 6
-rw-r--r-- arch/arm/kvm/interrupts.S | 32
-rw-r--r-- arch/arm/kvm/interrupts_head.S | 69
-rw-r--r-- arch/arm/kvm/mmio.c | 95
-rw-r--r-- arch/arm/kvm/mmu.c | 488
-rw-r--r-- arch/arm/kvm/psci.c | 263
-rw-r--r-- arch/arm/kvm/reset.c | 27
-rw-r--r-- arch/arm/kvm/trace.h | 7
-rw-r--r-- arch/arm/kvm/vgic.c | 1499
-rw-r--r-- arch/arm/lib/copy_template.S | 36
-rw-r--r-- arch/arm/lib/csumpartialcopygeneric.S | 96
-rw-r--r-- arch/arm/lib/io-readsl.S | 12
-rw-r--r-- arch/arm/lib/io-writesl.S | 12
-rw-r--r-- arch/arm/lib/memmove.S | 36
-rw-r--r-- arch/arm/lib/uaccess.S | 192
-rw-r--r-- arch/arm/mm/idmap.c | 5
-rw-r--r-- arch/arm64/Kconfig | 2
-rw-r--r-- arch/arm64/Makefile | 1
-rw-r--r-- arch/arm64/include/asm/cputype.h | 1
-rw-r--r-- arch/arm64/include/asm/debug-monitors.h | 19
-rw-r--r-- arch/arm64/include/asm/kvm_arm.h | 259
-rw-r--r-- arch/arm64/include/asm/kvm_asm.h | 141
-rw-r--r-- arch/arm64/include/asm/kvm_coproc.h | 57
-rw-r--r-- arch/arm64/include/asm/kvm_emulate.h | 268
-rw-r--r-- arch/arm64/include/asm/kvm_host.h | 251
-rw-r--r-- arch/arm64/include/asm/kvm_mmio.h | 59
-rw-r--r-- arch/arm64/include/asm/kvm_mmu.h | 157
-rw-r--r-- arch/arm64/include/asm/kvm_psci.h | 27
-rw-r--r-- arch/arm64/include/asm/pgtable-hwdef.h | 7
-rw-r--r-- arch/arm64/include/asm/virt.h | 4
-rw-r--r-- arch/arm64/include/uapi/asm/kvm.h | 201
-rw-r--r-- arch/arm64/kernel/asm-offsets.c | 27
-rw-r--r-- arch/arm64/kernel/debug-monitors.c | 9
-rw-r--r-- arch/arm64/kernel/vmlinux.lds.S | 20
-rw-r--r-- arch/arm64/kvm/Kconfig | 63
-rw-r--r-- arch/arm64/kvm/Makefile | 27
-rw-r--r-- arch/arm64/kvm/emulate.c | 158
-rw-r--r-- arch/arm64/kvm/guest.c | 359
-rw-r--r-- arch/arm64/kvm/handle_exit.c | 133
-rw-r--r-- arch/arm64/kvm/hyp-init.S | 116
-rw-r--r-- arch/arm64/kvm/hyp.S | 1274
-rw-r--r-- arch/arm64/kvm/inject_fault.c | 203
-rw-r--r-- arch/arm64/kvm/regmap.c | 168
-rw-r--r-- arch/arm64/kvm/reset.c | 112
-rw-r--r-- arch/arm64/kvm/sys_regs.c | 1528
-rw-r--r-- arch/arm64/kvm/sys_regs.h | 140
-rw-r--r-- arch/arm64/kvm/sys_regs_generic_v8.c | 100
-rw-r--r-- arch/arm64/kvm/vgic-v2-switch.S | 133
-rw-r--r-- arch/arm64/kvm/vgic-v3-switch.S | 267
-rw-r--r-- arch/arm64/mm/proc.S | 8
-rw-r--r-- arch/ia64/include/asm/kvm_host.h | 15
-rw-r--r-- arch/ia64/kvm/Kconfig | 1
-rw-r--r-- arch/ia64/kvm/Makefile | 7
-rw-r--r-- arch/ia64/kvm/kvm-ia64.c | 37
-rw-r--r-- arch/mips/include/asm/kvm_host.h | 16
-rw-r--r-- arch/mips/kvm/kvm_mips.c | 9
-rw-r--r-- arch/mips/mm/init.c | 1
-rw-r--r-- arch/powerpc/include/asm/kvm_host.h | 13
-rw-r--r-- arch/powerpc/include/asm/kvm_ppc.h | 6
-rw-r--r-- arch/powerpc/kvm/Kconfig | 1
-rw-r--r-- arch/powerpc/kvm/Makefile | 13
-rw-r--r-- arch/powerpc/kvm/book3s_hv.c | 2
-rw-r--r-- arch/powerpc/kvm/booke.c | 4
-rw-r--r-- arch/powerpc/kvm/mpic.c | 4
-rw-r--r-- arch/powerpc/kvm/powerpc.c | 33
-rw-r--r-- arch/s390/include/asm/kvm_host.h | 17
-rw-r--r-- arch/s390/kvm/Makefile | 3
-rw-r--r-- arch/s390/kvm/kvm-s390.c | 42
-rw-r--r-- arch/s390/mm/init.c | 1
-rw-r--r-- arch/x86/include/asm/kvm_host.h | 15
-rw-r--r-- arch/x86/include/uapi/asm/kvm.h | 6
-rw-r--r-- arch/x86/kvm/Kconfig | 2
-rw-r--r-- arch/x86/kvm/Makefile | 13
-rw-r--r-- arch/x86/kvm/cpuid.c | 57
-rw-r--r-- arch/x86/kvm/cpuid.h | 5
-rw-r--r-- arch/x86/kvm/mmu.c | 6
-rw-r--r-- arch/x86/kvm/paging_tmpl.h | 20
-rw-r--r-- arch/x86/kvm/svm.c | 4
-rw-r--r-- arch/x86/kvm/vmx.c | 4
-rw-r--r-- arch/x86/kvm/x86.c | 50
-rw-r--r-- include/kvm/arm_arch_timer.h (renamed from arch/arm/include/asm/kvm_arch_timer.h) | 18
-rw-r--r-- include/kvm/arm_vgic.h | 370
-rw-r--r-- include/linux/irqchip/arm-gic.h | 15
-rw-r--r-- include/linux/kvm_host.h | 108
-rw-r--r-- include/linux/kvm_types.h | 14
-rw-r--r-- include/linux/mm.h | 3
-rw-r--r-- include/trace/events/kvm.h | 18
-rw-r--r-- include/uapi/linux/Kbuild | 1
-rw-r--r-- include/uapi/linux/kvm.h | 46
-rw-r--r-- include/uapi/linux/psci.h | 90
-rw-r--r-- mm/memory.c | 2
-rw-r--r-- virt/kvm/Kconfig | 10
-rw-r--r-- virt/kvm/arm/arch_timer.c (renamed from arch/arm/kvm/arch_timer.c) | 68
-rw-r--r-- virt/kvm/arm/vgic-v2.c | 265
-rw-r--r-- virt/kvm/arm/vgic-v3.c | 247
-rw-r--r-- virt/kvm/arm/vgic.c | 2464
-rw-r--r-- virt/kvm/async_pf.c | 60
-rw-r--r-- virt/kvm/eventfd.c | 154
-rw-r--r-- virt/kvm/ioapic.c | 2
-rw-r--r-- virt/kvm/ioapic.h | 1
-rw-r--r-- virt/kvm/iommu.c | 12
-rw-r--r-- virt/kvm/irq_comm.c | 41
-rw-r--r-- virt/kvm/irqchip.c | 107
-rw-r--r-- virt/kvm/kvm_main.c | 388
-rw-r--r-- virt/kvm/vfio.c | 228
139 files changed, 12902 insertions(+), 3206 deletions(-)
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index c6941f815f15..d50fa618371b 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -102,3 +102,10 @@ Translation table lookup with 64KB pages:
| | +--------------------------> [41:29] L2 index (only 38:29 used)
| +-------------------------------> [47:42] L1 index (not used)
+-------------------------------------------------> [63] TTBR0/1
+
+When using KVM, the hypervisor maps kernel pages in EL2, at a fixed
+offset from the kernel VA (top 24bits of the kernel VA set to zero):
+
+Start End Size Use
+-----------------------------------------------------------------------
+0000004000000000 0000007fffffffff 256GB kernel objects mapped in HYP
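
As a sketch of the conversion described above, assuming the 24 cleared bits leave a 40-bit HYP VA range (the macro names here are illustrative, not the kernel's):

#define HYP_VA_MASK		((1UL << 40) - 1)	/* clear the top 24 bits of a 64-bit VA */
#define KERN_TO_HYP_VA(kva)	((unsigned long)(kva) & HYP_VA_MASK)
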
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 5f91eda91647..257a1f1eecc7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl.
4.4 KVM_CHECK_EXTENSION
-Capability: basic
+Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
Architectures: all
-Type: system ioctl
+Type: system ioctl, vm ioctl
Parameters: extension identifier (KVM_CAP_*)
Returns: 0 if unsupported; 1 (or some other positive integer) if supported
@@ -160,6 +160,9 @@ receives an integer that describes the extension availability.
Generally 0 means no and 1 means yes, but some extensions may report
additional information in the integer return value.
+Based on their initialization, different VMs may have different capabilities.
+It is thus encouraged to use the vm ioctl to query for capabilities (available
+with KVM_CAP_CHECK_EXTENSION_VM on the vm fd).
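
As a minimal sketch (assuming <linux/kvm.h> and an already-open vm fd), the same KVM_CHECK_EXTENSION request can be issued against the VM:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns 0 if unsupported, a positive value if supported. */
static int vm_has_cap(int vm_fd, long cap)
{
	return ioctl(vm_fd, KVM_CHECK_EXTENSION, cap);
}
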
4.5 KVM_GET_VCPU_MMAP_SIZE
@@ -280,7 +283,7 @@ kvm_run' (see below).
4.11 KVM_GET_REGS
Capability: basic
-Architectures: all except ARM
+Architectures: all except ARM, arm64
Type: vcpu ioctl
Parameters: struct kvm_regs (out)
Returns: 0 on success, -1 on error
@@ -301,7 +304,7 @@ struct kvm_regs {
4.12 KVM_SET_REGS
Capability: basic
-Architectures: all except ARM
+Architectures: all except ARM, arm64
Type: vcpu ioctl
Parameters: struct kvm_regs (in)
Returns: 0 on success, -1 on error
@@ -587,7 +590,7 @@ struct kvm_fpu {
4.24 KVM_CREATE_IRQCHIP
Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, ARM
+Architectures: x86, ia64, ARM, arm64
Type: vm ioctl
Parameters: none
Returns: 0 on success, -1 on error
@@ -595,14 +598,14 @@ Returns: 0 on success, -1 on error
Creates an interrupt controller model in the kernel. On x86, creates a virtual
ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM, a GIC is
+only go to the IOAPIC. On ia64, an IOSAPIC is created. On ARM/arm64, a GIC is
created.
4.25 KVM_IRQ_LINE
Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, arm
+Architectures: x86, ia64, arm, arm64
Type: vm ioctl
Parameters: struct kvm_irq_level
Returns: 0 on success, -1 on error
@@ -612,9 +615,10 @@ On some architectures it is required that an interrupt controller model has
been previously created with KVM_CREATE_IRQCHIP. Note that edge-triggered
interrupts require the level to be set to 1 and then back to 0.
-ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip
-(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for
-specific cpus. The irq field is interpreted like this:
+ARM/arm64 can signal an interrupt either at the CPU level, or at the
+in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
+use PPIs designated for specific cpus. The irq field is interpreted
+like this:
 bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 |
field: | irq_type | vcpu_index | irq_id |
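
An illustrative helper (not part of this patch) for packing that layout:

static inline __u32 kvm_arm_irq_field(__u32 irq_type, __u32 vcpu_index,
				      __u32 irq_id)
{
	return (irq_type << 24) | ((vcpu_index & 0xff) << 16) |
	       (irq_id & 0xffff);
}
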
@@ -968,18 +972,20 @@ uniprocessor guests).
Possible values are:
- - KVM_MP_STATE_RUNNABLE: the vcpu is currently running
+ - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64]
- KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP)
- which has not yet received an INIT signal
+ which has not yet received an INIT signal [x86,
+ ia64]
- KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is
- now ready for a SIPI
+ now ready for a SIPI [x86, ia64]
- KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and
- is waiting for an interrupt
+ is waiting for an interrupt [x86, ia64]
- KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector
- accessible via KVM_GET_VCPU_EVENTS)
+ accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
-This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
4.39 KVM_SET_MP_STATE
@@ -993,8 +999,9 @@ Returns: 0 on success; -1 on error
Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
arguments.
-This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
4.40 KVM_SET_IDENTITY_MAP_ADDR
@@ -1121,9 +1128,9 @@ struct kvm_cpuid2 {
struct kvm_cpuid_entry2 entries[0];
};
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC 2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT 4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
struct kvm_cpuid_entry2 {
__u32 function;
@@ -1831,6 +1838,22 @@ ARM 32-bit VFP control registers have the following id bit patterns:
ARM 64-bit FP registers have the following id bit patterns:
0x4030 0000 0012 0 <regno:12>
+
+arm64 registers are mapped using the lower 32 bits. The upper 16 of
+that is the register group type, or coprocessor number:
+
+arm64 core/FP-SIMD registers have the following id bit patterns. Note
+that the size of the access is variable, as the kvm_regs structure
+contains elements ranging from 32 to 128 bits. The index is a 32bit
+value in the kvm_regs structure seen as a 32bit array.
+ 0x60x0 0000 0010 <index into the kvm_regs struct:16>
+
+arm64 CCSIDR registers are demultiplexed by CSSELR value:
+ 0x6020 0000 0011 00 <csselr:8>
+
+arm64 system registers have the following id bit patterns:
+ 0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
+
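As an illustrative sketch (only the bit layout comes from the text above; the macro name is made up here), an arm64 system register id can be composed as:

#define ARM64_SYSREG_ID(op0, op1, crn, crm, op2)			\
	(0x6030000000130000ULL |					\
	 ((__u64)(op0) << 14) | ((__u64)(op1) << 11) |			\
	 ((__u64)(crn) << 7) | ((__u64)(crm) << 3) | (__u64)(op2))
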
4.69 KVM_GET_ONE_REG
Capability: KVM_CAP_ONE_REG
@@ -2264,7 +2287,7 @@ current state. "addr" is ignored.
4.77 KVM_ARM_VCPU_INIT
Capability: basic
-Architectures: arm
+Architectures: arm, arm64
Type: vcpu ioctl
Parameters: struct kvm_vcpu_init (in)
Returns: 0 on success; -1 on error
@@ -2283,12 +2306,14 @@ should be created before this ioctl is invoked.
Possible features:
- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
Depends on KVM_CAP_ARM_PSCI.
+ - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
+ Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
4.78 KVM_GET_REG_LIST
Capability: basic
-Architectures: arm
+Architectures: arm, arm64
Type: vcpu ioctl
Parameters: struct kvm_reg_list (in/out)
Returns: 0 on success; -1 on error
@@ -2305,10 +2330,10 @@ This ioctl returns the guest registers that are supported for the
KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
-4.80 KVM_ARM_SET_DEVICE_ADDR
+4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
-Architectures: arm
+Architectures: arm, arm64
Type: vm ioctl
Parameters: struct kvm_arm_device_address (in)
Returns: 0 on success, -1 on error
@@ -2329,20 +2354,25 @@ can access emulated or directly exposed devices, which the host kernel needs
to know about. The id field is an architecture specific identifier for a
specific device.
-ARM divides the id field into two parts, a device id and an address type id
-specific to the individual device.
+ARM/arm64 divides the id field into two parts, a device id and an
+address type id specific to the individual device.
 bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 |
field: | 0x00000000 | device id | addr type id |
-ARM currently only require this when using the in-kernel GIC support for the
-hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id. When
-setting the base address for the guest's mapping of the VGIC virtual CPU
-and distributor interface, the ioctl must be called after calling
-KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling
-this ioctl twice for any of the base addresses will return -EEXIST.
+ARM/arm64 currently only require this when using the in-kernel GIC
+support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2
+as the device id. When setting the base address for the guest's
+mapping of the VGIC virtual CPU and distributor interface, the ioctl
+must be called after calling KVM_CREATE_IRQCHIP, but before calling
+KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the
+base addresses will return -EEXIST.
+
+Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API
+should be used instead.
-4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+
+4.86 KVM_PPC_RTAS_DEFINE_TOKEN
Capability: KVM_CAP_PPC_RTAS
Architectures: ppc
@@ -2612,6 +2642,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
external interrupt has just been delivered into the guest. User space
should put the acknowledged interrupt vector into the 'epr' field.
+ /* KVM_EXIT_SYSTEM_EVENT */
+ struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN 1
+#define KVM_SYSTEM_EVENT_RESET 2
+ __u32 type;
+ __u64 flags;
+ } system_event;
+
+If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
+a system-level event using some architecture specific mechanism (hypercall
+or some special instruction). In the case of ARM/arm64, this is triggered using
+an HVC instruction based PSCI call from the vcpu. The 'type' field describes
+the system-level event type. The 'flags' field describes architecture
+specific flags for the system-level event.
+
/* Fix the size of the union. */
char padding[256];
};
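
A userspace run loop might handle this exit roughly as follows (a sketch: `run` is the mmap'ed struct kvm_run, and shutdown_vm()/reset_vm() are hypothetical helpers):

if (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) {
	switch (run->system_event.type) {
	case KVM_SYSTEM_EVENT_SHUTDOWN:
		shutdown_vm();
		break;
	case KVM_SYSTEM_EVENT_RESET:
		reset_vm();
		break;
	}
}
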
@@ -2641,6 +2686,77 @@ and usually define the validity of a groups of registers. (e.g. one bit
};
+4.81 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+ __u32 nent;
+ __u32 flags;
+ struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+
+struct kvm_cpuid_entry2 {
+ __u32 function;
+ __u32 index;
+ __u32 flags;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ __u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm. Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
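A sketch of that size negotiation (kvm_fd is an assumed open /dev/kvm descriptor; error handling is kept minimal):

#include <errno.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static struct kvm_cpuid2 *get_emulated_cpuid(int kvm_fd)
{
	int nent = 8;

	for (;;) {
		struct kvm_cpuid2 *cpuid =
			calloc(1, sizeof(*cpuid) +
				  nent * sizeof(struct kvm_cpuid_entry2));
		if (!cpuid)
			return NULL;
		cpuid->nent = nent;
		if (ioctl(kvm_fd, KVM_GET_EMULATED_CPUID, cpuid) == 0)
			return cpuid;	/* ->nent now holds the valid count */
		free(cpuid);
		if (errno != E2BIG)
			return NULL;	/* genuine failure */
		nent *= 2;		/* buffer too small: grow and retry */
	}
}
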
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently, and are thus not included here.
+
+The fields in each entry are defined as follows:
+
+ function: the eax value used to obtain the entry
+ index: the ecx value used to obtain the entry (for entries that are
+ affected by ecx)
+ flags: an OR of zero or more of the following:
+ KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+ if the index field is valid
+ KVM_CPUID_FLAG_STATEFUL_FUNC:
+ if cpuid for this function returns different values for successive
+ invocations; there will be several entries with the same function,
+ all with this flag set
+ KVM_CPUID_FLAG_STATE_READ_NEXT:
+ for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+ the first entry to be read by a cpu
+ eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+ this function/index combination
+
+
6. Capabilities that can be enabled
-----------------------------------
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
new file mode 100644
index 000000000000..df8b0c7540b6
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -0,0 +1,83 @@
+ARM Virtual Generic Interrupt Controller (VGIC)
+===============================================
+
+Device types supported:
+ KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
+
+Only one VGIC instance may be instantiated through either this API or the
+legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt
+controller, requiring emulated user-space devices to inject interrupts to the
+VGIC instead of directly to CPUs.
+
+Groups:
+ KVM_DEV_ARM_VGIC_GRP_ADDR
+ Attributes:
+ KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
+ Base address in the guest physical address space of the GIC distributor
+ register mappings.
+
+ KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
+ Base address in the guest physical address space of the GIC virtual cpu
+ interface register mappings.
+
+ KVM_DEV_ARM_VGIC_GRP_DIST_REGS
+ Attributes:
+ The attr field of kvm_device_attr encodes two values:
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | cpu id | offset |
+
+ All distributor regs are (rw, 32-bit)
+
+ The offset is relative to the "Distributor base address" as defined in the
+ GICv2 specs. Getting or setting such a register has the same effect as
+ reading or writing the register on the actual hardware from the cpu
+ specified with cpu id field. Note that most distributor fields are not
+ banked, but return the same value regardless of the cpu id used to access
+ the register.
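+
+ As a sketch, the attr value can be encoded from this layout as follows
+ (the helper name is illustrative):
+
+ static inline __u64 vgic_dist_reg_attr(__u8 cpu_id, __u32 offset)
+ {
+	return ((__u64)cpu_id << 32) | offset;
+ }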
+ Limitations:
+ - Priorities are not implemented, and registers are RAZ/WI
+ Errors:
+ -ENODEV: Getting or setting this register is not yet supported
+ -EBUSY: One or more VCPUs are running
+
+ KVM_DEV_ARM_VGIC_GRP_CPU_REGS
+ Attributes:
+ The attr field of kvm_device_attr encodes two values:
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | cpu id | offset |
+
+ All CPU interface regs are (rw, 32-bit)
+
+ The offset specifies the offset from the "CPU interface base address" as
+ defined in the GICv2 specs. Getting or setting such a register has the
+ same effect as reading or writing the register on the actual hardware.
+
+ The Active Priorities Registers APRn are implementation defined, so we set a
+ fixed format for our implementation that fits with the model of a "GICv2
+ implementation without the security extensions" which we present to the
+ guest. This interface always exposes four register APR[0-3] describing the
+ maximum possible 128 preemption levels. The semantics of the register
+ indicate if any interrupts in a given preemption level are in the active
+ state by setting the corresponding bit.
+
+ Thus, preemption level X has one or more active interrupts if and only if:
+
+ APRn[X mod 32] == 0b1, where n = X / 32
+
+ Bits for undefined preemption levels are RAZ/WI.
+
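+ For example, the condition above can be evaluated as follows (a sketch,
+ using the four 32-bit APR words exposed by this interface):
+
+ static inline int apr_level_active(const __u32 apr[4], unsigned int x)
+ {
+	return !!(apr[x / 32] & (1U << (x % 32)));
+ }
+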
+ Limitations:
+ - Priorities are not implemented, and registers are RAZ/WI
+ Errors:
+ -ENODEV: Getting or setting this register is not yet supported
+ -EBUSY: One or more VCPUs are running
+
+ KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+ Attributes:
+ A value describing the number of interrupts (SGI, PPI and SPI) for
+ this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+ Errors:
+ -EINVAL: Value set is out of the expected range
+ -EBUSY: Value has already been set, or GIC has already been initialized
+ with default values.
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
new file mode 100644
index 000000000000..ef51740c67ca
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -0,0 +1,22 @@
+VFIO virtual device
+===================
+
+Device types supported:
+ KVM_DEV_TYPE_VFIO
+
+Only one VFIO instance may be created per VM. The created device
+tracks VFIO groups in use by the VM and features of those groups
+important to the correctness and acceleration of the VM. As groups
+are enabled and disabled for use by the VM, KVM should be updated
+about their presence. When registered with KVM, a reference to the
+VFIO-group is held by KVM.
+
+Groups:
+ KVM_DEV_VFIO_GROUP
+
+KVM_DEV_VFIO_GROUP attributes:
+ KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+ KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+
+For each, kvm_device_attr.addr points to an int32_t file descriptor
+for the VFIO group.
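
As a sketch (assuming a device fd obtained via KVM_CREATE_DEVICE with KVM_DEV_TYPE_VFIO), adding a group looks like this:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int kvm_vfio_group_add(int vfio_dev_fd, int32_t group_fd)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_VFIO_GROUP,
		.attr  = KVM_DEV_VFIO_GROUP_ADD,
		.addr  = (__u64)(unsigned long)&group_fd,
	};

	return ioctl(vfio_dev_fd, KVM_SET_DEVICE_ATTR, &attr);
}
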
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 41b7ac9884b5..ba035c33d01c 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -132,10 +132,14 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
------------
Name: kvm_lock
-Type: raw_spinlock
+Type: spinlock_t
Arch: any
Protects: - vm_list
- - hardware virtualization enable/disable
+
+Name: kvm_count_lock
+Type: raw_spinlock_t
+Arch: any
+Protects: - hardware virtualization enable/disable
Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
migration.
diff --git a/MAINTAINERS b/MAINTAINERS
index b4f332475b73..7433b84439f3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4719,6 +4719,15 @@ F: arch/arm/include/uapi/asm/kvm*
F: arch/arm/include/asm/kvm*
F: arch/arm/kvm/
+KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
+M: Marc Zyngier <marc.zyngier@arm.com>
+L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L: kvmarm@lists.cs.columbia.edu
+S: Maintained
+F: arch/arm64/include/uapi/asm/kvm*
+F: arch/arm64/include/asm/kvm*
+F: arch/arm64/kvm/
+
KEXEC
M: Eric Biederman <ebiederm@xmission.com>
W: http://kernel.org/pub/linux/utils/kernel/kexec/
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index e780afbcee54..f0963bb79935 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -30,8 +30,8 @@
* Endian independent macros for shifting bytes within registers.
*/
#ifndef __ARMEB__
-#define pull lsr
-#define push lsl
+#define lspull lsr
+#define lspush lsl
#define get_byte_0 lsl #0
#define get_byte_1 lsr #8
#define get_byte_2 lsr #16
@@ -41,8 +41,8 @@
#define put_byte_2 lsl #16
#define put_byte_3 lsl #24
#else
-#define pull lsl
-#define push lsr
+#define lspull lsl
+#define lspush lsr
#define get_byte_0 lsr #24
#define get_byte_1 lsr #16
#define get_byte_2 lsr #8
@@ -219,9 +219,9 @@
#ifdef CONFIG_SMP
#if __LINUX_ARM_ARCH__ >= 7
.ifeqs "\mode","arm"
- ALT_SMP(dmb)
+ ALT_SMP(dmb ish)
.else
- ALT_SMP(W(dmb))
+ ALT_SMP(W(dmb) ish)
.endif
#elif __LINUX_ARM_ARCH__ == 6
ALT_SMP(mcr p15, 0, r0, c7, c10, 5) @ dmb
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 8dcd9c702d90..60f15e274e6d 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -14,27 +14,27 @@
#endif
#if __LINUX_ARM_ARCH__ >= 7
-#define isb() __asm__ __volatile__ ("isb" : : : "memory")
-#define dsb() __asm__ __volatile__ ("dsb" : : : "memory")
-#define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
+#define isb(option) __asm__ __volatile__ ("isb " #option : : : "memory")
+#define dsb(option) __asm__ __volatile__ ("dsb " #option : : : "memory")
+#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
#elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6
-#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
: : "r" (0) : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
: : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
+#define dmb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
: : "r" (0) : "memory")
#elif defined(CONFIG_CPU_FA526)
-#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
+#define isb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
: : "r" (0) : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
: : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
+#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
#else
-#define isb() __asm__ __volatile__ ("" : : : "memory")
-#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
+#define isb(x) __asm__ __volatile__ ("" : : : "memory")
+#define dsb(x) __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
: : "r" (0) : "memory")
-#define dmb() __asm__ __volatile__ ("" : : : "memory")
+#define dmb(x) __asm__ __volatile__ ("" : : : "memory")
#endif
#ifdef CONFIG_ARCH_HAS_BARRIERS
@@ -42,7 +42,7 @@
#elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP)
#define mb() do { dsb(); outer_sync(); } while (0)
#define rmb() dsb()
-#define wmb() mb()
+#define wmb() do { dsb(st); outer_sync(); } while (0)
#else
#define mb() barrier()
#define rmb() barrier()
@@ -54,9 +54,9 @@
#define smp_rmb() barrier()
#define smp_wmb() barrier()
#else
-#define smp_mb() dmb()
-#define smp_rmb() dmb()
-#define smp_wmb() dmb()
+#define smp_mb() dmb(ish)
+#define smp_rmb() smp_mb()
+#define smp_wmb() dmb(ishst)
#endif
#define read_barrier_depends() do { } while(0)
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index dba62cb1ad08..3392fe2d3174 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -43,15 +43,18 @@
#define ARM_CPU_IMP_ARM 0x41
#define ARM_CPU_IMP_INTEL 0x69
-#define ARM_CPU_PART_ARM1136 0xB360
-#define ARM_CPU_PART_ARM1156 0xB560
-#define ARM_CPU_PART_ARM1176 0xB760
-#define ARM_CPU_PART_ARM11MPCORE 0xB020
-#define ARM_CPU_PART_CORTEX_A8 0xC080
-#define ARM_CPU_PART_CORTEX_A9 0xC090
-#define ARM_CPU_PART_CORTEX_A5 0xC050
-#define ARM_CPU_PART_CORTEX_A15 0xC0F0
-#define ARM_CPU_PART_CORTEX_A7 0xC070
+/* ARM implemented processors */
+#define ARM_CPU_PART_ARM1136 0x4100b360
+#define ARM_CPU_PART_ARM1156 0x4100b560
+#define ARM_CPU_PART_ARM1176 0x4100b760
+#define ARM_CPU_PART_ARM11MPCORE 0x4100b020
+#define ARM_CPU_PART_CORTEX_A8 0x4100c080
+#define ARM_CPU_PART_CORTEX_A9 0x4100c090
+#define ARM_CPU_PART_CORTEX_A5 0x4100c050
+#define ARM_CPU_PART_CORTEX_A7 0x4100c070
+#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0
+#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0
+#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0
#define ARM_CPU_XSCALE_ARCH_MASK 0xe000
#define ARM_CPU_XSCALE_ARCH_V1 0x2000
@@ -122,14 +125,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
return (read_cpuid_id() & 0xFF000000) >> 24;
}
-static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
+/*
+ * The CPU part number is meaningless without referring to the CPU
+ * implementer: implementers are free to define their own part numbers
+ * which are permitted to clash with other implementer part numbers.
+ */
+static inline unsigned int __attribute_const__ read_cpuid_part(void)
+{
+ return read_cpuid_id() & 0xff00fff0;
+}
+
+static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void)
{
return read_cpuid_id() & 0xFFF0;
}
static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
{
- return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK;
+ return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
}
static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 124623e5ef14..816db0bf2dd8 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -55,8 +55,10 @@
* The bits we set in HCR:
* TAC: Trap ACTLR
* TSC: Trap SMC
+ * TVM: Trap VM ops (until MMU and caches are on)
* TSW: Trap cache operations by set/way
* TWI: Trap WFI
+ * TWE: Trap WFE
* TIDCP: Trap L2CTLR/L2ECTLR
* BSU_IS: Upgrade barriers to the inner shareable domain
* FB: Force broadcast of all maintainance operations
@@ -67,8 +69,7 @@
*/
#define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
- HCR_SWIO | HCR_TIDCP)
-#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+ HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP)
/* System Control Register (SCTLR) bits */
#define SCTLR_TE (1 << 30)
@@ -95,12 +96,12 @@
#define TTBCR_IRGN1 (3 << 24)
#define TTBCR_EPD1 (1 << 23)
#define TTBCR_A1 (1 << 22)
-#define TTBCR_T1SZ (3 << 16)
+#define TTBCR_T1SZ (7 << 16)
#define TTBCR_SH0 (3 << 12)
#define TTBCR_ORGN0 (3 << 10)
#define TTBCR_IRGN0 (3 << 8)
#define TTBCR_EPD0 (1 << 7)
-#define TTBCR_T0SZ 3
+#define TTBCR_T0SZ (7 << 0)
#define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
/* Hyp System Trap Register */
@@ -135,7 +136,6 @@
#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL)
#define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30))
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
-#define S2_PGD_SIZE (1 << S2_PGD_ORDER)
/* Virtualization Translation Control Register (VTCR) bits */
#define VTCR_SH0 (3 << 12)
@@ -209,6 +209,8 @@
#define HSR_EC_DABT (0x24)
#define HSR_EC_DABT_HYP (0x25)
+#define HSR_WFI_IS_WFE (1U << 0)
+
#define HSR_HVC_IMM_MASK ((1UL << 16) - 1)
#define HSR_DABT_S1PTW (1U << 7)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 4bb08e3e52bc..3a67bec72d0c 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -39,7 +39,7 @@
#define c6_IFAR 17 /* Instruction Fault Address Register */
#define c7_PAR 18 /* Physical Address Register */
#define c7_PAR_high 19 /* PAR top 32 bits */
-#define c9_L2CTLR 20 /* Cortex A15 L2 Control Register */
+#define c9_L2CTLR 20 /* Cortex A15/A7 L2 Control Register */
#define c10_PRRR 21 /* Primary Region Remap Register */
#define c10_NMRR 22 /* Normal Memory Remap Register */
#define c12_VBAR 23 /* Vector Base Address Register */
@@ -48,7 +48,9 @@
#define c13_TID_URO 26 /* Thread ID, User R/O */
#define c13_TID_PRIV 27 /* Thread ID, Privileged */
#define c14_CNTKCTL 28 /* Timer Control Register (PL1) */
-#define NR_CP15_REGS 29 /* Number of regs (incl. invalid) */
+#define c10_AMAIR0 29 /* Auxiliary Memory Attribute Indirection Reg0 */
+#define c10_AMAIR1 30 /* Auxiliary Memory Attribute Indirection Reg1 */
+#define NR_CP15_REGS 31 /* Number of regs (incl. invalid) */
#define ARM_EXCEPTION_RESET 0
#define ARM_EXCEPTION_UNDEFINED 1
@@ -59,6 +61,24 @@
#define ARM_EXCEPTION_FIQ 6
#define ARM_EXCEPTION_HVC 7
+/*
+ * The rr_lo_hi macro swaps a pair of registers depending on
+ * current endianness. It is used in conjunction with ldrd and strd
+ * instructions that load/store a 64-bit value from/to memory to/from
+ * a pair of registers which are used with the mrrc and mcrr instructions.
+ * If used with the ldrd/strd instructions, the a1 parameter is the first
+ * source/destination register and the a2 parameter is the second
+ * source/destination register. Note that the ldrd/strd instructions
+ * already swap the bytes within the words correctly according to the
+ * endianness setting, but the order of the registers needs to be effectively
+ * swapped when used with the mrrc/mcrr instructions.
+ */
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define rr_lo_hi(a1, a2) a2, a1
+#else
+#define rr_lo_hi(a1, a2) a1, a2
+#endif
+
#ifndef __ASSEMBLY__
struct kvm;
struct kvm_vcpu;
@@ -74,8 +94,6 @@ extern char __kvm_hyp_vector[];
extern char __kvm_hyp_code_start[];
extern char __kvm_hyp_code_end[];
-extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
-
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 82b4babead2c..b9db269c6e61 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -65,11 +65,6 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
return cpsr_mode > USR_MODE;
}
-static inline bool kvm_vcpu_reg_is_pc(struct kvm_vcpu *vcpu, int reg)
-{
- return reg == 15;
-}
-
static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault.hsr;
@@ -154,6 +149,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu)
static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu)
{
+ return kvm_vcpu_get_hsr(vcpu) & HSR_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu)
+{
return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE;
}
@@ -162,4 +162,69 @@ static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK;
}
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.cp15[c0_MPIDR];
+}
+
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+ *vcpu_cpsr(vcpu) |= PSR_E_BIT;
+}
+
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
+{
+ return !!(*vcpu_cpsr(vcpu) & PSR_E_BIT);
+}
+
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+ unsigned long data,
+ unsigned int len)
+{
+ if (kvm_vcpu_is_be(vcpu)) {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return be16_to_cpu(data & 0xffff);
+ default:
+ return be32_to_cpu(data);
+ }
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return le16_to_cpu(data & 0xffff);
+ default:
+ return le32_to_cpu(data);
+ }
+ }
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+ unsigned long data,
+ unsigned int len)
+{
+ if (kvm_vcpu_is_be(vcpu)) {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_be16(data & 0xffff);
+ default:
+ return cpu_to_be32(data);
+ }
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_le16(data & 0xffff);
+ default:
+ return cpu_to_le32(data);
+ }
+ }
+}
+
#endif /* __ARM_KVM_EMULATE_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 57cb786a6203..46e5d4da1989 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -19,30 +19,31 @@
#ifndef __ARM_KVM_HOST_H__
#define __ARM_KVM_HOST_H__
+#include <linux/types.h>
+#include <linux/kvm_types.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
#include <asm/fpstate.h>
-#include <asm/kvm_arch_timer.h>
+#include <kvm/arm_arch_timer.h>
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
#define KVM_USER_MEM_SLOTS 32
#define KVM_PRIVATE_MEM_SLOTS 4
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HAVE_ONE_REG
-#define KVM_VCPU_MAX_FEATURES 1
-
-/* We don't currently support large pages. */
-#define KVM_HPAGE_GFN_SHIFT(x) 0
-#define KVM_NR_PAGE_SIZES 1
-#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
+#define KVM_VCPU_MAX_FEATURES 2
-#include <asm/kvm_vgic.h>
+#include <kvm/arm_vgic.h>
-struct kvm_vcpu;
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
-int kvm_target_cpu(void);
+int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_reset_coprocs(struct kvm_vcpu *vcpu);
@@ -101,6 +102,12 @@ struct kvm_vcpu_arch {
/* The CPU type we expose to the VM */
u32 midr;
+ /* HYP trapping configuration */
+ u32 hcr;
+
+ /* Interrupt related fields */
+ u32 irq_lines; /* IRQ and FIQ levels */
+
/* Exception Information */
struct kvm_vcpu_fault_info fault;
@@ -128,9 +135,6 @@ struct kvm_vcpu_arch {
/* IO related fields */
struct kvm_decode mmio_decode;
- /* Interrupt related fields */
- u32 irq_lines; /* IRQ and FIQ levels */
-
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
@@ -146,19 +150,17 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
-struct kvm_vcpu_init;
int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-struct kvm_one_reg;
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
u64 kvm_call_hyp(void *hypfn, ...);
void force_vm_exit(const cpumask_t *mask);
#define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
@@ -183,15 +185,14 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void);
int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu);
-struct kvm_one_reg;
int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
-static inline void __cpu_init_hyp_mode(unsigned long long boot_pgd_ptr,
- unsigned long long pgd_ptr,
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+ phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
{
@@ -221,7 +222,18 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
return 0;
}
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+ BUG_ON(vgic->type != VGIC_V2);
+}
+
int kvm_perf_init(void);
int kvm_perf_teardown(void);
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 472ac7091003..3f688b458143 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -62,9 +62,15 @@ phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(void);
void kvm_clear_hyp_idmap(void);
+static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
+{
+ *pmd = new_pmd;
+ flush_pmd_entry(pmd);
+}
+
static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
{
- pte_val(*pte) = new_pte;
+ *pte = new_pte;
/*
* flush_pmd_entry just takes a void pointer and cleans the necessary
* cache entries, so we can reuse the function for ptes.
@@ -72,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
flush_pmd_entry(pte);
}
-static inline bool kvm_is_write_fault(unsigned long hsr)
-{
- unsigned long hsr_ec = hsr >> HSR_EC_SHIFT;
- if (hsr_ec == HSR_EC_IABT)
- return false;
- else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR))
- return false;
- else
- return true;
-}
-
static inline void kvm_clean_pgd(pgd_t *pgd)
{
clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t));
@@ -103,10 +98,51 @@ static inline void kvm_set_s2pte_writable(pte_t *pte)
pte_val(*pte) |= L_PTE_S2_RDWR;
}
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+ pmd_val(*pmd) |= L_PMD_S2_RDWR;
+}
+
+/* Open coded p*d_addr_end that can deal with 64bit addresses */
+#define kvm_pgd_addr_end(addr, end) \
+({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
+#define kvm_pud_addr_end(addr,end) (end)
+
+#define kvm_pmd_addr_end(addr, end) \
+({ u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
+ (__boundary - 1 < (end) - 1)? __boundary: (end); \
+})
+
+static inline bool kvm_page_empty(void *ptr)
+{
+ struct page *ptr_page = virt_to_page(ptr);
+ return page_count(ptr_page) == 1;
+}
+
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define kvm_pud_table_empty(pudp) (0)
+
+
struct kvm;
-static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
+#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
+}
+
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+ unsigned long size)
{
+ if (!vcpu_has_cache_enabled(vcpu))
+ kvm_flush_dcache_to_poc((void *)hva, size);
+
/*
* If we are going to insert an instruction page and the icache is
* either VIPT or PIPT, there is a potential problem where the host
@@ -120,15 +156,16 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, gfn_t gfn)
* need any kind of flushing (DDI 0406C.b - Page B3-1392).
*/
if (icache_is_pipt()) {
- unsigned long hva = gfn_to_hva(kvm, gfn);
- __cpuc_coherent_user_range(hva, hva + PAGE_SIZE);
+ __cpuc_coherent_user_range(hva, hva + size);
} else if (!icache_is_vivt_asid_tagged()) {
/* any kind of VIPT cache */
__flush_icache_all();
}
}
-#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x))
+
+void stage2_flush_vm(struct kvm *kvm);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm/include/asm/kvm_psci.h b/arch/arm/include/asm/kvm_psci.h
index 9a83d98bf170..6bda945d31fa 100644
--- a/arch/arm/include/asm/kvm_psci.h
+++ b/arch/arm/include/asm/kvm_psci.h
@@ -18,6 +18,10 @@
#ifndef __ARM_KVM_PSCI_H__
#define __ARM_KVM_PSCI_H__
-bool kvm_psci_call(struct kvm_vcpu *vcpu);
+#define KVM_ARM_PSCI_0_1 1
+#define KVM_ARM_PSCI_0_2 2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
#endif /* __ARM_KVM_PSCI_H__ */
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
deleted file mode 100644
index 343744e4809c..000000000000
--- a/arch/arm/include/asm/kvm_vgic.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __ASM_ARM_KVM_VGIC_H
-#define __ASM_ARM_KVM_VGIC_H
-
-#include <linux/kernel.h>
-#include <linux/kvm.h>
-#include <linux/irqreturn.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/irqchip/arm-gic.h>
-
-#define VGIC_NR_IRQS 128
-#define VGIC_NR_SGIS 16
-#define VGIC_NR_PPIS 16
-#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
-#define VGIC_NR_SHARED_IRQS (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
-#define VGIC_MAX_CPUS KVM_MAX_VCPUS
-#define VGIC_MAX_LRS (1 << 6)
-
-/* Sanity checks... */
-#if (VGIC_MAX_CPUS > 8)
-#error Invalid number of CPU interfaces
-#endif
-
-#if (VGIC_NR_IRQS & 31)
-#error "VGIC_NR_IRQS must be a multiple of 32"
-#endif
-
-#if (VGIC_NR_IRQS > 1024)
-#error "VGIC_NR_IRQS must be <= 1024"
-#endif
-
-/*
- * The GIC distributor registers describing interrupts have two parts:
- * - 32 per-CPU interrupts (SGI + PPI)
- * - a bunch of shared interrupts (SPI)
- */
-struct vgic_bitmap {
- union {
- u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
- DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
- } percpu[VGIC_MAX_CPUS];
- union {
- u32 reg[VGIC_NR_SHARED_IRQS / 32];
- DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
- } shared;
-};
-
-struct vgic_bytemap {
- u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
- u32 shared[VGIC_NR_SHARED_IRQS / 4];
-};
-
-struct vgic_dist {
-#ifdef CONFIG_KVM_ARM_VGIC
- spinlock_t lock;
- bool ready;
-
- /* Virtual control interface mapping */
- void __iomem *vctrl_base;
-
- /* Distributor and vcpu interface mapping in the guest */
- phys_addr_t vgic_dist_base;
- phys_addr_t vgic_cpu_base;
-
- /* Distributor enabled */
- u32 enabled;
-
- /* Interrupt enabled (one bit per IRQ) */
- struct vgic_bitmap irq_enabled;
-
- /* Interrupt 'pin' level */
- struct vgic_bitmap irq_state;
-
- /* Level-triggered interrupt in progress */
- struct vgic_bitmap irq_active;
-
- /* Interrupt priority. Not used yet. */
- struct vgic_bytemap irq_priority;
-
- /* Level/edge triggered */
- struct vgic_bitmap irq_cfg;
-
- /* Source CPU per SGI and target CPU */
- u8 irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
-
- /* Target CPU for each IRQ */
- u8 irq_spi_cpu[VGIC_NR_SHARED_IRQS];
- struct vgic_bitmap irq_spi_target[VGIC_MAX_CPUS];
-
- /* Bitmap indicating which CPU has something pending */
- unsigned long irq_pending_on_cpu;
-#endif
-};
-
-struct vgic_cpu {
-#ifdef CONFIG_KVM_ARM_VGIC
- /* per IRQ to LR mapping */
- u8 vgic_irq_lr_map[VGIC_NR_IRQS];
-
- /* Pending interrupts on this VCPU */
- DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
- DECLARE_BITMAP( pending_shared, VGIC_NR_SHARED_IRQS);
-
- /* Bitmap of used/free list registers */
- DECLARE_BITMAP( lr_used, VGIC_MAX_LRS);
-
- /* Number of list registers on this CPU */
- int nr_lr;
-
- /* CPU vif control registers for world switch */
- u32 vgic_hcr;
- u32 vgic_vmcr;
- u32 vgic_misr; /* Saved only */
- u32 vgic_eisr[2]; /* Saved only */
- u32 vgic_elrsr[2]; /* Saved only */
- u32 vgic_apr;
- u32 vgic_lr[VGIC_MAX_LRS];
-#endif
-};
-
-#define LR_EMPTY 0xff
-
-struct kvm;
-struct kvm_vcpu;
-struct kvm_run;
-struct kvm_exit_mmio;
-
-#ifdef CONFIG_KVM_ARM_VGIC
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
-int kvm_vgic_hyp_init(void);
-int kvm_vgic_init(struct kvm *kvm);
-int kvm_vgic_create(struct kvm *kvm);
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
-void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
-void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
- bool level);
-int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio);
-
-#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base))
-#define vgic_initialized(k) ((k)->arch.vgic.ready)
-
-#else
-static inline int kvm_vgic_hyp_init(void)
-{
- return 0;
-}
-
-static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
- return 0;
-}
-
-static inline int kvm_vgic_init(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline int kvm_vgic_create(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
-static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
-
-static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
- unsigned int irq_num, bool level)
-{
- return 0;
-}
-
-static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
-static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
-{
- return false;
-}
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline bool vgic_initialized(struct kvm *kvm)
-{
- return true;
-}
-#endif
-
-#endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 57870ab313c5..21b458e6b0b8 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -157,6 +157,7 @@
*/
#define __PV_BITS_31_24 0x81000000
+extern phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
extern unsigned long __pv_phys_offset;
#define PHYS_OFFSET __pv_phys_offset
@@ -233,6 +234,21 @@ static inline void *phys_to_virt(phys_addr_t x)
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
/*
+ * These are for systems that have a hardware interconnect supported alias of
+ * physical memory for idmap purposes. Most cases should leave these
+ * untouched.
+ */
+static inline phys_addr_t __virt_to_idmap(unsigned long x)
+{
+ if (arch_virt_to_idmap)
+ return arch_virt_to_idmap(x);
+ else
+ return __virt_to_phys(x);
+}
+
+#define virt_to_idmap(x) __virt_to_idmap((unsigned long)(x))
+
+/*
* Virtual <-> DMA view memory address translations
* Again, these are *only* valid on the kernel direct mapped RAM
* memory. Use of these is *deprecated* (and that doesn't mean
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 54733e5ef7a1..c5d94b31bde4 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -126,6 +126,8 @@
#define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */
#define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
+#define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
+
/*
* Hyp-mode PL2 PTE definitions for LPAE.
*/
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 76a52aed14e8..b1b7a4907489 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -100,7 +100,7 @@ extern pgprot_t pgprot_s2_device;
#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP)
#define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
#define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
-#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_USER | L_PTE_S2_RDONLY)
+#define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDWR)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN | L_PTE_NONE)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h
index 18d169373612..1a292d8be988 100644
--- a/arch/arm/include/asm/smp_scu.h
+++ b/arch/arm/include/asm/smp_scu.h
@@ -11,7 +11,7 @@
static inline bool scu_a9_has_base(void)
{
- return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
+ return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
}
static inline unsigned long scu_a9_get_base(void)
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c1ee007523d7..09ee408c1a67 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -20,10 +20,12 @@
#define __ARM_KVM_H__
#include <linux/types.h>
+#include <linux/psci.h>
#include <asm/ptrace.h>
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
@@ -63,7 +65,8 @@ struct kvm_regs {
/* Supported Processor Types */
#define KVM_ARM_TARGET_CORTEX_A15 0
-#define KVM_ARM_NUM_TARGETS 1
+#define KVM_ARM_TARGET_CORTEX_A7 1
+#define KVM_ARM_NUM_TARGETS 2
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
@@ -82,6 +85,7 @@ struct kvm_regs {
#define KVM_VGIC_V2_CPU_SIZE 0x2000
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
struct kvm_vcpu_init {
__u32 target;
@@ -118,6 +122,26 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
#define KVM_REG_ARM_32_CRN_SHIFT 11
+#define ARM_CP15_REG_SHIFT_MASK(x,n) \
+ (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
+
+#define __ARM_CP15_REG(op1,crn,crm,op2) \
+ (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
+ ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
+ ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
+ ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
+ ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
+
+#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
+
+#define __ARM_CP15_REG64(op1,crm) \
+ (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
+#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
+
+#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
+#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
+
/* Normal registers are mapped as coprocessor 16. */
#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4)
@@ -142,6 +166,15 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_VFP_FPINST 0x1009
#define KVM_REG_ARM_VFP_FPINST2 0x100A
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
+#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
+#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
+#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
@@ -172,9 +205,9 @@ struct kvm_arch_memory_slot {
#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
-#define KVM_PSCI_RET_SUCCESS 0
-#define KVM_PSCI_RET_NI ((unsigned long)-1)
-#define KVM_PSCI_RET_INVAL ((unsigned long)-2)
-#define KVM_PSCI_RET_DENIED ((unsigned long)-3)
+#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
#endif /* __ARM_KVM_H__ */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index ee68cce6b48e..776d9186e9c1 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -168,6 +168,7 @@ int main(void)
DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
+ DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr));
DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr));
DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar));
@@ -175,13 +176,13 @@ int main(void)
DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
#ifdef CONFIG_KVM_ARM_VGIC
DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
- DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr));
- DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr));
- DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr));
- DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr));
- DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr));
- DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr));
- DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr));
+ DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+ DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+ DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+ DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+ DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+ DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+ DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
#ifdef CONFIG_KVM_ARM_TIMER
DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 0b48a38e3cf4..e0665b871f5b 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -234,49 +234,39 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
static int probe_current_pmu(struct arm_pmu *pmu)
{
int cpu = get_cpu();
- unsigned long implementor = read_cpuid_implementor();
- unsigned long part_number = read_cpuid_part_number();
int ret = -ENODEV;
pr_info("probing PMU on CPU %d\n", cpu);
+ switch (read_cpuid_part()) {
/* ARM Ltd CPUs. */
- if (implementor == ARM_CPU_IMP_ARM) {
- switch (part_number) {
- case ARM_CPU_PART_ARM1136:
- case ARM_CPU_PART_ARM1156:
- case ARM_CPU_PART_ARM1176:
- ret = armv6pmu_init(pmu);
- break;
- case ARM_CPU_PART_ARM11MPCORE:
- ret = armv6mpcore_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A8:
- ret = armv7_a8_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A9:
- ret = armv7_a9_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A5:
- ret = armv7_a5_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A15:
- ret = armv7_a15_pmu_init(pmu);
- break;
- case ARM_CPU_PART_CORTEX_A7:
- ret = armv7_a7_pmu_init(pmu);
- break;
- }
- /* Intel CPUs [xscale]. */
- } else if (implementor == ARM_CPU_IMP_INTEL) {
- switch (xscale_cpu_arch_version()) {
- case ARM_CPU_XSCALE_ARCH_V1:
- ret = xscale1pmu_init(pmu);
- break;
- case ARM_CPU_XSCALE_ARCH_V2:
- ret = xscale2pmu_init(pmu);
- break;
+ case ARM_CPU_PART_ARM1136:
+ case ARM_CPU_PART_ARM1156:
+ case ARM_CPU_PART_ARM1176:
+ ret = armv6pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_ARM11MPCORE:
+ ret = armv6mpcore_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_CORTEX_A8:
+ ret = armv7_a8_pmu_init(pmu);
+ break;
+ case ARM_CPU_PART_CORTEX_A9:
+ ret = armv7_a9_pmu_init(pmu);
+ break;
+
+ default:
+ if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
+ switch (xscale_cpu_arch_version()) {
+ case ARM_CPU_XSCALE_ARCH_V1:
+ ret = xscale1pmu_init(pmu);
+ break;
+ case ARM_CPU_XSCALE_ARCH_V2:
+ ret = xscale2pmu_init(pmu);
+ break;
+ }
}
+ break;
}
/* assume PMU support all the CPUs in this case */
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index dc2843f337af..d6e3d1ca4ddf 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -91,8 +91,8 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
* its stack and the page tables.
*/
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
- secondary_data.pgdir = virt_to_phys(idmap_pgd);
- secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir);
+ secondary_data.pgdir = virt_to_idmap(idmap_pgd);
+ secondary_data.swapper_pg_dir = virt_to_idmap(swapper_pg_dir);
__cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data));
outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1));
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 370e1a8af6ac..466bd299b1a8 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
bool "Kernel-based Virtual Machine (KVM) support"
select PREEMPT_NOTIFIERS
select ANON_INODES
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
select KVM_MMIO
select KVM_ARM_HOST
depends on ARM_VIRT_EXT && ARM_LPAE
@@ -41,9 +42,9 @@ config KVM_ARM_HOST
Provides host support for ARM processors.
config KVM_ARM_MAX_VCPUS
- int "Number maximum supported virtual CPUs per VM" if KVM_ARM_HOST
- default 4 if KVM_ARM_HOST
- default 0
+ int "Number maximum supported virtual CPUs per VM"
+ depends on KVM_ARM_HOST
+ default 4
help
Static number of max supported virtual CPUs per VM.
@@ -67,6 +68,4 @@ config KVM_ARM_TIMER
---help---
Adds support for the Architected Timers in virtual machines
-source drivers/virtio/Kconfig
-
endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 53c5ed83d16f..f7057ed045b6 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -14,10 +14,12 @@ CFLAGS_mmu.o := -I.
AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
-kvm-arm-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+KVM := ../../../virt/kvm
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
-obj-y += coproc.o coproc_a15.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += vgic.o
-obj-$(CONFIG_KVM_ARM_TIMER) += arch_timer.o
+obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 1d55afe7fd4b..d0c8ee654bbf 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -82,12 +82,12 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void)
/**
* kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus.
*/
-struct kvm_vcpu __percpu **kvm_get_running_vcpus(void)
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void)
{
return &kvm_arm_running_vcpu;
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
return 0;
}
@@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
int kvm_arch_hardware_setup(void)
{
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
void kvm_arch_check_processor_compat(void *rtn)
{
*(int *)rtn = 0;
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
/**
* kvm_arch_init_vm - initializes a VM data structure
@@ -138,6 +127,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_free_stage2_pgd;
+ kvm_timer_init(kvm);
+
/* Mark the initial VMID generation invalid */
kvm->arch.vmid_gen = 0;
@@ -153,15 +144,6 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
-{
- return 0;
-}
/**
* kvm_arch_destroy_vm - destroy the VM data structure
@@ -179,20 +161,25 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->vcpus[i] = NULL;
}
}
+
+ kvm_vgic_destroy(kvm);
}
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
r = vgic_present;
break;
+ case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_READONLY_MEM:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -220,29 +207,6 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
- enum kvm_mr_change change)
-{
- return 0;
-}
-
-void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
- enum kvm_mr_change change)
-{
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
-}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
@@ -281,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
kvm_mmu_free_memory_caches(vcpu);
kvm_timer_vcpu_terminate(vcpu);
+ kvm_vgic_vcpu_destroy(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -296,26 +261,15 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
- int ret;
-
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu->arch.target = -1;
- /* Set up VGIC */
- ret = kvm_vgic_vcpu_init(vcpu);
- if (ret)
- return ret;
-
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
return 0;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
vcpu->cpu = cpu;
@@ -335,6 +289,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ /*
+ * The arch-generic KVM code expects the cpu field of a vcpu to be -1
+ * if the vcpu is no longer assigned to a cpu. This is used for the
+ * optimized make_all_cpus_request path.
+ */
+ vcpu->cpu = -1;
+
kvm_arm_set_running_vcpu(NULL);
}
@@ -449,15 +410,17 @@ static void update_vttbr(struct kvm *kvm)
/* update vttbr to be used with the new vmid */
pgd_phys = virt_to_phys(kvm->arch.pgd);
+ BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK;
- kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK;
- kvm->arch.vttbr |= vmid;
+ kvm->arch.vttbr = pgd_phys | vmid;
spin_unlock(&kvm_vmid_lock);
}
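A standalone sketch of the VTTBR value this builds: the stage-2 pgd base occupies the low BADDR bits (the BUG_ON above guarantees no stray bits), with the VMID above it. The shift of 48 mirrors VTTBR_VMID_SHIFT and is an assumption here.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t pgd_phys = 0x80004000ULL;	/* example, suitably aligned */
	uint64_t vmid = 5;
	uint64_t vttbr = pgd_phys | (vmid << 48);

	assert(vttbr >> 48 == 5);
	assert((vttbr & 0x0000ffffffffffffULL) == pgd_phys);
	return 0;
}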
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
+ int ret;
+
if (likely(vcpu->arch.has_run_once))
return 0;
@@ -467,22 +430,12 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
* Initialize the VGIC before running a vcpu the first time on
* this VM.
*/
- if (irqchip_in_kernel(vcpu->kvm) &&
- unlikely(!vgic_initialized(vcpu->kvm))) {
- int ret = kvm_vgic_init(vcpu->kvm);
+ if (unlikely(!vgic_initialized(vcpu->kvm))) {
+ ret = kvm_vgic_init(vcpu->kvm);
if (ret)
return ret;
}
- /*
- * Handle the "start in power-off" case by calling into the
- * PSCI code.
- */
- if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
- *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
- kvm_psci_call(vcpu);
- }
-
return 0;
}
@@ -696,6 +649,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
return -EINVAL;
}
+static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_init *init)
+{
+ int ret;
+
+ ret = kvm_vcpu_set_target(vcpu, init);
+ if (ret)
+ return ret;
+
+ /*
+ * Handle the "start in power-off" case by marking the VCPU as paused.
+ */
+ if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+ vcpu->arch.pause = true;
+
+ return 0;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -709,8 +680,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&init, argp, sizeof(init)))
return -EFAULT;
- return kvm_vcpu_set_target(vcpu, &init);
-
+ return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
}
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
@@ -768,7 +738,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
case KVM_ARM_DEVICE_VGIC_V2:
if (!vgic_present)
return -ENXIO;
- return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
+ return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
default:
return -ENODEV;
}
@@ -794,6 +764,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
return -EFAULT;
return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
}
+ case KVM_ARM_PREFERRED_TARGET: {
+ int err;
+ struct kvm_vcpu_init init;
+
+ err = kvm_vcpu_preferred_target(&init);
+ if (err)
+ return err;
+
+ if (copy_to_user(argp, &init, sizeof(init)))
+ return -EFAULT;
+
+ return 0;
+ }
default:
return -EINVAL;
}
@@ -801,8 +784,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
static void cpu_init_hyp_mode(void *dummy)
{
- unsigned long long boot_pgd_ptr;
- unsigned long long pgd_ptr;
+ phys_addr_t boot_pgd_ptr;
+ phys_addr_t pgd_ptr;
unsigned long hyp_stack_ptr;
unsigned long stack_page;
unsigned long vector_ptr;
@@ -810,8 +793,8 @@ static void cpu_init_hyp_mode(void *dummy)
/* Switch from the HYP stub to our own HYP init vector */
__hyp_set_vectors(kvm_get_idmap_vector());
- boot_pgd_ptr = (unsigned long long)kvm_mmu_get_boot_httbr();
- pgd_ptr = (unsigned long long)kvm_mmu_get_httbr();
+ boot_pgd_ptr = kvm_mmu_get_boot_httbr();
+ pgd_ptr = kvm_mmu_get_httbr();
stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
hyp_stack_ptr = stack_page + PAGE_SIZE;
vector_ptr = (unsigned long)__kvm_hyp_vector;
@@ -825,7 +808,8 @@ static int hyp_init_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_STARTING:
case CPU_STARTING_FROZEN:
- cpu_init_hyp_mode(NULL);
+ if (__hyp_get_vectors() == hyp_default_vectors)
+ cpu_init_hyp_mode(NULL);
break;
}
@@ -841,7 +825,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
unsigned long cmd,
void *v)
{
- if (cmd == CPU_PM_EXIT) {
+ if (cmd == CPU_PM_EXIT &&
+ __hyp_get_vectors() == hyp_default_vectors) {
cpu_init_hyp_mode(NULL);
return NOTIFY_OK;
}
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index db9cf692d4dd..7928dbdf2102 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -23,6 +23,7 @@
#include <asm/kvm_host.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
#include <asm/cacheflush.h>
#include <asm/cputype.h>
#include <trace/events/kvm.h>
@@ -43,6 +44,31 @@ static u32 cache_levels;
/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
#define CSSELR_MAX 12
+/*
+ * kvm_vcpu_arch.cp15 holds cp15 registers as an array of u32, but some
+ * cp15 registers can be viewed either as a pair of two u32 registers or
+ * as one u64 register. The u64 register encoding used here is that the
+ * least significant u32 word is followed by the most significant one.
+ */
+static inline void vcpu_cp15_reg64_set(struct kvm_vcpu *vcpu,
+ const struct coproc_reg *r,
+ u64 val)
+{
+ vcpu->arch.cp15[r->reg] = val & 0xffffffff;
+ vcpu->arch.cp15[r->reg + 1] = val >> 32;
+}
+
+static inline u64 vcpu_cp15_reg64_get(struct kvm_vcpu *vcpu,
+ const struct coproc_reg *r)
+{
+ u64 val;
+
+ val = vcpu->arch.cp15[r->reg + 1];
+ val = val << 32;
+ val = val | vcpu->arch.cp15[r->reg];
+ return val;
+}
+
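A standalone illustration of that word ordering (sketch only, not kernel code):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t cp15[2];
	uint64_t val = 0x1122334455667788ULL;

	cp15[0] = val & 0xffffffff;	/* least significant word first */
	cp15[1] = val >> 32;		/* most significant word second */

	assert(cp15[0] == 0x55667788 && cp15[1] == 0x11223344);
	assert(((uint64_t)cp15[1] << 32 | cp15[0]) == val);
	return 0;
}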
int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
kvm_inject_undefined(vcpu);
@@ -71,6 +97,98 @@ int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ /*
+ * Compute guest MPIDR. We build a virtual cluster out of the
+ * vcpu_id, but we read the 'U' bit from the underlying
+ * hardware directly.
+ */
+ vcpu->arch.cp15[c0_MPIDR] = ((read_cpuid_mpidr() & MPIDR_SMP_BITMASK) |
+ ((vcpu->vcpu_id >> 2) << MPIDR_LEVEL_BITS) |
+ (vcpu->vcpu_id & 3));
+}
+
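Worked example of that packing, assuming the arm32 value MPIDR_LEVEL_BITS == 8 and leaving the hardware 'U' bit aside:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	unsigned int vcpu_id = 5;
	uint32_t aff = ((vcpu_id >> 2) << 8) | (vcpu_id & 3);

	/* vcpu 5 becomes core 1 (Aff0) of virtual cluster 1 (Aff1). */
	assert(aff == 0x101);
	return 0;
}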
+/* TRM entries A7:4.3.31 A15:4.3.28 - RO WI */
+static bool access_actlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
+ return true;
+}
+
+/* TRM entries A7:4.3.56, A15:4.3.60 - R/O. */
+static bool access_cbar(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return write_to_read_only(vcpu, p);
+ return read_zero(vcpu, p);
+}
+
+/* TRM entries A7:4.3.49, A15:4.3.48 - R/O WI */
+static bool access_l2ctlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
+ return true;
+}
+
+static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ u32 l2ctlr, ncores;
+
+ asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+ l2ctlr &= ~(3 << 24);
+ ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+ /* How many cores in the current cluster and the next ones */
+ ncores -= (vcpu->vcpu_id & ~3);
+ /* Cap it to the maximum number of cores in a single cluster */
+ ncores = min(ncores, 3U);
+ l2ctlr |= (ncores & 3) << 24;
+
+ vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
+}
+
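The cluster arithmetic above, restated as a standalone helper (illustrative only; L2CTLR[25:24] encodes the number of cores in the vcpu's cluster, minus one):

#include <assert.h>

static unsigned int l2ctlr_ncores(unsigned int online_vcpus,
				  unsigned int vcpu_id)
{
	unsigned int ncores = online_vcpus - 1;

	ncores -= (vcpu_id & ~3);	/* drop cores below this cluster */
	return ncores < 3 ? ncores : 3;	/* cap at 4 cores per cluster */
}

int main(void)
{
	assert(l2ctlr_ncores(6, 0) == 3);	/* first cluster: 4 cores */
	assert(l2ctlr_ncores(6, 4) == 1);	/* second cluster: 2 cores */
	return 0;
}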
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
+{
+ u32 actlr;
+
+ /* ACTLR contains SMP bit: make sure you create all cpus first! */
+ asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+ /* Make the SMP bit consistent with the guest configuration */
+ if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+ actlr |= 1U << 6;
+ else
+ actlr &= ~(1U << 6);
+
+ vcpu->arch.cp15[c1_ACTLR] = actlr;
+}
+
+/*
+ * TRM entries: A7:4.3.50, A15:4.3.49
+ * R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored).
+ */
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt1) = 0;
+ return true;
+}
+
/* See note at ARM ARM B1.14.4 */
static bool access_dcsw(struct kvm_vcpu *vcpu,
const struct coproc_params *p,
@@ -113,6 +231,44 @@ done:
}
/*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ BUG_ON(!p->is_write);
+
+ vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
+ if (p->is_64bit)
+ vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
+
+ return true;
+}
+
+/*
+ * SCTLR accessor. Only called as long as HCR_TVM is set. If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ *
+ * Used by the cpu-specific code.
+ */
+bool access_sctlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r)
+{
+ access_vm_reg(vcpu, p, r);
+
+ if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */
+ vcpu->arch.hcr &= ~HCR_TVM;
+ stage2_flush_vm(vcpu->kvm);
+ }
+
+ return true;
+}
+
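The vcpu_has_cache_enabled() test used above amounts to requiring SCTLR.M (bit 0) and SCTLR.C (bit 2) together; a sketch of that predicate (an assumption, not the kernel's definition):

#include <stdbool.h>
#include <stdint.h>

static bool cache_enabled(uint32_t sctlr)
{
	const uint32_t m_and_c = (1u << 0) | (1u << 2);

	/* Trapping is only dropped once both MMU and D-cache are on. */
	return (sctlr & m_and_c) == m_and_c;
}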
+/*
* We could trap ID_DFR0 and tell the guest we don't support performance
* monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
* NAKed, so it will read the PMCR anyway.
@@ -153,37 +309,52 @@ static bool pm_fake(struct kvm_vcpu *vcpu,
* registers preceding 32-bit ones.
*/
static const struct coproc_reg cp15_regs[] = {
+ /* MPIDR: we use VMPIDR for guest access. */
+ { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
+ NULL, reset_mpidr, c0_MPIDR },
+
/* CSSELR: swapped by interrupt.S. */
{ CRn( 0), CRm( 0), Op1( 2), Op2( 0), is32,
NULL, reset_unknown, c0_CSSELR },
- /* TTBR0/TTBR1: swapped by interrupt.S. */
- { CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
- { CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
+ /* ACTLR: trapped by HCR.TAC bit. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
+ access_actlr, reset_actlr, c1_ACTLR },
- /* TTBCR: swapped by interrupt.S. */
+ /* CPACR: swapped by interrupt.S. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
+ NULL, reset_val, c1_CPACR, 0x00000000 },
+
+ /* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */
+ { CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 },
+ { CRn(2), CRm( 0), Op1( 0), Op2( 0), is32,
+ access_vm_reg, reset_unknown, c2_TTBR0 },
+ { CRn(2), CRm( 0), Op1( 0), Op2( 1), is32,
+ access_vm_reg, reset_unknown, c2_TTBR1 },
{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
- NULL, reset_val, c2_TTBCR, 0x00000000 },
+ access_vm_reg, reset_val, c2_TTBCR, 0x00000000 },
+ { CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 },
+
/* DACR: swapped by interrupt.S. */
{ CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32,
- NULL, reset_unknown, c3_DACR },
+ access_vm_reg, reset_unknown, c3_DACR },
/* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */
{ CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32,
- NULL, reset_unknown, c5_DFSR },
+ access_vm_reg, reset_unknown, c5_DFSR },
{ CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32,
- NULL, reset_unknown, c5_IFSR },
+ access_vm_reg, reset_unknown, c5_IFSR },
{ CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32,
- NULL, reset_unknown, c5_ADFSR },
+ access_vm_reg, reset_unknown, c5_ADFSR },
{ CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32,
- NULL, reset_unknown, c5_AIFSR },
+ access_vm_reg, reset_unknown, c5_AIFSR },
/* DFAR/IFAR: swapped by interrupt.S. */
{ CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32,
- NULL, reset_unknown, c6_DFAR },
+ access_vm_reg, reset_unknown, c6_DFAR },
{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
- NULL, reset_unknown, c6_IFAR },
+ access_vm_reg, reset_unknown, c6_IFAR },
/* PAR swapped by interrupt.S */
{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
@@ -195,6 +366,13 @@ static const struct coproc_reg cp15_regs[] = {
{ CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, access_dcsw},
{ CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, access_dcsw},
/*
+ * L2CTLR access (guest wants to know #CPUs).
+ */
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
+ access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
+
+ /*
* Dummy performance monitor implementation.
*/
{ CRn( 9), CRm(12), Op1( 0), Op2( 0), is32, access_pmcr},
@@ -213,9 +391,15 @@ static const struct coproc_reg cp15_regs[] = {
/* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */
{ CRn(10), CRm( 2), Op1( 0), Op2( 0), is32,
- NULL, reset_unknown, c10_PRRR},
+ access_vm_reg, reset_unknown, c10_PRRR},
{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
- NULL, reset_unknown, c10_NMRR},
+ access_vm_reg, reset_unknown, c10_NMRR},
+
+ /* AMAIR0/AMAIR1: swapped by interrupt.S. */
+ { CRn(10), CRm( 3), Op1( 0), Op2( 0), is32,
+ access_vm_reg, reset_unknown, c10_AMAIR0},
+ { CRn(10), CRm( 3), Op1( 0), Op2( 1), is32,
+ access_vm_reg, reset_unknown, c10_AMAIR1},
/* VBAR: swapped by interrupt.S. */
{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
@@ -223,7 +407,7 @@ static const struct coproc_reg cp15_regs[] = {
/* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */
{ CRn(13), CRm( 0), Op1( 0), Op2( 1), is32,
- NULL, reset_val, c13_CID, 0x00000000 },
+ access_vm_reg, reset_val, c13_CID, 0x00000000 },
{ CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,
NULL, reset_unknown, c13_TID_URW },
{ CRn(13), CRm( 0), Op1( 0), Op2( 3), is32,
@@ -234,6 +418,9 @@ static const struct coproc_reg cp15_regs[] = {
/* CNTKCTL: swapped by interrupt.S. */
{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
NULL, reset_val, c14_CNTKCTL, 0x00000000 },
+
+ /* The Configuration Base Address Register. */
+ { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
};
/* Target specific emulation tables */
@@ -241,6 +428,12 @@ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS];
void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table)
{
+ unsigned int i;
+
+ for (i = 1; i < table->num; i++)
+ BUG_ON(cmp_reg(&table->table[i-1],
+ &table->table[i]) >= 0);
+
target_tables[table->target] = table;
}
@@ -323,7 +516,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct coproc_params params;
- params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
+ params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
params.is_64bit = true;
@@ -331,7 +524,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
params.Op2 = 0;
params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
- params.CRn = 0;
+ params.CRm = 0;
return emulate_cp15(vcpu, &params);
}
@@ -514,17 +707,23 @@ static struct coproc_reg invariant_cp15[] = {
{ CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR },
};
+/*
+ * Reads a register value from a userspace address into a kernel
+ * variable. The caller must ensure the register size matches sizeof(*val).
+ */
static int reg_from_user(void *val, const void __user *uaddr, u64 id)
{
- /* This Just Works because we are little endian. */
if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
return -EFAULT;
return 0;
}
+/*
+ * Writes a register value to a userspace address from a kernel variable.
+ * The caller must ensure the register size matches sizeof(*val).
+ */
static int reg_to_user(void __user *uaddr, const void *val, u64 id)
{
- /* This Just Works because we are little endian. */
if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
return -EFAULT;
return 0;
@@ -534,6 +733,7 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
{
struct coproc_params params;
const struct coproc_reg *r;
+ int ret;
if (!index_to_params(id, &params))
return -ENOENT;
@@ -542,7 +742,15 @@ static int get_invariant_cp15(u64 id, void __user *uaddr)
if (!r)
return -ENOENT;
- return reg_to_user(uaddr, &r->val, id);
+ ret = -ENOENT;
+ if (KVM_REG_SIZE(id) == 4) {
+ u32 val = r->val;
+
+ ret = reg_to_user(uaddr, &val, id);
+ } else if (KVM_REG_SIZE(id) == 8) {
+ ret = reg_to_user(uaddr, &r->val, id);
+ }
+ return ret;
}
static int set_invariant_cp15(u64 id, void __user *uaddr)
@@ -550,7 +758,7 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
struct coproc_params params;
const struct coproc_reg *r;
int err;
- u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+ u64 val;
if (!index_to_params(id, &params))
return -ENOENT;
@@ -558,7 +766,16 @@ static int set_invariant_cp15(u64 id, void __user *uaddr)
if (!r)
return -ENOENT;
- err = reg_from_user(&val, uaddr, id);
+ err = -ENOENT;
+ if (KVM_REG_SIZE(id) == 4) {
+ u32 val32;
+
+ err = reg_from_user(&val32, uaddr, id);
+ if (!err)
+ val = val32;
+ } else if (KVM_REG_SIZE(id) == 8) {
+ err = reg_from_user(&val, uaddr, id);
+ }
if (err)
return err;
@@ -574,7 +791,7 @@ static bool is_valid_cache(u32 val)
u32 level, ctype;
if (val >= CSSELR_MAX)
- return -ENOENT;
+ return false;
/* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
level = (val >> 1);
@@ -836,6 +1053,7 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
const struct coproc_reg *r;
void __user *uaddr = (void __user *)(long)reg->addr;
+ int ret;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
return demux_c15_get(reg->id, uaddr);
@@ -847,14 +1065,24 @@ int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!r)
return get_invariant_cp15(reg->id, uaddr);
- /* Note: copies two regs if size is 64 bit. */
- return reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+ ret = -ENOENT;
+ if (KVM_REG_SIZE(reg->id) == 8) {
+ u64 val;
+
+ val = vcpu_cp15_reg64_get(vcpu, r);
+ ret = reg_to_user(uaddr, &val, reg->id);
+ } else if (KVM_REG_SIZE(reg->id) == 4) {
+ ret = reg_to_user(uaddr, &vcpu->arch.cp15[r->reg], reg->id);
+ }
+
+ return ret;
}
int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
const struct coproc_reg *r;
void __user *uaddr = (void __user *)(long)reg->addr;
+ int ret;
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
return demux_c15_set(reg->id, uaddr);
@@ -866,8 +1094,18 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (!r)
return set_invariant_cp15(reg->id, uaddr);
- /* Note: copies two regs if size is 64 bit */
- return reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+ ret = -ENOENT;
+ if (KVM_REG_SIZE(reg->id) == 8) {
+ u64 val;
+
+ ret = reg_from_user(&val, uaddr, reg->id);
+ if (!ret)
+ vcpu_cp15_reg64_set(vcpu, r, val);
+ } else if (KVM_REG_SIZE(reg->id) == 4) {
+ ret = reg_from_user(&vcpu->arch.cp15[r->reg], uaddr, reg->id);
+ }
+
+ return ret;
}
static unsigned int num_demux_regs(void)
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 0461d5c8d3de..1a44bbe39643 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p)
{
/* Look, we even formatted it for you to paste into the table! */
if (p->is_64bit) {
- kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n",
- p->CRm, p->Op1, p->is_write ? "write" : "read");
+ kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n",
+ p->CRn, p->Op1, p->is_write ? "write" : "read");
} else {
kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
" func_%s },\n",
@@ -135,13 +135,13 @@ static inline int cmp_reg(const struct coproc_reg *i1,
return -1;
if (i1->CRn != i2->CRn)
return i1->CRn - i2->CRn;
- if (i1->is_64 != i2->is_64)
- return i2->is_64 - i1->is_64;
if (i1->CRm != i2->CRm)
return i1->CRm - i2->CRm;
if (i1->Op1 != i2->Op1)
return i1->Op1 - i2->Op1;
- return i1->Op2 - i2->Op2;
+ if (i1->Op2 != i2->Op2)
+ return i1->Op2 - i2->Op2;
+ return i2->is_64 - i1->is_64;
}
@@ -153,4 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
#define is64 .is_64 = true
#define is32 .is_64 = false
+bool access_sctlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ const struct coproc_reg *r);
+
#endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index cf93472b9dd6..e6f4ae48bda9 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -17,101 +17,12 @@
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
-#include <asm/cputype.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_host.h>
-#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
#include <linux/init.h>
-static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- /*
- * Compute guest MPIDR:
- * (Even if we present only one VCPU to the guest on an SMP
- * host we don't set the U bit in the MPIDR, or vice versa, as
- * revealing the underlying hardware properties is likely to
- * be the best choice).
- */
- vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_LEVEL_MASK)
- | (vcpu->vcpu_id & MPIDR_LEVEL_MASK);
-}
-
#include "coproc.h"
-/* A15 TRM 4.3.28: RO WI */
-static bool access_actlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c1_ACTLR];
- return true;
-}
-
-/* A15 TRM 4.3.60: R/O. */
-static bool access_cbar(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return write_to_read_only(vcpu, p);
- return read_zero(vcpu, p);
-}
-
-/* A15 TRM 4.3.48: R/O WI. */
-static bool access_l2ctlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[c9_L2CTLR];
- return true;
-}
-
-static void reset_l2ctlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- u32 l2ctlr, ncores;
-
- asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
- l2ctlr &= ~(3 << 24);
- ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
- l2ctlr |= (ncores & 3) << 24;
-
- vcpu->arch.cp15[c9_L2CTLR] = l2ctlr;
-}
-
-static void reset_actlr(struct kvm_vcpu *vcpu, const struct coproc_reg *r)
-{
- u32 actlr;
-
- /* ACTLR contains SMP bit: make sure you create all cpus first! */
- asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
- /* Make the SMP bit consistent with the guest configuration */
- if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
- actlr |= 1U << 6;
- else
- actlr &= ~(1U << 6);
-
- vcpu->arch.cp15[c1_ACTLR] = actlr;
-}
-
-/* A15 TRM 4.3.49: R/O WI (even if NSACR.NS_L2ERR, a write of 1 is ignored). */
-static bool access_l2ectlr(struct kvm_vcpu *vcpu,
- const struct coproc_params *p,
- const struct coproc_reg *r)
-{
- if (p->is_write)
- return ignore_write(vcpu, p);
-
- *vcpu_reg(vcpu, p->Rt1) = 0;
- return true;
-}
-
/*
* A15-specific CP15 registers.
* CRn denotes the primary register number, but is copied to the CRm in the
@@ -121,29 +32,9 @@ static bool access_l2ectlr(struct kvm_vcpu *vcpu,
* registers preceding 32-bit ones.
*/
static const struct coproc_reg a15_regs[] = {
- /* MPIDR: we use VMPIDR for guest access. */
- { CRn( 0), CRm( 0), Op1( 0), Op2( 5), is32,
- NULL, reset_mpidr, c0_MPIDR },
-
/* SCTLR: swapped by interrupt.S. */
{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
- NULL, reset_val, c1_SCTLR, 0x00C50078 },
- /* ACTLR: trapped by HCR.TAC bit. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32,
- access_actlr, reset_actlr, c1_ACTLR },
- /* CPACR: swapped by interrupt.S. */
- { CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
- NULL, reset_val, c1_CPACR, 0x00000000 },
-
- /*
- * L2CTLR access (guest wants to know #CPUs).
- */
- { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32,
- access_l2ctlr, reset_l2ctlr, c9_L2CTLR },
- { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, access_l2ectlr},
-
- /* The Configuration Base Address Register. */
- { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
+ access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
};
static struct kvm_coproc_target_table a15_target_table = {
@@ -154,12 +45,6 @@ static struct kvm_coproc_target_table a15_target_table = {
static int __init coproc_a15_init(void)
{
- unsigned int i;
-
- for (i = 1; i < ARRAY_SIZE(a15_regs); i++)
- BUG_ON(cmp_reg(&a15_regs[i-1],
- &a15_regs[i]) >= 0);
-
kvm_register_target_coproc_table(&a15_target_table);
return 0;
}
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
new file mode 100644
index 000000000000..17fc7cd479d3
--- /dev/null
+++ b/arch/arm/kvm/coproc_a7.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Copyright (C) 2013 - ARM Ltd
+ *
+ * Authors: Rusty Russell <rusty@rustcorp.com.au>
+ * Christoffer Dall <c.dall@virtualopensystems.com>
+ * Jonathan Austin <jonathan.austin@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/kvm_host.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_emulate.h>
+#include <linux/init.h>
+
+#include "coproc.h"
+
+/*
+ * Cortex-A7 specific CP15 registers.
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ * registers preceding 32-bit ones.
+ */
+static const struct coproc_reg a7_regs[] = {
+ /* SCTLR: swapped by interrupt.S. */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
+ access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
+};
+
+static struct kvm_coproc_target_table a7_target_table = {
+ .target = KVM_ARM_TARGET_CORTEX_A7,
+ .table = a7_regs,
+ .num = ARRAY_SIZE(a7_regs),
+};
+
+static int __init coproc_a7_init(void)
+{
+ kvm_register_target_coproc_table(&a7_target_table);
+ return 0;
+}
+late_initcall(coproc_a7_init);
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index bdede9e7da51..d6c005283678 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -354,7 +354,7 @@ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
if (is_pabt) {
- /* Set DFAR and DFSR */
+ /* Set IFAR and IFSR */
vcpu->arch.cp15[c6_IFAR] = addr;
is_lpae = (vcpu->arch.cp15[c2_TTBCR] >> 31);
/* Always give debug fault for now - should give guest a clue */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 152d03612181..cc0b78769bd8 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.hcr = HCR_GUEST_MASK;
return 0;
}
@@ -109,6 +110,73 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return -EINVAL;
}
+#ifndef CONFIG_KVM_ARM_TIMER
+
+#define NUM_TIMER_REGS 0
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ return 0;
+}
+
+static bool is_timer_reg(u64 index)
+{
+ return false;
+}
+
+#else
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+ switch (index) {
+ case KVM_REG_ARM_TIMER_CTL:
+ case KVM_REG_ARM_TIMER_CNT:
+ case KVM_REG_ARM_TIMER_CVAL:
+ return true;
+ }
+ return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+ return -EFAULT;
+
+ return 0;
+}
+
+#endif
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ int ret;
+
+ ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+ if (ret != 0)
+ return -EFAULT;
+
+ return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+
+ val = kvm_arm_timer_get_reg(vcpu, reg->id);
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
static unsigned long num_core_regs(void)
{
return sizeof(struct kvm_regs) / sizeof(u32);
@@ -121,7 +189,8 @@ static unsigned long num_core_regs(void)
*/
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
{
- return num_core_regs() + kvm_arm_num_coproc_regs(vcpu);
+ return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
+ + NUM_TIMER_REGS;
}
/**
@@ -133,6 +202,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
unsigned int i;
const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
+ int ret;
for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
if (put_user(core_reg | i, uindices))
@@ -140,6 +210,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
uindices++;
}
+ ret = copy_timer_indices(vcpu, uindices);
+ if (ret)
+ return ret;
+ uindices += NUM_TIMER_REGS;
+
return kvm_arm_copy_coproc_indices(vcpu, uindices);
}
@@ -153,6 +228,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
return get_core_reg(vcpu, reg);
+ if (is_timer_reg(reg->id))
+ return get_timer_reg(vcpu, reg);
+
return kvm_arm_coproc_get_reg(vcpu, reg);
}
@@ -166,6 +244,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
return set_core_reg(vcpu, reg);
+ if (is_timer_reg(reg->id))
+ return set_timer_reg(vcpu, reg);
+
return kvm_arm_coproc_set_reg(vcpu, reg);
}
@@ -183,13 +264,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int __attribute_const__ kvm_target_cpu(void)
{
- unsigned long implementor = read_cpuid_implementor();
- unsigned long part_number = read_cpuid_part_number();
-
- if (implementor != ARM_CPU_IMP_ARM)
- return -EINVAL;
-
- switch (part_number) {
+ switch (read_cpuid_part()) {
+ case ARM_CPU_PART_CORTEX_A7:
+ return KVM_ARM_TARGET_CORTEX_A7;
case ARM_CPU_PART_CORTEX_A15:
return KVM_ARM_TARGET_CORTEX_A15;
default:
@@ -202,7 +279,7 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
{
unsigned int i;
- /* We can only do a cortex A15 for now. */
+ /* We can only cope with guest==host and only on A15/A7 (for now). */
if (init->target != kvm_target_cpu())
return -EINVAL;
@@ -222,6 +299,26 @@ int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
return kvm_reset_vcpu(vcpu);
}
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+ int target = kvm_target_cpu();
+
+ if (target < 0)
+ return -ENODEV;
+
+ memset(init, 0, sizeof(*init));
+
+ /*
+ * For now, we don't return any features.
+ * In the future, we might use features to return
+ * target-specific features available for the preferred
+ * target type.
+ */
+ init->target = (__u32)target;
+
+ return 0;
+}
+
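The intended userspace pairing is KVM_ARM_PREFERRED_TARGET on the VM followed by KVM_ARM_VCPU_INIT on each vcpu; a sketch (vm_fd/vcpu_fd assumed to come from KVM_CREATE_VM/KVM_CREATE_VCPU, error handling trimmed):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int init_vcpu_preferred(int vm_fd, int vcpu_fd)
{
	struct kvm_vcpu_init init;

	memset(&init, 0, sizeof(init));
	if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init) < 0)
		return -1;

	/* init.features[] could be adjusted here before the init call. */
	return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
}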
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -EINVAL;
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 3d74a0be47db..4c979d466cc1 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -26,8 +26,6 @@
#include "trace.h"
-#include "trace.h"
-
typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
@@ -40,21 +38,22 @@ static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
+ int ret;
+
trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0),
kvm_vcpu_hvc_get_imm(vcpu));
- if (kvm_psci_call(vcpu))
+ ret = kvm_psci_call(vcpu);
+ if (ret < 0) {
+ kvm_inject_undefined(vcpu);
return 1;
+ }
- kvm_inject_undefined(vcpu);
- return 1;
+ return ret;
}
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- if (kvm_psci_call(vcpu))
- return 1;
-
kvm_inject_undefined(vcpu);
return 1;
}
@@ -76,23 +75,29 @@ static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
}
/**
- * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * kvm_handle_wfx - handle a WFI or WFE instruction trapped in a guest
* @vcpu: the vcpu pointer
* @run: the kvm_run structure pointer
*
- * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
- * halt execution of world-switches and schedule other host processes until
- * there is an incoming IRQ or FIQ to the VM.
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
*/
-static int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
trace_kvm_wfi(*vcpu_pc(vcpu));
- kvm_vcpu_block(vcpu);
+ if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE)
+ kvm_vcpu_on_spin(vcpu);
+ else
+ kvm_vcpu_block(vcpu);
+
return 1;
}
static exit_handle_fn arm_exit_handlers[] = {
- [HSR_EC_WFI] = kvm_handle_wfi,
+ [HSR_EC_WFI] = kvm_handle_wfx,
[HSR_EC_CP15_32] = kvm_handle_cp15_32,
[HSR_EC_CP15_64] = kvm_handle_cp15_64,
[HSR_EC_CP14_MR] = kvm_handle_cp14_access,
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
index f048338135f7..2cc14dfad049 100644
--- a/arch/arm/kvm/init.S
+++ b/arch/arm/kvm/init.S
@@ -71,7 +71,7 @@ __do_hyp_init:
bne phase2 @ Yes, second stage init
@ Set the HTTBR to point to the hypervisor PGD pointer passed
- mcrr p15, 4, r2, r3, c2
+ mcrr p15, 4, rr_lo_hi(r2, r3), c2
@ Set the HTCR and VTCR to the same shareability and cacheability
@ settings as the non-secure TTBCR and with T0SZ == 0.
@@ -137,12 +137,12 @@ phase2:
mov pc, r0
target: @ We're now in the trampoline code, switch page tables
- mcrr p15, 4, r2, r3, c2
+ mcrr p15, 4, rr_lo_hi(r2, r3), c2
isb
@ Invalidate the old TLBs
mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH
- dsb
+ dsb ish
eret
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 16cd4ba5d7fd..01dcb0e752d9 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -52,10 +52,10 @@ ENTRY(__kvm_tlb_flush_vmid_ipa)
dsb ishst
add r0, r0, #KVM_VTTBR
ldrd r2, r3, [r0]
- mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+ mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
isb
mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored)
- dsb
+ dsb ish
isb
mov r2, #0
mov r3, #0
@@ -79,7 +79,7 @@ ENTRY(__kvm_flush_vm_context)
mcr p15, 4, r0, c8, c3, 4
/* Invalidate instruction caches Inner Shareable (ICIALLUIS) */
mcr p15, 0, r0, c7, c1, 0
- dsb
+ dsb ish
isb @ Not necessary if followed by eret
bx lr
@@ -135,7 +135,7 @@ ENTRY(__kvm_vcpu_run)
ldr r1, [vcpu, #VCPU_KVM]
add r1, r1, #KVM_VTTBR
ldrd r2, r3, [r1]
- mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+ mcrr p15, 6, rr_lo_hi(r2, r3), c2 @ Write VTTBR
@ We're all done, just restore the GPRs and go to the guest
restore_guest_regs
@@ -199,8 +199,13 @@ after_vfp_restore:
restore_host_regs
clrex @ Clear exclusive monitor
+#ifndef CONFIG_CPU_ENDIAN_BE8
mov r0, r1 @ Return the return code
mov r1, #0 @ Clear upper bits in return value
+#else
+ @ r1 already has return code
+ mov r0, #0 @ Clear upper bits in return value
+#endif /* CONFIG_CPU_ENDIAN_BE8 */
bx lr @ return to IOCTL
/********************************************************************
@@ -220,6 +225,10 @@ after_vfp_restore:
* in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
* passed in r0 and r1.
*
+ * A function pointer with a value of 0xffffffff has a special meaning,
+ * and is used to implement __hyp_get_vectors in the same way as in
+ * arch/arm/kernel/hyp_stub.S.
+ *
* The calling convention follows the standard AAPCS:
* r0 - r3: caller save
* r12: caller save
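In C terms, a host-side caller can retrieve HVBAR by passing ~0 as the "function pointer"; a hedged sketch of that convention:

/* Sketch only: the HVC handler below special-cases r0 == -1 and
 * returns the current HVBAR instead of branching to a function.
 * kvm_call_hyp() is assumed to be the arm KVM trampoline (declared
 * with a u64 return in asm/kvm_host.h). */
extern unsigned long long kvm_call_hyp(void *hypfn, ...);

static unsigned long example_get_vectors(void)
{
	return kvm_call_hyp((void *)0xffffffffUL);
}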
@@ -363,6 +372,11 @@ hyp_hvc:
host_switch_to_hyp:
pop {r0, r1, r2}
+ /* Check for __hyp_get_vectors */
+ cmp r0, #-1
+ mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR
+ beq 1f
+
push {lr}
mrs lr, SPSR
push {lr}
@@ -378,7 +392,7 @@ THUMB( orr lr, #1)
pop {lr}
msr SPSR_csxf, lr
pop {lr}
- eret
+1: eret
guest_trap:
load_vcpu @ Load VCPU pointer to r0
@@ -492,10 +506,10 @@ __kvm_hyp_code_end:
.section ".rodata"
und_die_str:
- .ascii "unexpected undefined exception in Hyp mode at: %#08x"
+ .ascii "unexpected undefined exception in Hyp mode at: %#08x\n"
pabt_die_str:
- .ascii "unexpected prefetch abort in Hyp mode at: %#08x"
+ .ascii "unexpected prefetch abort in Hyp mode at: %#08x\n"
dabt_die_str:
- .ascii "unexpected data abort in Hyp mode at: %#08x"
+ .ascii "unexpected data abort in Hyp mode at: %#08x\n"
svc_die_str:
- .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x"
+ .ascii "unexpected HVC/SVC trap in Hyp mode at: %#08x\n"
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 6f18695a09cb..98c8c5b9a87f 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -1,4 +1,5 @@
#include <linux/irqchip/arm-gic.h>
+#include <asm/assembler.h>
#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4))
#define VCPU_USR_SP (VCPU_USR_REG(13))
@@ -303,13 +304,17 @@ vcpu .req r0 @ vcpu pointer always in r0
mrc p15, 0, r2, c14, c1, 0 @ CNTKCTL
mrrc p15, 0, r4, r5, c7 @ PAR
+ mrc p15, 0, r6, c10, c3, 0 @ AMAIR0
+ mrc p15, 0, r7, c10, c3, 1 @ AMAIR1
.if \store_to_vcpu == 0
- push {r2,r4-r5}
+ push {r2,r4-r7}
.else
str r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
add r12, vcpu, #CP15_OFFSET(c7_PAR)
strd r4, r5, [r12]
+ str r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+ str r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
.endif
.endm
@@ -322,15 +327,19 @@ vcpu .req r0 @ vcpu pointer always in r0
*/
.macro write_cp15_state read_from_vcpu
.if \read_from_vcpu == 0
- pop {r2,r4-r5}
+ pop {r2,r4-r7}
.else
ldr r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
add r12, vcpu, #CP15_OFFSET(c7_PAR)
ldrd r4, r5, [r12]
+ ldr r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+ ldr r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
.endif
mcr p15, 0, r2, c14, c1, 0 @ CNTKCTL
mcrr p15, 0, r4, r5, c7 @ PAR
+ mcr p15, 0, r6, c10, c3, 0 @ AMAIR0
+ mcr p15, 0, r7, c10, c3, 1 @ AMAIR1
.if \read_from_vcpu == 0
pop {r2-r12}
@@ -412,15 +421,23 @@ vcpu .req r0 @ vcpu pointer always in r0
ldr r8, [r2, #GICH_ELRSR0]
ldr r9, [r2, #GICH_ELRSR1]
ldr r10, [r2, #GICH_APR]
-
- str r3, [r11, #VGIC_CPU_HCR]
- str r4, [r11, #VGIC_CPU_VMCR]
- str r5, [r11, #VGIC_CPU_MISR]
- str r6, [r11, #VGIC_CPU_EISR]
- str r7, [r11, #(VGIC_CPU_EISR + 4)]
- str r8, [r11, #VGIC_CPU_ELRSR]
- str r9, [r11, #(VGIC_CPU_ELRSR + 4)]
- str r10, [r11, #VGIC_CPU_APR]
+ARM_BE8(rev r3, r3 )
+ARM_BE8(rev r4, r4 )
+ARM_BE8(rev r5, r5 )
+ARM_BE8(rev r6, r6 )
+ARM_BE8(rev r7, r7 )
+ARM_BE8(rev r8, r8 )
+ARM_BE8(rev r9, r9 )
+ARM_BE8(rev r10, r10 )
+
+ str r3, [r11, #VGIC_V2_CPU_HCR]
+ str r4, [r11, #VGIC_V2_CPU_VMCR]
+ str r5, [r11, #VGIC_V2_CPU_MISR]
+ str r6, [r11, #VGIC_V2_CPU_EISR]
+ str r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
+ str r8, [r11, #VGIC_V2_CPU_ELRSR]
+ str r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+ str r10, [r11, #VGIC_V2_CPU_APR]
/* Clear GICH_HCR */
mov r5, #0
@@ -428,9 +445,10 @@ vcpu .req r0 @ vcpu pointer always in r0
/* Save list registers */
add r2, r2, #GICH_LR0
- add r3, r11, #VGIC_CPU_LR
+ add r3, r11, #VGIC_V2_CPU_LR
ldr r4, [r11, #VGIC_CPU_NR_LR]
1: ldr r6, [r2], #4
+ARM_BE8(rev r6, r6 )
str r6, [r3], #4
subs r4, r4, #1
bne 1b
@@ -455,9 +473,12 @@ vcpu .req r0 @ vcpu pointer always in r0
add r11, vcpu, #VCPU_VGIC_CPU
/* We only restore a minimal set of registers */
- ldr r3, [r11, #VGIC_CPU_HCR]
- ldr r4, [r11, #VGIC_CPU_VMCR]
- ldr r8, [r11, #VGIC_CPU_APR]
+ ldr r3, [r11, #VGIC_V2_CPU_HCR]
+ ldr r4, [r11, #VGIC_V2_CPU_VMCR]
+ ldr r8, [r11, #VGIC_V2_CPU_APR]
+ARM_BE8(rev r3, r3 )
+ARM_BE8(rev r4, r4 )
+ARM_BE8(rev r8, r8 )
str r3, [r2, #GICH_HCR]
str r4, [r2, #GICH_VMCR]
@@ -465,9 +486,10 @@ vcpu .req r0 @ vcpu pointer always in r0
/* Restore list registers */
add r2, r2, #GICH_LR0
- add r3, r11, #VGIC_CPU_LR
+ add r3, r11, #VGIC_V2_CPU_LR
ldr r4, [r11, #VGIC_CPU_NR_LR]
1: ldr r6, [r3], #4
+ARM_BE8(rev r6, r6 )
str r6, [r2], #4
subs r4, r4, #1
bne 1b
@@ -498,7 +520,7 @@ vcpu .req r0 @ vcpu pointer always in r0
mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
isb
- mrrc p15, 3, r2, r3, c14 @ CNTV_CVAL
+ mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
ldr r4, =VCPU_TIMER_CNTV_CVAL
add r5, vcpu, r4
strd r2, r3, [r5]
@@ -538,12 +560,12 @@ vcpu .req r0 @ vcpu pointer always in r0
ldr r2, [r4, #KVM_TIMER_CNTVOFF]
ldr r3, [r4, #(KVM_TIMER_CNTVOFF + 4)]
- mcrr p15, 4, r2, r3, c14 @ CNTVOFF
+ mcrr p15, 4, rr_lo_hi(r2, r3), c14 @ CNTVOFF
ldr r4, =VCPU_TIMER_CNTV_CVAL
add r5, vcpu, r4
ldrd r2, r3, [r5]
- mcrr p15, 3, r2, r3, c14 @ CNTV_CVAL
+ mcrr p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL
isb
ldr r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
@@ -597,17 +619,14 @@ vcpu .req r0 @ vcpu pointer always in r0
/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
.macro configure_hyp_role operation
- mrc p15, 4, r2, c1, c1, 0 @ HCR
- bic r2, r2, #HCR_VIRT_EXCP_MASK
- ldr r3, =HCR_GUEST_MASK
.if \operation == vmentry
- orr r2, r2, r3
+ ldr r2, [vcpu, #VCPU_HCR]
ldr r3, [vcpu, #VCPU_IRQ_LINES]
orr r2, r2, r3
.else
- bic r2, r2, r3
+ mov r2, #0
.endif
- mcr p15, 4, r2, c1, c1, 0
+ mcr p15, 4, r2, c1, c1, 0 @ HCR
.endm
.macro load_vcpu
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 72a12f2171b2..4cb5a93182e9 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -23,6 +23,68 @@
#include "trace.h"
+static void mmio_write_buf(char *buf, unsigned int len, unsigned long data)
+{
+ void *datap = NULL;
+ union {
+ u8 byte;
+ u16 hword;
+ u32 word;
+ u64 dword;
+ } tmp;
+
+ switch (len) {
+ case 1:
+ tmp.byte = data;
+ datap = &tmp.byte;
+ break;
+ case 2:
+ tmp.hword = data;
+ datap = &tmp.hword;
+ break;
+ case 4:
+ tmp.word = data;
+ datap = &tmp.word;
+ break;
+ case 8:
+ tmp.dword = data;
+ datap = &tmp.dword;
+ break;
+ }
+
+ memcpy(buf, datap, len);
+}
+
+static unsigned long mmio_read_buf(char *buf, unsigned int len)
+{
+ unsigned long data = 0;
+ union {
+ u16 hword;
+ u32 word;
+ u64 dword;
+ } tmp;
+
+ switch (len) {
+ case 1:
+ data = buf[0];
+ break;
+ case 2:
+ memcpy(&tmp.hword, buf, len);
+ data = tmp.hword;
+ break;
+ case 4:
+ memcpy(&tmp.word, buf, len);
+ data = tmp.word;
+ break;
+ case 8:
+ memcpy(&tmp.dword, buf, len);
+ data = tmp.dword;
+ break;
+ }
+
+ return data;
+}
+
/**
* kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
* @vcpu: The VCPU pointer
@@ -33,28 +95,27 @@
*/
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- unsigned long *dest;
+ unsigned long data;
unsigned int len;
int mask;
if (!run->mmio.is_write) {
- dest = vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt);
- *dest = 0;
-
len = run->mmio.len;
if (len > sizeof(unsigned long))
return -EINVAL;
- memcpy(dest, run->mmio.data, len);
-
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
- *((u64 *)run->mmio.data));
+ data = mmio_read_buf(run->mmio.data, len);
if (vcpu->arch.mmio_decode.sign_extend &&
len < sizeof(unsigned long)) {
mask = 1U << ((len * 8) - 1);
- *dest = (*dest ^ mask) - mask;
+ data = (data ^ mask) - mask;
}
+
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+ data);
+ data = vcpu_data_host_to_guest(vcpu, data, len);
+ *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data;
}
return 0;
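The sign extension above is the classic xor/subtract trick; a standalone check for a one-byte load of 0xff:

#include <assert.h>

int main(void)
{
	unsigned long data = 0xff;	/* 1-byte MMIO read result */
	unsigned long mask = 1UL << 7;	/* sign bit of the access size */

	data = (data ^ mask) - mask;	/* sign-extend to full width */
	assert((long)data == -1);
	return 0;
}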
@@ -63,7 +124,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_exit_mmio *mmio)
{
- unsigned long rt, len;
+ unsigned long rt;
+ int len;
bool is_write, sign_extend;
if (kvm_vcpu_dabt_isextabt(vcpu)) {
@@ -86,12 +148,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
sign_extend = kvm_vcpu_dabt_issext(vcpu);
rt = kvm_vcpu_dabt_get_rd(vcpu);
- if (kvm_vcpu_reg_is_pc(vcpu, rt)) {
- /* IO memory trying to read/write pc */
- kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- return 1;
- }
-
mmio->is_write = is_write;
mmio->phys_addr = fault_ipa;
mmio->len = len;
@@ -110,6 +166,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa)
{
struct kvm_exit_mmio mmio;
+ unsigned long data;
unsigned long rt;
int ret;
@@ -130,13 +187,15 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
}
rt = vcpu->arch.mmio_decode.rt;
+ data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
+
trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
KVM_TRACE_MMIO_READ_UNSATISFIED,
mmio.len, fault_ipa,
- (mmio.is_write) ? *vcpu_reg(vcpu, rt) : 0);
+ (mmio.is_write) ? data : 0);
if (mmio.is_write)
- memcpy(mmio.data, vcpu_reg(vcpu, rt), mmio.len);
+ mmio_write_buf(mmio.data, mmio.len, data);
if (vgic_handle_mmio(vcpu, run, &mmio))
return 1;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index e04613906f1b..eea03069161b 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -19,6 +19,7 @@
#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
+#include <linux/hugetlb.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
@@ -41,6 +42,10 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
+#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+
+#define kvm_pmd_huge(_x) (pmd_huge(_x) || pmd_trans_huge(_x))
+
static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
/*
@@ -85,9 +90,19 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
return p;
}
+static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+{
+ pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
+ pgd_clear(pgd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ pud_free(NULL, pud_table);
+ put_page(virt_to_page(pgd));
+}
+
static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
{
pmd_t *pmd_table = pmd_offset(pud, 0);
+ VM_BUG_ON(pud_huge(*pud));
pud_clear(pud);
kvm_tlb_flush_vmid_ipa(kvm, addr);
pmd_free(NULL, pmd_table);
@@ -97,73 +112,186 @@ static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
{
pte_t *pte_table = pte_offset_kernel(pmd, 0);
+ VM_BUG_ON(kvm_pmd_huge(*pmd));
pmd_clear(pmd);
kvm_tlb_flush_vmid_ipa(kvm, addr);
pte_free_kernel(NULL, pte_table);
put_page(virt_to_page(pmd));
}
-static bool pmd_empty(pmd_t *pmd)
+static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+ phys_addr_t addr, phys_addr_t end)
{
- struct page *pmd_page = virt_to_page(pmd);
- return page_count(pmd_page) == 1;
+ phys_addr_t start_addr = addr;
+ pte_t *pte, *start_pte;
+
+ start_pte = pte = pte_offset_kernel(pmd, addr);
+ do {
+ if (!pte_none(*pte)) {
+ kvm_set_pte(pte, __pte(0));
+ put_page(virt_to_page(pte));
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ }
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+
+ if (kvm_pte_table_empty(start_pte))
+ clear_pmd_entry(kvm, pmd, start_addr);
}
-static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
+static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+ phys_addr_t addr, phys_addr_t end)
{
- if (pte_present(*pte)) {
- kvm_set_pte(pte, __pte(0));
- put_page(virt_to_page(pte));
- kvm_tlb_flush_vmid_ipa(kvm, addr);
- }
+ phys_addr_t next, start_addr = addr;
+ pmd_t *pmd, *start_pmd;
+
+ start_pmd = pmd = pmd_offset(pud, addr);
+ do {
+ next = kvm_pmd_addr_end(addr, end);
+ if (!pmd_none(*pmd)) {
+ if (kvm_pmd_huge(*pmd)) {
+ pmd_clear(pmd);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ put_page(virt_to_page(pmd));
+ } else {
+ unmap_ptes(kvm, pmd, addr, next);
+ }
+ }
+ } while (pmd++, addr = next, addr != end);
+
+ if (kvm_pmd_table_empty(start_pmd))
+ clear_pud_entry(kvm, pud, start_addr);
}
-static bool pte_empty(pte_t *pte)
+static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+ phys_addr_t addr, phys_addr_t end)
{
- struct page *pte_page = virt_to_page(pte);
- return page_count(pte_page) == 1;
+ phys_addr_t next, start_addr = addr;
+ pud_t *pud, *start_pud;
+
+ start_pud = pud = pud_offset(pgd, addr);
+ do {
+ next = kvm_pud_addr_end(addr, end);
+ if (!pud_none(*pud)) {
+ if (pud_huge(*pud)) {
+ pud_clear(pud);
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ put_page(virt_to_page(pud));
+ } else {
+ unmap_pmds(kvm, pud, addr, next);
+ }
+ }
+ } while (pud++, addr = next, addr != end);
+
+ if (kvm_pud_table_empty(start_pud))
+ clear_pgd_entry(kvm, pgd, start_addr);
}
+
static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
- unsigned long long start, u64 size)
+ phys_addr_t start, u64 size)
{
pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
+ phys_addr_t addr = start, end = start + size;
+ phys_addr_t next;
+
+ pgd = pgdp + pgd_index(addr);
+ do {
+ next = kvm_pgd_addr_end(addr, end);
+ unmap_puds(kvm, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
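Each unmap_* level above steps with the kvm_*_addr_end() clamping idiom, so a level never walks past the single entry its parent handed it. These helpers are expected to reduce to the generic *_addr_end() trick; a sketch for the PMD level, where the '- 1' keeps the comparison safe if 'end' wraps to 0:

	static inline phys_addr_t example_pmd_addr_end(phys_addr_t addr,
						       phys_addr_t end)
	{
		/* next 2M boundary above addr, or end, whichever is first */
		phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;

		return (boundary - 1 < end - 1) ? boundary : end;
	}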
+
+static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+ phys_addr_t addr, phys_addr_t end)
+{
pte_t *pte;
- unsigned long long addr = start, end = start + size;
- u64 range;
- while (addr < end) {
- pgd = pgdp + pgd_index(addr);
- pud = pud_offset(pgd, addr);
- if (pud_none(*pud)) {
- addr += PUD_SIZE;
- continue;
+ pte = pte_offset_kernel(pmd, addr);
+ do {
+ if (!pte_none(*pte)) {
+ hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+ kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
}
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+}
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- addr += PMD_SIZE;
- continue;
+static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+ phys_addr_t addr, phys_addr_t end)
+{
+ pmd_t *pmd;
+ phys_addr_t next;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = kvm_pmd_addr_end(addr, end);
+ if (!pmd_none(*pmd)) {
+ if (kvm_pmd_huge(*pmd)) {
+ hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+ kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
+ } else {
+ stage2_flush_ptes(kvm, pmd, addr, next);
+ }
}
+ } while (pmd++, addr = next, addr != end);
+}
- pte = pte_offset_kernel(pmd, addr);
- clear_pte_entry(kvm, pte, addr);
- range = PAGE_SIZE;
-
- /* If we emptied the pte, walk back up the ladder */
- if (pte_empty(pte)) {
- clear_pmd_entry(kvm, pmd, addr);
- range = PMD_SIZE;
- if (pmd_empty(pmd)) {
- clear_pud_entry(kvm, pud, addr);
- range = PUD_SIZE;
+static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
+ phys_addr_t addr, phys_addr_t end)
+{
+ pud_t *pud;
+ phys_addr_t next;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = kvm_pud_addr_end(addr, end);
+ if (!pud_none(*pud)) {
+ if (pud_huge(*pud)) {
+ hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+ kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
+ } else {
+ stage2_flush_pmds(kvm, pud, addr, next);
}
}
+ } while (pud++, addr = next, addr != end);
+}
- addr += range;
- }
+static void stage2_flush_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+ phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
+ phys_addr_t next;
+ pgd_t *pgd;
+
+ pgd = kvm->arch.pgd + pgd_index(addr);
+ do {
+ next = kvm_pgd_addr_end(addr, end);
+ stage2_flush_puds(kvm, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the stage 2 page tables and invalidate any cache lines
+ * backing memory already mapped to the VM.
+ */
+void stage2_flush_vm(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots)
+ stage2_flush_memslot(kvm, memslot);
+
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
}
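The range walked per memslot is simply the GFN window scaled into IPA space. A worked example with hypothetical values, assuming 4K pages (PAGE_SHIFT == 12):

	phys_addr_t addr = 0x10000ULL << 12;		/* base_gfn 0x10000 -> IPA 0x10000000 */
	phys_addr_t end  = addr + (0x800ULL << 12);	/* npages 0x800 -> IPA 0x10800000 */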
/**
@@ -178,14 +306,14 @@ void free_boot_hyp_pgd(void)
if (boot_hyp_pgd) {
unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
- kfree(boot_hyp_pgd);
+ free_pages((unsigned long)boot_hyp_pgd, pgd_order);
boot_hyp_pgd = NULL;
}
if (hyp_pgd)
unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
- kfree(init_bounce_page);
+ free_page((unsigned long)init_bounce_page);
init_bounce_page = NULL;
mutex_unlock(&kvm_hyp_pgd_mutex);
@@ -215,7 +343,7 @@ void free_hyp_pgds(void)
for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
- kfree(hyp_pgd);
+ free_pages((unsigned long)hyp_pgd, pgd_order);
hyp_pgd = NULL;
}
@@ -404,9 +532,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
if (!pgd)
return -ENOMEM;
- /* stage-2 pgd must be aligned to its size */
- VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));
-
memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
@@ -451,29 +576,71 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
kvm->arch.pgd = NULL;
}
-
-static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
- phys_addr_t addr, const pte_t *new_pte, bool iomap)
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- pte_t *pte, old_pte;
- /* Create 2nd stage page table mapping - Level 1 */
pgd = kvm->arch.pgd + pgd_index(addr);
pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
if (!cache)
- return 0; /* ignore calls from kvm_set_spte_hva */
+ return NULL;
pmd = mmu_memory_cache_alloc(cache);
pud_populate(NULL, pud, pmd);
get_page(virt_to_page(pud));
}
- pmd = pmd_offset(pud, addr);
+ return pmd_offset(pud, addr);
+}
+
+static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
+ *cache, phys_addr_t addr, const pmd_t *new_pmd)
+{
+ pmd_t *pmd, old_pmd;
- /* Create 2nd stage page table mapping - Level 2 */
+ pmd = stage2_get_pmd(kvm, cache, addr);
+ VM_BUG_ON(!pmd);
+
+ /*
+ * Mapping in huge pages should only happen through a fault. If a
+ * page is merged into a transparent huge page, the individual
+ * subpages of that huge page should be unmapped through MMU
+ * notifiers before we get here.
+ *
+ * Merging of CompoundPages is not supported; they should instead
+ * be split first, unmapped, merged, and mapped back in on demand.
+ */
+ VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
+
+ old_pmd = *pmd;
+ kvm_set_pmd(pmd, *new_pmd);
+ if (pmd_present(old_pmd))
+ kvm_tlb_flush_vmid_ipa(kvm, addr);
+ else
+ get_page(virt_to_page(pmd));
+ return 0;
+}
+
+static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+ phys_addr_t addr, const pte_t *new_pte, bool iomap)
+{
+ pmd_t *pmd;
+ pte_t *pte, old_pte;
+
+ /* Create stage-2 page table mapping - Level 1 */
+ pmd = stage2_get_pmd(kvm, cache, addr);
+ if (!pmd) {
+ /*
+ * Ignore calls from kvm_set_spte_hva for unallocated
+ * address ranges.
+ */
+ return 0;
+ }
+
+ /* Create stage-2 page mappings - Level 2 */
if (pmd_none(*pmd)) {
if (!cache)
return 0; /* ignore calls from kvm_set_spte_hva */
@@ -520,7 +687,6 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
- kvm_set_s2pte_writable(&pte);
ret = mmu_topup_memory_cache(&cache, 2, 2);
if (ret)
@@ -539,23 +705,97 @@ out:
return ret;
}
+static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap)
+{
+ pfn_t pfn = *pfnp;
+ gfn_t gfn = *ipap >> PAGE_SHIFT;
+
+ if (PageTransCompound(pfn_to_page(pfn))) {
+ unsigned long mask;
+ /*
+ * The address we faulted on is backed by a transparent huge
+ * page. However, because we map the compound huge page and
+ * not the individual tail page, we need to transfer the
+ * refcount to the head page. We have to be careful that the
+ * THP doesn't start to split while we are adjusting the
+ * refcounts.
+ *
+ * We are sure this doesn't happen, because mmu_notifier_retry
+ * was successful and we are holding the mmu_lock, so if this
+ * THP is trying to split, it will be blocked in the mmu
+ * notifier before touching any of the pages, specifically
+ * before being able to call __split_huge_page_refcount().
+ *
+ * We can therefore safely transfer the refcount from PG_tail
+ * to PG_head and switch the pfn from a tail page to the head
+ * page accordingly.
+ */
+ mask = PTRS_PER_PMD - 1;
+ VM_BUG_ON((gfn & mask) != (pfn & mask));
+ if (pfn & mask) {
+ *ipap &= PMD_MASK;
+ kvm_release_pfn_clean(pfn);
+ pfn &= ~mask;
+ kvm_get_pfn(pfn);
+ *pfnp = pfn;
+ }
+
+ return true;
+ }
+
+ return false;
+}
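A worked example of the adjustment above, assuming 4K pages and PTRS_PER_PMD == 512 (so mask == 0x1ff), with hypothetical values:

	unsigned long mask = 512 - 1;	/* 0x1ff */
	unsigned long pfn  = 0x12345;	/* tail page of a THP */
	phys_addr_t   ipa  = 0x40145000;	/* same 0x145 offset in its 2M block */

	/* (gfn & mask) == (pfn & mask) == 0x145, so the VM_BUG_ON holds;
	 * rounding both down selects the head page and the block base. */
	pfn &= ~mask;			/* head pfn: 0x12200 */
	ipa &= ~0x1fffffULL;		/* PMD_MASK for 2M: 0x40000000 */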
+
+static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vcpu_trap_is_iabt(vcpu))
+ return false;
+
+ return kvm_vcpu_dabt_iswrite(vcpu);
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- gfn_t gfn, struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
{
- pte_t new_pte;
- pfn_t pfn;
int ret;
- bool write_fault, writable;
+ bool write_fault, writable, hugetlb = false, force_pte = false;
unsigned long mmu_seq;
+ gfn_t gfn = fault_ipa >> PAGE_SHIFT;
+ struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+ struct vm_area_struct *vma;
+ pfn_t pfn;
+ pgprot_t mem_type = PAGE_S2;
- write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
+ write_fault = kvm_is_write_fault(vcpu);
if (fault_status == FSC_PERM && !write_fault) {
kvm_err("Unexpected L2 read permission error\n");
return -EFAULT;
}
+ /* Let's check if we will get back a huge page backed by hugetlbfs */
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma_intersection(current->mm, hva, hva + 1);
+ if (is_vm_hugetlb_page(vma)) {
+ hugetlb = true;
+ gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
+ } else {
+ /*
+ * Pages belonging to memslots that don't have the same
+ * alignment for userspace and IPA cannot be mapped using
+ * block descriptors even if the pages belong to a THP for
+ * the process, because the stage-2 block descriptor will
+ * cover more than a single THP and we lose atomicity for
+ * unmapping, updates, and splits of the THP or other pages
+ * in the stage-2 block range.
+ */
+ if ((memslot->userspace_addr & ~PMD_MASK) !=
+ ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
+ force_pte = true;
+ }
+ up_read(&current->mm->mmap_sem);
+
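	/*
	 * Worked example of the force_pte check above (4K pages, 2M PMDs,
	 * hypothetical values): userspace_addr = 0x40001000 sits at offset
	 * 0x1000 within its 2M block, while base_gfn << PAGE_SHIFT =
	 * 0x80000000 sits at offset 0. The offsets differ, so a single 2M
	 * stage-2 block would cover host memory outside the faulting THP,
	 * and the slot must fall back to PTE mappings.
	 */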
/* We need minimum second+third level pages */
ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
if (ret)
@@ -573,26 +813,44 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
*/
smp_rmb();
- pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
+ pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
if (is_error_pfn(pfn))
return -EFAULT;
- new_pte = pfn_pte(pfn, PAGE_S2);
- coherent_icache_guest_page(vcpu->kvm, gfn);
+ if (kvm_is_mmio_pfn(pfn))
+ mem_type = PAGE_S2_DEVICE;
- spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+ spin_lock(&kvm->mmu_lock);
+ if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
- if (writable) {
- kvm_set_s2pte_writable(&new_pte);
- kvm_set_pfn_dirty(pfn);
+ if (!hugetlb && !force_pte)
+ hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
+
+ if (hugetlb) {
+ pmd_t new_pmd = pfn_pmd(pfn, mem_type);
+ new_pmd = pmd_mkhuge(new_pmd);
+ if (writable) {
+ kvm_set_s2pmd_writable(&new_pmd);
+ kvm_set_pfn_dirty(pfn);
+ }
+ coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+ ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
+ } else {
+ pte_t new_pte = pfn_pte(pfn, mem_type);
+ if (writable) {
+ kvm_set_s2pte_writable(&new_pte);
+ kvm_set_pfn_dirty(pfn);
+ }
+ coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+ ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
+ mem_type == PAGE_S2_DEVICE);
}
- stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);
+
out_unlock:
- spin_unlock(&vcpu->kvm->mmu_lock);
+ spin_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
- return 0;
+ return ret;
}
/**
@@ -612,7 +870,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
unsigned long fault_status;
phys_addr_t fault_ipa;
struct kvm_memory_slot *memslot;
- bool is_iabt;
+ unsigned long hva;
+ bool is_iabt, write_fault, writable;
gfn_t gfn;
int ret, idx;
@@ -623,17 +882,22 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- fault_status = kvm_vcpu_trap_get_fault(vcpu);
+ fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
- kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
- kvm_vcpu_trap_get_class(vcpu), fault_status);
+ kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
+ kvm_vcpu_trap_get_class(vcpu),
+ (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+ (unsigned long)kvm_vcpu_get_hsr(vcpu));
return -EFAULT;
}
idx = srcu_read_lock(&vcpu->kvm->srcu);
gfn = fault_ipa >> PAGE_SHIFT;
- if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
+ write_fault = kvm_is_write_fault(vcpu);
+ if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
@@ -641,13 +905,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- if (fault_status != FSC_FAULT) {
- kvm_err("Unsupported fault status on io memory: %#lx\n",
- fault_status);
- ret = -EFAULT;
- goto out_unlock;
- }
-
/*
* The IPA is reported as [MAX:12], so we need to
* complement it with the bottom 12 bits from the
@@ -659,9 +916,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
goto out_unlock;
}
- memslot = gfn_to_memslot(vcpu->kvm, gfn);
-
- ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
+ ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
out_unlock:
@@ -779,9 +1034,9 @@ int kvm_mmu_init(void)
{
int err;
- hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
- hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
- hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+ hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+ hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+ hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
/*
@@ -791,7 +1046,7 @@ int kvm_mmu_init(void)
size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start;
phys_addr_t phys_base;
- init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ init_bounce_page = (void *)__get_free_page(GFP_KERNEL);
if (!init_bounce_page) {
kvm_err("Couldn't allocate HYP init bounce page\n");
err = -ENOMEM;
@@ -808,7 +1063,7 @@ int kvm_mmu_init(void)
*/
kvm_flush_dcache_to_poc(init_bounce_page, len);
- phys_base = virt_to_phys(init_bounce_page);
+ phys_base = kvm_virt_to_phys(init_bounce_page);
hyp_idmap_vector += phys_base - hyp_idmap_start;
hyp_idmap_start = phys_base;
hyp_idmap_end = phys_base + len;
@@ -817,8 +1072,9 @@ int kvm_mmu_init(void)
(unsigned long)phys_base);
}
- hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
- boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+ hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+ boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+
if (!hyp_pgd || !boot_hyp_pgd) {
kvm_err("Hyp mode PGD not allocated\n");
err = -ENOMEM;
@@ -864,3 +1120,49 @@ out:
free_hyp_pgds();
return err;
}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change)
+{
+ gpa_t gpa = old->base_gfn << PAGE_SHIFT;
+ phys_addr_t size = old->npages << PAGE_SHIFT;
+ if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+ spin_lock(&kvm->mmu_lock);
+ unmap_stage2_range(kvm, gpa, size);
+ spin_unlock(&kvm->mmu_lock);
+ }
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
+{
+ return 0;
+}
+
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
+{
+ return 0;
+}
+
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot)
+{
+}
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 7ee5bb7a3667..09cf37737ee2 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -18,6 +18,7 @@
#include <linux/kvm_host.h>
#include <linux/wait.h>
+#include <asm/cputype.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_psci.h>
@@ -26,6 +27,36 @@
* as described in ARM document number ARM DEN 0022A.
*/
+#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
+
+static unsigned long psci_affinity_mask(unsigned long affinity_level)
+{
+ if (affinity_level <= 3)
+ return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
+
+ return 0;
+}
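A quick userspace check of the mask arithmetic above, assuming MPIDR_LEVEL_BITS == 8 as in the ARM MPIDR affinity layout and a 64-bit unsigned long:

	#include <stdio.h>

	#define MPIDR_LEVEL_BITS 8
	#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)

	int main(void)
	{
		printf("%lx\n", AFFINITY_MASK(0));	/* ffffffffffffffff: whole MPIDR */
		printf("%lx\n", AFFINITY_MASK(1));	/* ffffffffffffff00: drop Aff0 */
		printf("%lx\n", AFFINITY_MASK(2));	/* ffffffffffff0000: drop Aff0/Aff1 */
		return 0;
	}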
+
+static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
+{
+ /*
+ * NOTE: For simplicity, VCPU suspend emulation is handled the
+ * same as WFI (Wait-for-interrupt) emulation.
+ *
+ * This means for KVM the wakeup events are interrupts and
+ * this is consistent with intended use of StateID as described
+ * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A).
+ *
+ * Further, we also treat a power-down request the same as a
+ * stand-by request, as per section 5.4.2 clause 3 of the PSCI v0.2
+ * specification (ARM DEN 0022A). This means all suspend states
+ * for KVM will preserve the register state.
+ */
+ kvm_vcpu_block(vcpu);
+
+ return PSCI_RET_SUCCESS;
+}
+
static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
{
vcpu->arch.pause = true;
@@ -34,25 +65,41 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
{
struct kvm *kvm = source_vcpu->kvm;
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu = NULL, *tmp;
wait_queue_head_t *wq;
unsigned long cpu_id;
+ unsigned long context_id;
+ unsigned long mpidr;
phys_addr_t target_pc;
+ int i;
cpu_id = *vcpu_reg(source_vcpu, 1);
if (vcpu_mode_is_32bit(source_vcpu))
cpu_id &= ~((u32) 0);
- if (cpu_id >= atomic_read(&kvm->online_vcpus))
- return KVM_PSCI_RET_INVAL;
-
- target_pc = *vcpu_reg(source_vcpu, 2);
+ kvm_for_each_vcpu(i, tmp, kvm) {
+ mpidr = kvm_vcpu_get_mpidr(tmp);
+ if ((mpidr & MPIDR_HWID_BITMASK) == (cpu_id & MPIDR_HWID_BITMASK)) {
+ vcpu = tmp;
+ break;
+ }
+ }
- vcpu = kvm_get_vcpu(kvm, cpu_id);
+ /*
+ * Make sure the caller requested a valid CPU and that the CPU is
+ * turned off.
+ */
+ if (!vcpu)
+ return PSCI_RET_INVALID_PARAMS;
+ if (!vcpu->arch.pause) {
+ if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
+ return PSCI_RET_ALREADY_ON;
+ else
+ return PSCI_RET_INVALID_PARAMS;
+ }
- wq = kvm_arch_vcpu_wq(vcpu);
- if (!waitqueue_active(wq))
- return KVM_PSCI_RET_INVAL;
+ target_pc = *vcpu_reg(source_vcpu, 2);
+ context_id = *vcpu_reg(source_vcpu, 3);
kvm_reset_vcpu(vcpu);
@@ -62,26 +109,165 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
vcpu_set_thumb(vcpu);
}
+ /* Propagate caller endianness */
+ if (kvm_vcpu_is_be(source_vcpu))
+ kvm_vcpu_set_be(vcpu);
+
*vcpu_pc(vcpu) = target_pc;
+ /*
+ * NOTE: We always update r0 (or x0) because for PSCI v0.1
+ * the general purpose registers are undefined upon CPU_ON.
+ */
+ *vcpu_reg(vcpu, 0) = context_id;
vcpu->arch.pause = false;
smp_mb(); /* Make sure the above is visible */
+ wq = kvm_arch_vcpu_wq(vcpu);
wake_up_interruptible(wq);
- return KVM_PSCI_RET_SUCCESS;
+ return PSCI_RET_SUCCESS;
}
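CPU_ON targets are now resolved by MPIDR affinity instead of vcpu index. With MPIDR_HWID_BITMASK == 0xffffff (Aff2:Aff1:Aff0 on ARMv7), a hypothetical match looks like:

	unsigned long cpu_id = 0x0101;		/* guest asks for Aff1 = 1, Aff0 = 1 */
	unsigned long mpidr  = 0x80000101;	/* vcpu MPIDR, high control bits set */

	if ((mpidr & 0xffffff) == (cpu_id & 0xffffff))
		;	/* this vcpu is the CPU_ON target */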
-/**
- * kvm_psci_call - handle PSCI call if r0 value is in range
- * @vcpu: Pointer to the VCPU struct
- *
- * Handle PSCI calls from guests through traps from HVC or SMC instructions.
- * The calling convention is similar to SMC calls to the secure world where
- * the function number is placed in r0 and this function returns true if the
- * function number specified in r0 is withing the PSCI range, and false
- * otherwise.
- */
-bool kvm_psci_call(struct kvm_vcpu *vcpu)
+static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
+{
+ int i;
+ unsigned long mpidr;
+ unsigned long target_affinity;
+ unsigned long target_affinity_mask;
+ unsigned long lowest_affinity_level;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tmp;
+
+ target_affinity = *vcpu_reg(vcpu, 1);
+ lowest_affinity_level = *vcpu_reg(vcpu, 2);
+
+ /* Determine target affinity mask */
+ target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
+ if (!target_affinity_mask)
+ return PSCI_RET_INVALID_PARAMS;
+
+ /* Ignore other bits of target affinity */
+ target_affinity &= target_affinity_mask;
+
+ /*
+ * If one or more VCPUs matching the target affinity are
+ * running, the affinity level is ON, otherwise OFF.
+ */
+ kvm_for_each_vcpu(i, tmp, kvm) {
+ mpidr = kvm_vcpu_get_mpidr(tmp);
+ if (((mpidr & target_affinity_mask) == target_affinity) &&
+ !tmp->arch.pause) {
+ return PSCI_0_2_AFFINITY_LEVEL_ON;
+ }
+ }
+
+ return PSCI_0_2_AFFINITY_LEVEL_OFF;
+}
+
+static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
+{
+ memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
+ vcpu->run->system_event.type = type;
+ vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
+static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
+{
+ kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
+}
+
+static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
+{
+ kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
+}
+
+int kvm_psci_version(struct kvm_vcpu *vcpu)
+{
+ if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+ return KVM_ARM_PSCI_0_2;
+
+ return KVM_ARM_PSCI_0_1;
+}
+
+static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
+{
+ int ret = 1;
+ unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
+ unsigned long val;
+
+ switch (psci_fn) {
+ case PSCI_0_2_FN_PSCI_VERSION:
+ /*
+ * Bits[31:16] = Major Version = 0
+ * Bits[15:0] = Minor Version = 2
+ */
+ val = 2;
+ break;
+ case PSCI_0_2_FN_CPU_SUSPEND:
+ case PSCI_0_2_FN64_CPU_SUSPEND:
+ val = kvm_psci_vcpu_suspend(vcpu);
+ break;
+ case PSCI_0_2_FN_CPU_OFF:
+ kvm_psci_vcpu_off(vcpu);
+ val = PSCI_RET_SUCCESS;
+ break;
+ case PSCI_0_2_FN_CPU_ON:
+ case PSCI_0_2_FN64_CPU_ON:
+ val = kvm_psci_vcpu_on(vcpu);
+ break;
+ case PSCI_0_2_FN_AFFINITY_INFO:
+ case PSCI_0_2_FN64_AFFINITY_INFO:
+ val = kvm_psci_vcpu_affinity_info(vcpu);
+ break;
+ case PSCI_0_2_FN_MIGRATE:
+ case PSCI_0_2_FN64_MIGRATE:
+ val = PSCI_RET_NOT_SUPPORTED;
+ break;
+ case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
+ /*
+ * Either the Trusted OS is MP and hence does not require
+ * migration, or no Trusted OS is present.
+ */
+ val = PSCI_0_2_TOS_MP;
+ break;
+ case PSCI_0_2_FN_MIGRATE_INFO_UP_CPU:
+ case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU:
+ val = PSCI_RET_NOT_SUPPORTED;
+ break;
+ case PSCI_0_2_FN_SYSTEM_OFF:
+ kvm_psci_system_off(vcpu);
+ /*
+ * We shouldn't be going back to the guest VCPU after
+ * receiving a SYSTEM_OFF request.
+ *
+ * If user space accidentally or deliberately resumes the
+ * guest VCPU after a SYSTEM_OFF request, the guest VCPU
+ * should see an internal failure in the PSCI return value.
+ * To achieve this, we preload r0 (or x0) with the PSCI
+ * return value INTERNAL_FAILURE.
+ */
+ val = PSCI_RET_INTERNAL_FAILURE;
+ ret = 0;
+ break;
+ case PSCI_0_2_FN_SYSTEM_RESET:
+ kvm_psci_system_reset(vcpu);
+ /*
+ * Same reason as SYSTEM_OFF for preloading r0 (or x0)
+ * with the PSCI return value INTERNAL_FAILURE.
+ */
+ val = PSCI_RET_INTERNAL_FAILURE;
+ ret = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ *vcpu_reg(vcpu, 0) = val;
+ return ret;
+}
+
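On the version encoding returned above: PSCI 0.2 packs the major version in bits [31:16] and the minor version in bits [15:0], so 'val = 2' advertises v0.2. A helper-style sketch (no such macro exists in this patch):

	#define EXAMPLE_PSCI_VERSION(major, minor) \
		(((major) << 16) | ((minor) & 0xffff))

	/* EXAMPLE_PSCI_VERSION(0, 2) == 2, matching the case above */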
+static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
{
unsigned long psci_fn = *vcpu_reg(vcpu, 0) & ~((u32) 0);
unsigned long val;
@@ -89,20 +275,45 @@ bool kvm_psci_call(struct kvm_vcpu *vcpu)
switch (psci_fn) {
case KVM_PSCI_FN_CPU_OFF:
kvm_psci_vcpu_off(vcpu);
- val = KVM_PSCI_RET_SUCCESS;
+ val = PSCI_RET_SUCCESS;
break;
case KVM_PSCI_FN_CPU_ON:
val = kvm_psci_vcpu_on(vcpu);
break;
case KVM_PSCI_FN_CPU_SUSPEND:
case KVM_PSCI_FN_MIGRATE:
- val = KVM_PSCI_RET_NI;
+ val = PSCI_RET_NOT_SUPPORTED;
break;
-
default:
- return false;
+ return -EINVAL;
}
*vcpu_reg(vcpu, 0) = val;
- return true;
+ return 1;
+}
+
+/**
+ * kvm_psci_call - handle PSCI call if r0 value is in range
+ * @vcpu: Pointer to the VCPU struct
+ *
+ * Handle PSCI calls from guests through traps from HVC instructions.
+ * The calling convention is similar to SMC calls to the secure world
+ * where the function number is placed in r0.
+ *
+ * This function returns: > 0 (success), 0 (success but exit to user
+ * space), and < 0 (errors)
+ *
+ * Errors:
+ * -EINVAL: Unrecognized PSCI function
+ */
+int kvm_psci_call(struct kvm_vcpu *vcpu)
+{
+ switch (kvm_psci_version(vcpu)) {
+ case KVM_ARM_PSCI_0_2:
+ return kvm_psci_0_2_call(vcpu);
+ case KVM_ARM_PSCI_0_1:
+ return kvm_psci_0_1_call(vcpu);
+ default:
+ return -EINVAL;
+ };
}
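For context, a caller such as the HVC exit handler is expected to consume the tri-state return like this (a usage sketch, not code from this hunk):

	ret = kvm_psci_call(vcpu);
	if (ret < 0) {
		/* unknown PSCI function: tell the guest, keep running */
		kvm_inject_undefined(vcpu);
		return 1;
	}
	return ret;	/* 0: exit to userspace; > 0: re-enter the guest */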
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index b80256b554cd..f558c073c023 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -27,16 +27,21 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_coproc.h>
+#include <kvm/arm_arch_timer.h>
+
/******************************************************************************
- * Cortex-A15 Reset Values
+ * Cortex-A15 and Cortex-A7 Reset Values
*/
-static const int a15_max_cpu_idx = 3;
-
-static struct kvm_regs a15_regs_reset = {
+static struct kvm_regs cortexa_regs_reset = {
.usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
};
+static const struct kvm_irq_level cortexa_vtimer_irq = {
+ { .irq = 27 },
+ .level = 1,
+};
+
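The doubled braces in the initializer above reflect struct kvm_irq_level, whose first member is an anonymous union; IRQ 27 is the PPI conventionally wired to the virtual timer. A sketch of the uapi layout:

	struct kvm_irq_level {
		union {
			__u32 irq;	/* here: PPI 27, the virtual timer */
			__s32 status;
		};
		__u32 level;		/* 1 == asserted */
	};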
/*******************************************************************************
* Exported reset function
@@ -51,24 +56,28 @@ static struct kvm_regs a15_regs_reset = {
*/
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
- struct kvm_regs *cpu_reset;
+ struct kvm_regs *reset_regs;
+ const struct kvm_irq_level *cpu_vtimer_irq;
switch (vcpu->arch.target) {
+ case KVM_ARM_TARGET_CORTEX_A7:
case KVM_ARM_TARGET_CORTEX_A15:
- if (vcpu->vcpu_id > a15_max_cpu_idx)
- return -EINVAL;
- cpu_reset = &a15_regs_reset;
+ reset_regs = &cortexa_regs_reset;
vcpu->arch.midr = read_cpuid_id();
+ cpu_vtimer_irq = &cortexa_vtimer_irq;
break;
default:
return -ENODEV;
}
/* Reset core registers */
- memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
+ memcpy(&vcpu->arch.regs, reset_regs, sizeof(vcpu->arch.regs));
/* Reset CP15 registers */
kvm_reset_coprocs(vcpu);
+ /* Reset arch_timer context */
+ kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
return 0;
}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index a8e73ed5ad5b..b1d640f78623 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -59,10 +59,9 @@ TRACE_EVENT(kvm_guest_fault,
__entry->ipa = ipa;
),
- TP_printk("guest fault at PC %#08lx (hxfar %#08lx, "
- "ipa %#16llx, hsr %#08lx",
- __entry->vcpu_pc, __entry->hxfar,
- __entry->ipa, __entry->hsr)
+ TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx",
+ __entry->ipa, __entry->hsr,
+ __entry->hxfar, __entry->vcpu_pc)
);
TRACE_EVENT(kvm_irq_line,
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
deleted file mode 100644
index 17c5ac7d10ed..000000000000
--- a/arch/arm/kvm/vgic.c
+++ /dev/null
@@ -1,1499 +0,0 @@
-/*
- * Copyright (C) 2012 ARM Ltd.
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/cpu.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-
-#include <linux/irqchip/arm-gic.h>
-
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_mmu.h>
-
-/*
- * How the whole thing works (courtesy of Christoffer Dall):
- *
- * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
- * something is pending
- * - VGIC pending interrupts are stored on the vgic.irq_state vgic
- * bitmap (this bitmap is updated by both user land ioctls and guest
- * mmio ops, and other in-kernel peripherals such as the
- * arch. timers) and indicate the 'wire' state.
- * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
- * recalculated
- * - To calculate the oracle, we need info for each cpu from
- * compute_pending_for_cpu, which considers:
- * - PPI: dist->irq_state & dist->irq_enable
- * - SPI: dist->irq_state & dist->irq_enable & dist->irq_spi_target
- * - irq_spi_target is a 'formatted' version of the GICD_ICFGR
- * registers, stored on each vcpu. We only keep one bit of
- * information per interrupt, making sure that only one vcpu can
- * accept the interrupt.
- * - The same is true when injecting an interrupt, except that we only
- * consider a single interrupt at a time. The irq_spi_cpu array
- * contains the target CPU for each SPI.
- *
- * The handling of level interrupts adds some extra complexity. We
- * need to track when the interrupt has been EOIed, so we can sample
- * the 'line' again. This is achieved as such:
- *
- * - When a level interrupt is moved onto a vcpu, the corresponding
- * bit in irq_active is set. As long as this bit is set, the line
- * will be ignored for further interrupts. The interrupt is injected
- * into the vcpu with the GICH_LR_EOI bit set (generate a
- * maintenance interrupt on EOI).
- * - When the interrupt is EOIed, the maintenance interrupt fires,
- * and clears the corresponding bit in irq_active. This allow the
- * interrupt line to be sampled again.
- */
-
-#define VGIC_ADDR_UNDEF (-1)
-#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
-
-/* Physical address of vgic virtual cpu interface */
-static phys_addr_t vgic_vcpu_base;
-
-/* Virtual control interface base address */
-static void __iomem *vgic_vctrl_base;
-
-static struct device_node *vgic_node;
-
-#define ACCESS_READ_VALUE (1 << 0)
-#define ACCESS_READ_RAZ (0 << 0)
-#define ACCESS_READ_MASK(x) ((x) & (1 << 0))
-#define ACCESS_WRITE_IGNORED (0 << 1)
-#define ACCESS_WRITE_SETBIT (1 << 1)
-#define ACCESS_WRITE_CLEARBIT (2 << 1)
-#define ACCESS_WRITE_VALUE (3 << 1)
-#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
-
-static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
-static void vgic_update_state(struct kvm *kvm);
-static void vgic_kick_vcpus(struct kvm *kvm);
-static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
-static u32 vgic_nr_lr;
-
-static unsigned int vgic_maint_irq;
-
-static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
- int cpuid, u32 offset)
-{
- offset >>= 2;
- if (!offset)
- return x->percpu[cpuid].reg;
- else
- return x->shared.reg + offset - 1;
-}
-
-static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
- int cpuid, int irq)
-{
- if (irq < VGIC_NR_PRIVATE_IRQS)
- return test_bit(irq, x->percpu[cpuid].reg_ul);
-
- return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared.reg_ul);
-}
-
-static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
- int irq, int val)
-{
- unsigned long *reg;
-
- if (irq < VGIC_NR_PRIVATE_IRQS) {
- reg = x->percpu[cpuid].reg_ul;
- } else {
- reg = x->shared.reg_ul;
- irq -= VGIC_NR_PRIVATE_IRQS;
- }
-
- if (val)
- set_bit(irq, reg);
- else
- clear_bit(irq, reg);
-}
-
-static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
-{
- if (unlikely(cpuid >= VGIC_MAX_CPUS))
- return NULL;
- return x->percpu[cpuid].reg_ul;
-}
-
-static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
-{
- return x->shared.reg_ul;
-}
-
-static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
-{
- offset >>= 2;
- BUG_ON(offset > (VGIC_NR_IRQS / 4));
- if (offset < 4)
- return x->percpu[cpuid] + offset;
- else
- return x->shared + offset - 8;
-}
-
-#define VGIC_CFG_LEVEL 0
-#define VGIC_CFG_EDGE 1
-
-static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int irq_val;
-
- irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
- return irq_val == VGIC_CFG_EDGE;
-}
-
-static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
-}
-
-static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
-}
-
-static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
-}
-
-static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
-}
-
-static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
-}
-
-static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 1);
-}
-
-static void vgic_dist_irq_clear(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- vgic_bitmap_set_irq_val(&dist->irq_state, vcpu->vcpu_id, irq, 0);
-}
-
-static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
-{
- if (irq < VGIC_NR_PRIVATE_IRQS)
- set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
- else
- set_bit(irq - VGIC_NR_PRIVATE_IRQS,
- vcpu->arch.vgic_cpu.pending_shared);
-}
-
-static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
-{
- if (irq < VGIC_NR_PRIVATE_IRQS)
- clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
- else
- clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
- vcpu->arch.vgic_cpu.pending_shared);
-}
-
-static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
-{
- return *((u32 *)mmio->data) & mask;
-}
-
-static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
-{
- *((u32 *)mmio->data) = value & mask;
-}
-
-/**
- * vgic_reg_access - access vgic register
- * @mmio: pointer to the data describing the mmio access
- * @reg: pointer to the virtual backing of vgic distributor data
- * @offset: least significant 2 bits used for word offset
- * @mode: ACCESS_ mode (see defines above)
- *
- * Helper to make vgic register access easier using one of the access
- * modes defined for vgic register access
- * (read,raz,write-ignored,setbit,clearbit,write)
- */
-static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
- phys_addr_t offset, int mode)
-{
- int word_offset = (offset & 3) * 8;
- u32 mask = (1UL << (mmio->len * 8)) - 1;
- u32 regval;
-
- /*
- * Any alignment fault should have been delivered to the guest
- * directly (ARM ARM B3.12.7 "Prioritization of aborts").
- */
-
- if (reg) {
- regval = *reg;
- } else {
- BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
- regval = 0;
- }
-
- if (mmio->is_write) {
- u32 data = mmio_data_read(mmio, mask) << word_offset;
- switch (ACCESS_WRITE_MASK(mode)) {
- case ACCESS_WRITE_IGNORED:
- return;
-
- case ACCESS_WRITE_SETBIT:
- regval |= data;
- break;
-
- case ACCESS_WRITE_CLEARBIT:
- regval &= ~data;
- break;
-
- case ACCESS_WRITE_VALUE:
- regval = (regval & ~(mask << word_offset)) | data;
- break;
- }
- *reg = regval;
- } else {
- switch (ACCESS_READ_MASK(mode)) {
- case ACCESS_READ_RAZ:
- regval = 0;
- /* fall through */
-
- case ACCESS_READ_VALUE:
- mmio_data_write(mmio, mask, regval >> word_offset);
- }
- }
-}
-
-static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
- u32 reg;
- u32 word_offset = offset & 3;
-
- switch (offset & ~3) {
- case 0: /* CTLR */
- reg = vcpu->kvm->arch.vgic.enabled;
- vgic_reg_access(mmio, &reg, word_offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
- if (mmio->is_write) {
- vcpu->kvm->arch.vgic.enabled = reg & 1;
- vgic_update_state(vcpu->kvm);
- return true;
- }
- break;
-
- case 4: /* TYPER */
- reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
- reg |= (VGIC_NR_IRQS >> 5) - 1;
- vgic_reg_access(mmio, &reg, word_offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
- break;
-
- case 8: /* IIDR */
- reg = 0x4B00043B;
- vgic_reg_access(mmio, &reg, word_offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
- break;
- }
-
- return false;
-}
-
-static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
- vgic_reg_access(mmio, NULL, offset,
- ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
- return false;
-}
-
-static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
- vcpu->vcpu_id, offset);
- vgic_reg_access(mmio, reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
- if (mmio->is_write) {
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
- vcpu->vcpu_id, offset);
- vgic_reg_access(mmio, reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
- if (mmio->is_write) {
- if (offset < 4) /* Force SGI enabled */
- *reg |= 0xffff;
- vgic_retire_disabled_irqs(vcpu);
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
- vcpu->vcpu_id, offset);
- vgic_reg_access(mmio, reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
- if (mmio->is_write) {
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_state,
- vcpu->vcpu_id, offset);
- vgic_reg_access(mmio, reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
- if (mmio->is_write) {
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
- vcpu->vcpu_id, offset);
- vgic_reg_access(mmio, reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
- return false;
-}
-
-#define GICD_ITARGETSR_SIZE 32
-#define GICD_CPUTARGETS_BITS 8
-#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
-static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct kvm_vcpu *vcpu;
- int i, c;
- unsigned long *bmap;
- u32 val = 0;
-
- irq -= VGIC_NR_PRIVATE_IRQS;
-
- kvm_for_each_vcpu(c, vcpu, kvm) {
- bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
- for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
- if (test_bit(irq + i, bmap))
- val |= 1 << (c + i * 8);
- }
-
- return val;
-}
-
-static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct kvm_vcpu *vcpu;
- int i, c;
- unsigned long *bmap;
- u32 target;
-
- irq -= VGIC_NR_PRIVATE_IRQS;
-
- /*
- * Pick the LSB in each byte. This ensures we target exactly
- * one vcpu per IRQ. If the byte is null, assume we target
- * CPU0.
- */
- for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
- int shift = i * GICD_CPUTARGETS_BITS;
- target = ffs((val >> shift) & 0xffU);
- target = target ? (target - 1) : 0;
- dist->irq_spi_cpu[irq + i] = target;
- kvm_for_each_vcpu(c, vcpu, kvm) {
- bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
- if (c == target)
- set_bit(irq + i, bmap);
- else
- clear_bit(irq + i, bmap);
- }
- }
-}
-
-static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
-{
- u32 reg;
-
- /* We treat the banked interrupts targets as read-only */
- if (offset < 32) {
- u32 roreg = 1 << vcpu->vcpu_id;
- roreg |= roreg << 8;
- roreg |= roreg << 16;
-
- vgic_reg_access(mmio, &roreg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
- return false;
- }
-
- reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
- vgic_reg_access(mmio, &reg, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
- if (mmio->is_write) {
- vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-static u32 vgic_cfg_expand(u16 val)
-{
- u32 res = 0;
- int i;
-
- /*
- * Turn a 16bit value like abcd...mnop into a 32bit word
- * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
- */
- for (i = 0; i < 16; i++)
- res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
-
- return res;
-}
-
-static u16 vgic_cfg_compress(u32 val)
-{
- u16 res = 0;
- int i;
-
- /*
- * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like
- * abcd...mnop which is what we really care about.
- */
- for (i = 0; i < 16; i++)
- res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
-
- return res;
-}
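A worked example of the pair being removed here: vgic_cfg_expand(0x0003) spreads each config bit onto the odd bits of the 32-bit word, giving 0x0000000a, and vgic_cfg_compress(0x0000000a) recovers 0x0003:

	u32 expanded = vgic_cfg_expand(0x0003);		/* == 0x0000000a */
	u16 packed   = vgic_cfg_compress(expanded);	/* == 0x0003 */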
-
-/*
- * The distributor uses 2 bits per IRQ for the CFG register, but the
- * LSB is always 0. As such, we only keep the upper bit, and use the
- * two above functions to compress/expand the bits
- */
-static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
- u32 val;
- u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
- vcpu->vcpu_id, offset >> 1);
- if (offset & 2)
- val = *reg >> 16;
- else
- val = *reg & 0xffff;
-
- val = vgic_cfg_expand(val);
- vgic_reg_access(mmio, &val, offset,
- ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
- if (mmio->is_write) {
- if (offset < 4) {
- *reg = ~0U; /* Force PPIs/SGIs to 1 */
- return false;
- }
-
- val = vgic_cfg_compress(val);
- if (offset & 2) {
- *reg &= 0xffff;
- *reg |= val << 16;
- } else {
- *reg &= 0xffff << 16;
- *reg |= val;
- }
- }
-
- return false;
-}
-
-static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
- struct kvm_exit_mmio *mmio, phys_addr_t offset)
-{
- u32 reg;
- vgic_reg_access(mmio, &reg, offset,
- ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
- if (mmio->is_write) {
- vgic_dispatch_sgi(vcpu, reg);
- vgic_update_state(vcpu->kvm);
- return true;
- }
-
- return false;
-}
-
-/*
- * I would have liked to use the kvm_bus_io_*() API instead, but it
- * cannot cope with banked registers (only the VM pointer is passed
- * around, and we need the vcpu). One of these days, someone please
- * fix it!
- */
-struct mmio_range {
- phys_addr_t base;
- unsigned long len;
- bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
- phys_addr_t offset);
-};
-
-static const struct mmio_range vgic_ranges[] = {
- {
- .base = GIC_DIST_CTRL,
- .len = 12,
- .handle_mmio = handle_mmio_misc,
- },
- {
- .base = GIC_DIST_IGROUP,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_raz_wi,
- },
- {
- .base = GIC_DIST_ENABLE_SET,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_set_enable_reg,
- },
- {
- .base = GIC_DIST_ENABLE_CLEAR,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_clear_enable_reg,
- },
- {
- .base = GIC_DIST_PENDING_SET,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_set_pending_reg,
- },
- {
- .base = GIC_DIST_PENDING_CLEAR,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_clear_pending_reg,
- },
- {
- .base = GIC_DIST_ACTIVE_SET,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_raz_wi,
- },
- {
- .base = GIC_DIST_ACTIVE_CLEAR,
- .len = VGIC_NR_IRQS / 8,
- .handle_mmio = handle_mmio_raz_wi,
- },
- {
- .base = GIC_DIST_PRI,
- .len = VGIC_NR_IRQS,
- .handle_mmio = handle_mmio_priority_reg,
- },
- {
- .base = GIC_DIST_TARGET,
- .len = VGIC_NR_IRQS,
- .handle_mmio = handle_mmio_target_reg,
- },
- {
- .base = GIC_DIST_CONFIG,
- .len = VGIC_NR_IRQS / 4,
- .handle_mmio = handle_mmio_cfg_reg,
- },
- {
- .base = GIC_DIST_SOFTINT,
- .len = 4,
- .handle_mmio = handle_mmio_sgi_reg,
- },
- {}
-};
-
-static const
-struct mmio_range *find_matching_range(const struct mmio_range *ranges,
- struct kvm_exit_mmio *mmio,
- phys_addr_t base)
-{
- const struct mmio_range *r = ranges;
- phys_addr_t addr = mmio->phys_addr - base;
-
- while (r->len) {
- if (addr >= r->base &&
- (addr + mmio->len) <= (r->base + r->len))
- return r;
- r++;
- }
-
- return NULL;
-}
-
-/**
- * vgic_handle_mmio - handle an in-kernel MMIO access
- * @vcpu: pointer to the vcpu performing the access
- * @run: pointer to the kvm_run structure
- * @mmio: pointer to the data describing the access
- *
- * returns true if the MMIO access has been performed in kernel space,
- * and false if it needs to be emulated in user space.
- */
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
-{
- const struct mmio_range *range;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- unsigned long base = dist->vgic_dist_base;
- bool updated_state;
- unsigned long offset;
-
- if (!irqchip_in_kernel(vcpu->kvm) ||
- mmio->phys_addr < base ||
- (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
- return false;
-
- /* We don't support ldrd / strd or ldm / stm to the emulated vgic */
- if (mmio->len > 4) {
- kvm_inject_dabt(vcpu, mmio->phys_addr);
- return true;
- }
-
- range = find_matching_range(vgic_ranges, mmio, base);
- if (unlikely(!range || !range->handle_mmio)) {
- pr_warn("Unhandled access %d %08llx %d\n",
- mmio->is_write, mmio->phys_addr, mmio->len);
- return false;
- }
-
- spin_lock(&vcpu->kvm->arch.vgic.lock);
- offset = mmio->phys_addr - range->base - base;
- updated_state = range->handle_mmio(vcpu, mmio, offset);
- spin_unlock(&vcpu->kvm->arch.vgic.lock);
- kvm_prepare_mmio(run, mmio);
- kvm_handle_mmio_return(vcpu, run);
-
- if (updated_state)
- vgic_kick_vcpus(vcpu->kvm);
-
- return true;
-}
-
-static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
-{
- struct kvm *kvm = vcpu->kvm;
- struct vgic_dist *dist = &kvm->arch.vgic;
- int nrcpus = atomic_read(&kvm->online_vcpus);
- u8 target_cpus;
- int sgi, mode, c, vcpu_id;
-
- vcpu_id = vcpu->vcpu_id;
-
- sgi = reg & 0xf;
- target_cpus = (reg >> 16) & 0xff;
- mode = (reg >> 24) & 3;
-
- switch (mode) {
- case 0:
- if (!target_cpus)
- return;
-
- case 1:
- target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
- break;
-
- case 2:
- target_cpus = 1 << vcpu_id;
- break;
- }
-
- kvm_for_each_vcpu(c, vcpu, kvm) {
- if (target_cpus & 1) {
- /* Flag the SGI as pending */
- vgic_dist_irq_set(vcpu, sgi);
- dist->irq_sgi_sources[c][sgi] |= 1 << vcpu_id;
- kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
- }
-
- target_cpus >>= 1;
- }
-}
-
-static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
- unsigned long pending_private, pending_shared;
- int vcpu_id;
-
- vcpu_id = vcpu->vcpu_id;
- pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
- pend_shared = vcpu->arch.vgic_cpu.pending_shared;
-
- pending = vgic_bitmap_get_cpu_map(&dist->irq_state, vcpu_id);
- enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
- bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
-
- pending = vgic_bitmap_get_shared_map(&dist->irq_state);
- enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
- bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
- bitmap_and(pend_shared, pend_shared,
- vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
- VGIC_NR_SHARED_IRQS);
-
- pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
- pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
- return (pending_private < VGIC_NR_PRIVATE_IRQS ||
- pending_shared < VGIC_NR_SHARED_IRQS);
-}
-
-/*
- * Update the interrupt state and determine which CPUs have pending
- * interrupts. Must be called with distributor lock held.
- */
-static void vgic_update_state(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct kvm_vcpu *vcpu;
- int c;
-
- if (!dist->enabled) {
- set_bit(0, &dist->irq_pending_on_cpu);
- return;
- }
-
- kvm_for_each_vcpu(c, vcpu, kvm) {
- if (compute_pending_for_cpu(vcpu)) {
- pr_debug("CPU%d has pending interrupts\n", c);
- set_bit(c, &dist->irq_pending_on_cpu);
- }
- }
-}
-
-#define LR_CPUID(lr) \
- (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
-#define MK_LR_PEND(src, irq) \
- (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
-
-/*
- * An interrupt may have been disabled after being made pending on the
- * CPU interface (the classic case is a timer running while we're
- * rebooting the guest - the interrupt would kick as soon as the CPU
- * interface gets enabled, with deadly consequences).
- *
- * The solution is to examine already active LRs, and check the
- * interrupt is still enabled. If not, just retire it.
- */
-static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- int lr;
-
- for_each_set_bit(lr, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
- int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
- if (!vgic_irq_is_enabled(vcpu, irq)) {
- vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
- clear_bit(lr, vgic_cpu->lr_used);
- vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE;
- if (vgic_irq_is_active(vcpu, irq))
- vgic_irq_clear_active(vcpu, irq);
- }
- }
-}
-
-/*
- * Queue an interrupt to a CPU virtual interface. Return true on success,
- * or false if it wasn't possible to queue it.
- */
-static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- int lr;
-
- /* Sanitize the input... */
- BUG_ON(sgi_source_id & ~7);
- BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
- BUG_ON(irq >= VGIC_NR_IRQS);
-
- kvm_debug("Queue IRQ%d\n", irq);
-
- lr = vgic_cpu->vgic_irq_lr_map[irq];
-
- /* Do we have an active interrupt for the same CPUID? */
- if (lr != LR_EMPTY &&
- (LR_CPUID(vgic_cpu->vgic_lr[lr]) == sgi_source_id)) {
- kvm_debug("LR%d piggyback for IRQ%d %x\n",
- lr, irq, vgic_cpu->vgic_lr[lr]);
- BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
- vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT;
- return true;
- }
-
- /* Try to use another LR for this interrupt */
- lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
- vgic_cpu->nr_lr);
- if (lr >= vgic_cpu->nr_lr)
- return false;
-
- kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
- vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
- vgic_cpu->vgic_irq_lr_map[irq] = lr;
- set_bit(lr, vgic_cpu->lr_used);
-
- if (!vgic_irq_is_edge(vcpu, irq))
- vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI;
-
- return true;
-}
-
-static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- unsigned long sources;
- int vcpu_id = vcpu->vcpu_id;
- int c;
-
- sources = dist->irq_sgi_sources[vcpu_id][irq];
-
- for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
- if (vgic_queue_irq(vcpu, c, irq))
- clear_bit(c, &sources);
- }
-
- dist->irq_sgi_sources[vcpu_id][irq] = sources;
-
- /*
- * If the sources bitmap has been cleared it means that we
- * could queue all the SGIs onto link registers (see the
- * clear_bit above), and therefore we are done with them in
- * our emulated gic and can get rid of them.
- */
- if (!sources) {
- vgic_dist_irq_clear(vcpu, irq);
- vgic_cpu_irq_clear(vcpu, irq);
- return true;
- }
-
- return false;
-}
-
-static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
-{
- if (vgic_irq_is_active(vcpu, irq))
- return true; /* level interrupt, already queued */
-
- if (vgic_queue_irq(vcpu, 0, irq)) {
- if (vgic_irq_is_edge(vcpu, irq)) {
- vgic_dist_irq_clear(vcpu, irq);
- vgic_cpu_irq_clear(vcpu, irq);
- } else {
- vgic_irq_set_active(vcpu, irq);
- }
-
- return true;
- }
-
- return false;
-}
-
-/*
- * Fill the list registers with pending interrupts before running the
- * guest.
- */
-static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int i, vcpu_id;
- int overflow = 0;
-
- vcpu_id = vcpu->vcpu_id;
-
- /*
- * We may not have any pending interrupt, or the interrupts
- * may have been serviced from another vcpu. In all cases,
- * move along.
- */
- if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
- pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
- goto epilog;
- }
-
- /* SGIs */
- for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
- if (!vgic_queue_sgi(vcpu, i))
- overflow = 1;
- }
-
- /* PPIs */
- for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
- if (!vgic_queue_hwirq(vcpu, i))
- overflow = 1;
- }
-
- /* SPIs */
- for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
- if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
- overflow = 1;
- }
-
-epilog:
- if (overflow) {
- vgic_cpu->vgic_hcr |= GICH_HCR_UIE;
- } else {
- vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
- /*
- * We're about to run this VCPU, and we've consumed
- * everything the distributor had in store for
- * us. Claim we don't have anything pending. We'll
- * adjust that if needed while exiting.
- */
- clear_bit(vcpu_id, &dist->irq_pending_on_cpu);
- }
-}
-
-static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- bool level_pending = false;
-
- kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
-
- if (vgic_cpu->vgic_misr & GICH_MISR_EOI) {
- /*
- * Some level interrupts have been EOIed. Clear their
- * active bit.
- */
- int lr, irq;
-
- for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
- vgic_cpu->nr_lr) {
- irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
- vgic_irq_clear_active(vcpu, irq);
- vgic_cpu->vgic_lr[lr] &= ~GICH_LR_EOI;
-
- /* Any additional pending interrupt? */
- if (vgic_dist_irq_is_pending(vcpu, irq)) {
- vgic_cpu_irq_set(vcpu, irq);
- level_pending = true;
- } else {
- vgic_cpu_irq_clear(vcpu, irq);
- }
-
- /*
- * Despite being EOIed, the LR may not have
- * been marked as empty.
- */
- set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
- vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT;
- }
- }
-
- if (vgic_cpu->vgic_misr & GICH_MISR_U)
- vgic_cpu->vgic_hcr &= ~GICH_HCR_UIE;
-
- return level_pending;
-}
-
-/*
- * Sync back the VGIC state after a guest run. The distributor lock is
- * needed so we don't get preempted in the middle of the state processing.
- */
-static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int lr, pending;
- bool level_pending;
-
- level_pending = vgic_process_maintenance(vcpu);
-
- /* Clear mappings for empty LRs */
- for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
- vgic_cpu->nr_lr) {
- int irq;
-
- if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
- continue;
-
- irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
-
- BUG_ON(irq >= VGIC_NR_IRQS);
- vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
- }
-
- /* Check if we still have something up our sleeve... */
- pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
- vgic_cpu->nr_lr);
- if (level_pending || pending < vgic_cpu->nr_lr)
- set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
-}
-
-void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- if (!irqchip_in_kernel(vcpu->kvm))
- return;
-
- spin_lock(&dist->lock);
- __kvm_vgic_flush_hwstate(vcpu);
- spin_unlock(&dist->lock);
-}
-
-void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- if (!irqchip_in_kernel(vcpu->kvm))
- return;
-
- spin_lock(&dist->lock);
- __kvm_vgic_sync_hwstate(vcpu);
- spin_unlock(&dist->lock);
-}
-
-int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
- if (!irqchip_in_kernel(vcpu->kvm))
- return 0;
-
- return test_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
-}
-
-static void vgic_kick_vcpus(struct kvm *kvm)
-{
- struct kvm_vcpu *vcpu;
- int c;
-
- /*
- * We've injected an interrupt, time to find out who deserves
- * a good kick...
- */
- kvm_for_each_vcpu(c, vcpu, kvm) {
- if (kvm_vgic_vcpu_pending_irq(vcpu))
- kvm_vcpu_kick(vcpu);
- }
-}
-
-static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
-{
- int is_edge = vgic_irq_is_edge(vcpu, irq);
- int state = vgic_dist_irq_is_pending(vcpu, irq);
-
- /*
- * Only inject an interrupt if:
- * - edge triggered and we have a rising edge
- * - level triggered and we change level
- */
- if (is_edge)
- return level > state;
- else
- return level != state;
-}
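
For reference, the predicate above collapses to a small truth table, where state is the currently latched pending bit and level the newly signalled input:

	edge-triggered:   inject only on a rising edge   (level = 1, state = 0)
	level-sensitive:  inject on any change of level  (level != state)

so re-raising an already-pending line is always swallowed.
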
-
-static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
- unsigned int irq_num, bool level)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct kvm_vcpu *vcpu;
- int is_edge, is_level;
- int enabled;
- bool ret = true;
-
- spin_lock(&dist->lock);
-
- vcpu = kvm_get_vcpu(kvm, cpuid);
- is_edge = vgic_irq_is_edge(vcpu, irq_num);
- is_level = !is_edge;
-
- if (!vgic_validate_injection(vcpu, irq_num, level)) {
- ret = false;
- goto out;
- }
-
- if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
- cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
- vcpu = kvm_get_vcpu(kvm, cpuid);
- }
-
- kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
-
- if (level)
- vgic_dist_irq_set(vcpu, irq_num);
- else
- vgic_dist_irq_clear(vcpu, irq_num);
-
- enabled = vgic_irq_is_enabled(vcpu, irq_num);
-
- if (!enabled) {
- ret = false;
- goto out;
- }
-
- if (is_level && vgic_irq_is_active(vcpu, irq_num)) {
- /*
- * Level interrupt in progress, will be picked up
-	 * when EOIed.
- */
- ret = false;
- goto out;
- }
-
- if (level) {
- vgic_cpu_irq_set(vcpu, irq_num);
- set_bit(cpuid, &dist->irq_pending_on_cpu);
- }
-
-out:
- spin_unlock(&dist->lock);
-
- return ret;
-}
-
-/**
- * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
- * @kvm: The VM structure pointer
- * @cpuid: The CPU for PPIs
- * @irq_num: The IRQ number that is assigned to the device
- * @level: Edge-triggered: true: to trigger the interrupt
- * false: to ignore the call
- *		 Level-sensitive: true:  activates an interrupt
- * false: deactivates an interrupt
- *
- * The GIC is not concerned with devices being active-LOW or active-HIGH for
- * level-sensitive interrupts. You can think of the level parameter as 1
- * being HIGH and 0 being LOW and all devices being active-HIGH.
- */
-int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
- bool level)
-{
- if (vgic_update_irq_state(kvm, cpuid, irq_num, level))
- vgic_kick_vcpus(kvm);
-
- return 0;
-}
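
A device model drives a level-sensitive line by calling this helper on both transitions; a minimal usage sketch (the SPI number 40 is purely illustrative):

	kvm_vgic_inject_irq(kvm, 0, 40, true);	/* device raises the line */
	/* ... guest services and EOIs the interrupt ... */
	kvm_vgic_inject_irq(kvm, 0, 40, false);	/* device drops the line */

For SPIs the cpuid argument is ignored in favour of the distributor's target register, as vgic_update_irq_state shows above.
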
-
-static irqreturn_t vgic_maintenance_handler(int irq, void *data)
-{
- /*
- * We cannot rely on the vgic maintenance interrupt to be
- * delivered synchronously. This means we can only use it to
- * exit the VM, and we perform the handling of EOIed
- * interrupts on the exit path (see vgic_process_maintenance).
- */
- return IRQ_HANDLED;
-}
-
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int i;
-
- if (!irqchip_in_kernel(vcpu->kvm))
- return 0;
-
- if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
- return -EBUSY;
-
- for (i = 0; i < VGIC_NR_IRQS; i++) {
- if (i < VGIC_NR_PPIS)
- vgic_bitmap_set_irq_val(&dist->irq_enabled,
- vcpu->vcpu_id, i, 1);
- if (i < VGIC_NR_PRIVATE_IRQS)
- vgic_bitmap_set_irq_val(&dist->irq_cfg,
- vcpu->vcpu_id, i, VGIC_CFG_EDGE);
-
- vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
- }
-
- /*
- * By forcing VMCR to zero, the GIC will restore the binary
- * points to their reset values. Anything else resets to zero
- * anyway.
- */
- vgic_cpu->vgic_vmcr = 0;
-
- vgic_cpu->nr_lr = vgic_nr_lr;
- vgic_cpu->vgic_hcr = GICH_HCR_EN; /* Get the show on the road... */
-
- return 0;
-}
-
-static void vgic_init_maintenance_interrupt(void *info)
-{
- enable_percpu_irq(vgic_maint_irq, 0);
-}
-
-static int vgic_cpu_notify(struct notifier_block *self,
- unsigned long action, void *cpu)
-{
- switch (action) {
- case CPU_STARTING:
- case CPU_STARTING_FROZEN:
- vgic_init_maintenance_interrupt(NULL);
- break;
- case CPU_DYING:
- case CPU_DYING_FROZEN:
- disable_percpu_irq(vgic_maint_irq);
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block vgic_cpu_nb = {
- .notifier_call = vgic_cpu_notify,
-};
-
-int kvm_vgic_hyp_init(void)
-{
- int ret;
- struct resource vctrl_res;
- struct resource vcpu_res;
-
- vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic");
- if (!vgic_node) {
- kvm_err("error: no compatible vgic node in DT\n");
- return -ENODEV;
- }
-
- vgic_maint_irq = irq_of_parse_and_map(vgic_node, 0);
- if (!vgic_maint_irq) {
- kvm_err("error getting vgic maintenance irq from DT\n");
- ret = -ENXIO;
- goto out;
- }
-
- ret = request_percpu_irq(vgic_maint_irq, vgic_maintenance_handler,
- "vgic", kvm_get_running_vcpus());
- if (ret) {
- kvm_err("Cannot register interrupt %d\n", vgic_maint_irq);
- goto out;
- }
-
- ret = register_cpu_notifier(&vgic_cpu_nb);
- if (ret) {
- kvm_err("Cannot register vgic CPU notifier\n");
- goto out_free_irq;
- }
-
- ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
- if (ret) {
- kvm_err("Cannot obtain VCTRL resource\n");
- goto out_free_irq;
- }
-
- vgic_vctrl_base = of_iomap(vgic_node, 2);
- if (!vgic_vctrl_base) {
- kvm_err("Cannot ioremap VCTRL\n");
- ret = -ENOMEM;
- goto out_free_irq;
- }
-
- vgic_nr_lr = readl_relaxed(vgic_vctrl_base + GICH_VTR);
- vgic_nr_lr = (vgic_nr_lr & 0x3f) + 1;
-
- ret = create_hyp_io_mappings(vgic_vctrl_base,
- vgic_vctrl_base + resource_size(&vctrl_res),
- vctrl_res.start);
- if (ret) {
- kvm_err("Cannot map VCTRL into hyp\n");
- goto out_unmap;
- }
-
- kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
- vctrl_res.start, vgic_maint_irq);
- on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
- if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
- kvm_err("Cannot obtain VCPU resource\n");
- ret = -ENXIO;
- goto out_unmap;
- }
- vgic_vcpu_base = vcpu_res.start;
-
- goto out;
-
-out_unmap:
- iounmap(vgic_vctrl_base);
-out_free_irq:
- free_percpu_irq(vgic_maint_irq, kvm_get_running_vcpus());
-out:
- of_node_put(vgic_node);
- return ret;
-}
-
-int kvm_vgic_init(struct kvm *kvm)
-{
- int ret = 0, i;
-
- mutex_lock(&kvm->lock);
-
- if (vgic_initialized(kvm))
- goto out;
-
- if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
- IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
- kvm_err("Need to set vgic cpu and dist addresses first\n");
- ret = -ENXIO;
- goto out;
- }
-
- ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
- vgic_vcpu_base, KVM_VGIC_V2_CPU_SIZE);
- if (ret) {
- kvm_err("Unable to remap VGIC CPU to VCPU\n");
- goto out;
- }
-
- for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
- vgic_set_target_reg(kvm, 0, i);
-
- kvm_timer_init(kvm);
- kvm->arch.vgic.ready = true;
-out:
- mutex_unlock(&kvm->lock);
- return ret;
-}
-
-int kvm_vgic_create(struct kvm *kvm)
-{
- int ret = 0;
-
- mutex_lock(&kvm->lock);
-
- if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
- ret = -EEXIST;
- goto out;
- }
-
- spin_lock_init(&kvm->arch.vgic.lock);
- kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
- kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
- kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
-
-out:
- mutex_unlock(&kvm->lock);
- return ret;
-}
-
-static int vgic_ioaddr_overlap(struct kvm *kvm)
-{
- phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
- phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
-
- if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
- return 0;
- if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
- (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
- return -EBUSY;
- return 0;
-}
-
-static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
- phys_addr_t addr, phys_addr_t size)
-{
- int ret;
-
- if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
- return -EEXIST;
- if (addr + size < addr)
- return -EINVAL;
-
- ret = vgic_ioaddr_overlap(kvm);
- if (ret)
- return ret;
- *ioaddr = addr;
- return ret;
-}
-
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
- int r = 0;
- struct vgic_dist *vgic = &kvm->arch.vgic;
-
- if (addr & ~KVM_PHYS_MASK)
- return -E2BIG;
-
- if (addr & (SZ_4K - 1))
- return -EINVAL;
-
- mutex_lock(&kvm->lock);
- switch (type) {
- case KVM_VGIC_V2_ADDR_TYPE_DIST:
- r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
- addr, KVM_VGIC_V2_DIST_SIZE);
- break;
- case KVM_VGIC_V2_ADDR_TYPE_CPU:
- r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
- addr, KVM_VGIC_V2_CPU_SIZE);
- break;
- default:
- r = -ENODEV;
- }
-
- mutex_unlock(&kvm->lock);
- return r;
-}
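
Taken together, the flush/sync pair removed above brackets every guest entry; a sketch of the calling sequence (hypothetical shape, mirroring the arm.c run loop):

	kvm_vgic_flush_hwstate(vcpu);		/* distributor state -> list registers */
	ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
	kvm_vgic_sync_hwstate(vcpu);		/* list registers -> distributor state */

Anything still pending after the sync step simply re-arms irq_pending_on_cpu for the next iteration.
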
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb007..3bc8eb811a73 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -197,24 +197,24 @@
12: PLD( pld [r1, #124] )
13: ldr4w r1, r4, r5, r6, r7, abort=19f
- mov r3, lr, pull #\pull
+ mov r3, lr, lspull #\pull
subs r2, r2, #32
ldr4w r1, r8, r9, ip, lr, abort=19f
- orr r3, r3, r4, push #\push
- mov r4, r4, pull #\pull
- orr r4, r4, r5, push #\push
- mov r5, r5, pull #\pull
- orr r5, r5, r6, push #\push
- mov r6, r6, pull #\pull
- orr r6, r6, r7, push #\push
- mov r7, r7, pull #\pull
- orr r7, r7, r8, push #\push
- mov r8, r8, pull #\pull
- orr r8, r8, r9, push #\push
- mov r9, r9, pull #\pull
- orr r9, r9, ip, push #\push
- mov ip, ip, pull #\pull
- orr ip, ip, lr, push #\push
+ orr r3, r3, r4, lspush #\push
+ mov r4, r4, lspull #\pull
+ orr r4, r4, r5, lspush #\push
+ mov r5, r5, lspull #\pull
+ orr r5, r5, r6, lspush #\push
+ mov r6, r6, lspull #\pull
+ orr r6, r6, r7, lspush #\push
+ mov r7, r7, lspull #\pull
+ orr r7, r7, r8, lspush #\push
+ mov r8, r8, lspull #\pull
+ orr r8, r8, r9, lspush #\push
+ mov r9, r9, lspull #\pull
+ orr r9, r9, ip, lspush #\push
+ mov ip, ip, lspull #\pull
+ orr ip, ip, lr, lspush #\push
str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
bge 12b
PLD( cmn r2, #96 )
@@ -225,10 +225,10 @@
14: ands ip, r2, #28
beq 16f
-15: mov r3, lr, pull #\pull
+15: mov r3, lr, lspull #\pull
ldr1w r1, lr, abort=21f
subs ip, ip, #4
- orr r3, r3, lr, push #\push
+ orr r3, r3, lr, lspush #\push
str1w r0, r3, abort=21f
bgt 15b
CALGN( cmp r2, #0 )
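
The rename in this hunk is purely mechanical: binutils' unified assembler syntax reserves push as a mnemonic, so the shift-operand macros become lspull/lspush while the generated code is unchanged. As a C sketch of what one pull/push pair computes when copying from a misaligned source (little-endian; names are illustrative, and offset must be 1, 2 or 3):

	#include <stdint.h>

	/*
	 * Rebuild one aligned destination word from two aligned source
	 * words when the source pointer is 'offset' bytes past alignment.
	 * lspull #(8*offset) drops the bytes already consumed; lspush
	 * #(32 - 8*offset) splices in the bytes of the following word.
	 */
	static uint32_t merge(uint32_t lo, uint32_t hi, unsigned int offset)
	{
		unsigned int pull = 8 * offset;	/* lspull amount */
		unsigned int push = 32 - pull;	/* lspush amount */

		return (lo >> pull) | (hi << push);
	}

On big-endian the macros expand to the opposite shift directions, which is exactly why the code goes through macros instead of writing lsr/lsl directly.
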
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a09..d6e742d24007 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -141,7 +141,7 @@ FN_ENTRY
tst len, #2
mov r5, r4, get_byte_0
beq .Lexit
- adcs sum, sum, r4, push #16
+ adcs sum, sum, r4, lspush #16
strb r5, [dst], #1
mov r5, r4, get_byte_1
strb r5, [dst], #1
@@ -171,23 +171,23 @@ FN_ENTRY
cmp ip, #2
beq .Lsrc2_aligned
bhi .Lsrc3_aligned
- mov r4, r5, pull #8 @ C = 0
+ mov r4, r5, lspull #8 @ C = 0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
- mov r7, r7, pull #8
- orr r7, r7, r8, push #24
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
+ mov r7, r7, lspull #8
+ orr r7, r7, r8, lspush #24
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #8
+ mov r4, r8, lspull #8
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -196,50 +196,50 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #8
+ mov r4, r6, lspull #8
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #24
+ orr r4, r4, r5, lspush #24
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #8
+ mov r4, r5, lspull #8
4: ands len, len, #3
beq .Ldone
mov r5, r4, get_byte_0
tst len, #2
beq .Lexit
- adcs sum, sum, r4, push #16
+ adcs sum, sum, r4, lspush #16
strb r5, [dst], #1
mov r5, r4, get_byte_1
strb r5, [dst], #1
mov r5, r4, get_byte_2
b .Lexit
-.Lsrc2_aligned: mov r4, r5, pull #16
+.Lsrc2_aligned: mov r4, r5, lspull #16
adds sum, sum, #0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
- mov r7, r7, pull #16
- orr r7, r7, r8, push #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
+ mov r7, r7, lspull #16
+ orr r7, r7, r8, lspush #16
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #16
+ mov r4, r8, lspull #16
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -248,20 +248,20 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #16
+ mov r4, r6, lspull #16
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #16
+ orr r4, r4, r5, lspush #16
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #16
+ mov r4, r5, lspull #16
4: ands len, len, #3
beq .Ldone
mov r5, r4, get_byte_0
@@ -276,24 +276,24 @@ FN_ENTRY
load1b r5
b .Lexit
-.Lsrc3_aligned: mov r4, r5, pull #24
+.Lsrc3_aligned: mov r4, r5, lspull #24
adds sum, sum, #0
bics ip, len, #15
beq 2f
1: load4l r5, r6, r7, r8
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
- mov r7, r7, pull #24
- orr r7, r7, r8, push #8
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
+ mov r7, r7, lspull #24
+ orr r7, r7, r8, lspush #8
stmia dst!, {r4, r5, r6, r7}
adcs sum, sum, r4
adcs sum, sum, r5
adcs sum, sum, r6
adcs sum, sum, r7
- mov r4, r8, pull #24
+ mov r4, r8, lspull #24
sub ip, ip, #16
teq ip, #0
bne 1b
@@ -302,20 +302,20 @@ FN_ENTRY
tst ip, #8
beq 3f
load2l r5, r6
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
stmia dst!, {r4, r5}
adcs sum, sum, r4
adcs sum, sum, r5
- mov r4, r6, pull #24
+ mov r4, r6, lspull #24
tst ip, #4
beq 4f
3: load1l r5
- orr r4, r4, r5, push #8
+ orr r4, r4, r5, lspush #8
str r4, [dst], #4
adcs sum, sum, r4
- mov r4, r5, pull #24
+ mov r4, r5, lspull #24
4: ands len, len, #3
beq .Ldone
mov r5, r4, get_byte_0
@@ -326,7 +326,7 @@ FN_ENTRY
load1l r4
mov r5, r4, get_byte_0
strb r5, [dst], #1
- adcs sum, sum, r4, push #24
+ adcs sum, sum, r4, lspush #24
mov r5, r4, get_byte_1
b .Lexit
FN_EXIT
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4b..7a7430950c79 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -47,25 +47,25 @@ ENTRY(__raw_readsl)
strb ip, [r1], #1
4: subs r2, r2, #1
- mov ip, r3, pull #24
+ mov ip, r3, lspull #24
ldrne r3, [r0]
- orrne ip, ip, r3, push #8
+ orrne ip, ip, r3, lspush #8
strne ip, [r1], #4
bne 4b
b 8f
5: subs r2, r2, #1
- mov ip, r3, pull #16
+ mov ip, r3, lspull #16
ldrne r3, [r0]
- orrne ip, ip, r3, push #16
+ orrne ip, ip, r3, lspush #16
strne ip, [r1], #4
bne 5b
b 7f
6: subs r2, r2, #1
- mov ip, r3, pull #8
+ mov ip, r3, lspull #8
ldrne r3, [r0]
- orrne ip, ip, r3, push #24
+ orrne ip, ip, r3, lspush #24
strne ip, [r1], #4
bne 6b
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725c..d0d104a0dd11 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -41,26 +41,26 @@ ENTRY(__raw_writesl)
blt 5f
bgt 6f
-4: mov ip, r3, pull #16
+4: mov ip, r3, lspull #16
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #16
+ orr ip, ip, r3, lspush #16
str ip, [r0]
bne 4b
mov pc, lr
-5: mov ip, r3, pull #8
+5: mov ip, r3, lspull #8
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #24
+ orr ip, ip, r3, lspush #24
str ip, [r0]
bne 5b
mov pc, lr
-6: mov ip, r3, pull #24
+6: mov ip, r3, lspull #24
ldr r3, [r1], #4
subs r2, r2, #1
- orr ip, ip, r3, push #8
+ orr ip, ip, r3, lspush #8
str ip, [r0]
bne 6b
mov pc, lr
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 938fc14f962d..d1fc0c0c342c 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -147,24 +147,24 @@ ENTRY(memmove)
12: PLD( pld [r1, #-128] )
13: ldmdb r1!, {r7, r8, r9, ip}
- mov lr, r3, push #\push
+ mov lr, r3, lspush #\push
subs r2, r2, #32
ldmdb r1!, {r3, r4, r5, r6}
- orr lr, lr, ip, pull #\pull
- mov ip, ip, push #\push
- orr ip, ip, r9, pull #\pull
- mov r9, r9, push #\push
- orr r9, r9, r8, pull #\pull
- mov r8, r8, push #\push
- orr r8, r8, r7, pull #\pull
- mov r7, r7, push #\push
- orr r7, r7, r6, pull #\pull
- mov r6, r6, push #\push
- orr r6, r6, r5, pull #\pull
- mov r5, r5, push #\push
- orr r5, r5, r4, pull #\pull
- mov r4, r4, push #\push
- orr r4, r4, r3, pull #\pull
+ orr lr, lr, ip, lspull #\pull
+ mov ip, ip, lspush #\push
+ orr ip, ip, r9, lspull #\pull
+ mov r9, r9, lspush #\push
+ orr r9, r9, r8, lspull #\pull
+ mov r8, r8, lspush #\push
+ orr r8, r8, r7, lspull #\pull
+ mov r7, r7, lspush #\push
+ orr r7, r7, r6, lspull #\pull
+ mov r6, r6, lspush #\push
+ orr r6, r6, r5, lspull #\pull
+ mov r5, r5, lspush #\push
+ orr r5, r5, r4, lspull #\pull
+ mov r4, r4, lspush #\push
+ orr r4, r4, r3, lspull #\pull
stmdb r0!, {r4 - r9, ip, lr}
bge 12b
PLD( cmn r2, #96 )
@@ -175,10 +175,10 @@ ENTRY(memmove)
14: ands ip, r2, #28
beq 16f
-15: mov lr, r3, push #\push
+15: mov lr, r3, lspush #\push
ldr r3, [r1, #-4]!
subs ip, ip, #4
- orr lr, lr, r3, pull #\pull
+ orr lr, lr, r3, lspull #\pull
str lr, [r0, #-4]!
bgt 15b
CALGN( cmp r2, #0 )
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
index 5c908b1cb8ed..e50520904b76 100644
--- a/arch/arm/lib/uaccess.S
+++ b/arch/arm/lib/uaccess.S
@@ -117,9 +117,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault
.Lc2u_1fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lc2u_1nowords
- mov r3, r7, pull #8
+ mov r3, r7, lspull #8
ldr r7, [r1], #4
- orr r3, r3, r7, push #24
+ orr r3, r3, r7, lspush #24
USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -131,30 +131,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault
subs ip, ip, #16
blt .Lc2u_1rem8lp
-.Lc2u_1cpy8lp: mov r3, r7, pull #8
+.Lc2u_1cpy8lp: mov r3, r7, lspull #8
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #24
- mov r4, r4, pull #8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
+ orr r3, r3, r4, lspush #24
+ mov r4, r4, lspull #8
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
stmia r0!, {r3 - r6} @ Shouldnt fault
bpl .Lc2u_1cpy8lp
.Lc2u_1rem8lp: tst ip, #8
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #24
- movne r4, r4, pull #8
- orrne r4, r4, r7, push #24
+ orrne r3, r3, r4, lspush #24
+ movne r4, r4, lspull #8
+ orrne r4, r4, r7, lspush #24
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #24
+ orrne r3, r3, r7, lspush #24
TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
beq .Lc2u_1fupi
@@ -172,9 +172,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault
.Lc2u_2fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lc2u_2nowords
- mov r3, r7, pull #16
+ mov r3, r7, lspull #16
ldr r7, [r1], #4
- orr r3, r3, r7, push #16
+ orr r3, r3, r7, lspush #16
USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -186,30 +186,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault
subs ip, ip, #16
blt .Lc2u_2rem8lp
-.Lc2u_2cpy8lp: mov r3, r7, pull #16
+.Lc2u_2cpy8lp: mov r3, r7, lspull #16
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #16
- mov r4, r4, pull #16
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
+ orr r3, r3, r4, lspush #16
+ mov r4, r4, lspull #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
stmia r0!, {r3 - r6} @ Shouldnt fault
bpl .Lc2u_2cpy8lp
.Lc2u_2rem8lp: tst ip, #8
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #16
- movne r4, r4, pull #16
- orrne r4, r4, r7, push #16
+ orrne r3, r3, r4, lspush #16
+ movne r4, r4, lspull #16
+ orrne r4, r4, r7, lspush #16
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #16
+ orrne r3, r3, r7, lspush #16
TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
beq .Lc2u_2fupi
@@ -227,9 +227,9 @@ USER( TUSER( strgtb) r3, [r0], #1) @ May fault
.Lc2u_3fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lc2u_3nowords
- mov r3, r7, pull #24
+ mov r3, r7, lspull #24
ldr r7, [r1], #4
- orr r3, r3, r7, push #8
+ orr r3, r3, r7, lspush #8
USER( TUSER( str) r3, [r0], #4) @ May fault
mov ip, r0, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -241,30 +241,30 @@ USER( TUSER( str) r3, [r0], #4) @ May fault
subs ip, ip, #16
blt .Lc2u_3rem8lp
-.Lc2u_3cpy8lp: mov r3, r7, pull #24
+.Lc2u_3cpy8lp: mov r3, r7, lspull #24
ldmia r1!, {r4 - r7}
subs ip, ip, #16
- orr r3, r3, r4, push #8
- mov r4, r4, pull #24
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
+ orr r3, r3, r4, lspush #8
+ mov r4, r4, lspull #24
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
stmia r0!, {r3 - r6} @ Shouldnt fault
bpl .Lc2u_3cpy8lp
.Lc2u_3rem8lp: tst ip, #8
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
ldmneia r1!, {r4, r7}
- orrne r3, r3, r4, push #8
- movne r4, r4, pull #24
- orrne r4, r4, r7, push #8
+ orrne r3, r3, r4, lspush #8
+ movne r4, r4, lspull #24
+ orrne r4, r4, r7, lspush #8
stmneia r0!, {r3 - r4} @ Shouldnt fault
tst ip, #4
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
ldrne r7, [r1], #4
- orrne r3, r3, r7, push #8
+ orrne r3, r3, r7, lspush #8
TUSER( strne) r3, [r0], #4 @ Shouldnt fault
ands ip, ip, #3
beq .Lc2u_3fupi
@@ -382,9 +382,9 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault
.Lcfu_1fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lcfu_1nowords
- mov r3, r7, pull #8
+ mov r3, r7, lspull #8
USER( TUSER( ldr) r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #24
+ orr r3, r3, r7, lspush #24
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -396,30 +396,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault
subs ip, ip, #16
blt .Lcfu_1rem8lp
-.Lcfu_1cpy8lp: mov r3, r7, pull #8
+.Lcfu_1cpy8lp: mov r3, r7, lspull #8
ldmia r1!, {r4 - r7} @ Shouldnt fault
subs ip, ip, #16
- orr r3, r3, r4, push #24
- mov r4, r4, pull #8
- orr r4, r4, r5, push #24
- mov r5, r5, pull #8
- orr r5, r5, r6, push #24
- mov r6, r6, pull #8
- orr r6, r6, r7, push #24
+ orr r3, r3, r4, lspush #24
+ mov r4, r4, lspull #8
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
stmia r0!, {r3 - r6}
bpl .Lcfu_1cpy8lp
.Lcfu_1rem8lp: tst ip, #8
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #24
- movne r4, r4, pull #8
- orrne r4, r4, r7, push #24
+ orrne r3, r3, r4, lspush #24
+ movne r4, r4, lspull #8
+ orrne r4, r4, r7, lspush #24
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #8
+ movne r3, r7, lspull #8
USER( TUSER( ldrne) r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #24
+ orrne r3, r3, r7, lspush #24
strne r3, [r0], #4
ands ip, ip, #3
beq .Lcfu_1fupi
@@ -437,9 +437,9 @@ USER( TUSER( ldrne) r7, [r1], #4) @ May fault
.Lcfu_2fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lcfu_2nowords
- mov r3, r7, pull #16
+ mov r3, r7, lspull #16
USER( TUSER( ldr) r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #16
+ orr r3, r3, r7, lspush #16
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -452,30 +452,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault
blt .Lcfu_2rem8lp
-.Lcfu_2cpy8lp: mov r3, r7, pull #16
+.Lcfu_2cpy8lp: mov r3, r7, lspull #16
ldmia r1!, {r4 - r7} @ Shouldnt fault
subs ip, ip, #16
- orr r3, r3, r4, push #16
- mov r4, r4, pull #16
- orr r4, r4, r5, push #16
- mov r5, r5, pull #16
- orr r5, r5, r6, push #16
- mov r6, r6, pull #16
- orr r6, r6, r7, push #16
+ orr r3, r3, r4, lspush #16
+ mov r4, r4, lspull #16
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
stmia r0!, {r3 - r6}
bpl .Lcfu_2cpy8lp
.Lcfu_2rem8lp: tst ip, #8
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #16
- movne r4, r4, pull #16
- orrne r4, r4, r7, push #16
+ orrne r3, r3, r4, lspush #16
+ movne r4, r4, lspull #16
+ orrne r4, r4, r7, lspush #16
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #16
+ movne r3, r7, lspull #16
USER( TUSER( ldrne) r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #16
+ orrne r3, r3, r7, lspush #16
strne r3, [r0], #4
ands ip, ip, #3
beq .Lcfu_2fupi
@@ -493,9 +493,9 @@ USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault
.Lcfu_3fupi: subs r2, r2, #4
addmi ip, r2, #4
bmi .Lcfu_3nowords
- mov r3, r7, pull #24
+ mov r3, r7, lspull #24
USER( TUSER( ldr) r7, [r1], #4) @ May fault
- orr r3, r3, r7, push #8
+ orr r3, r3, r7, lspush #8
str r3, [r0], #4
mov ip, r1, lsl #32 - PAGE_SHIFT
rsb ip, ip, #0
@@ -507,30 +507,30 @@ USER( TUSER( ldr) r7, [r1], #4) @ May fault
subs ip, ip, #16
blt .Lcfu_3rem8lp
-.Lcfu_3cpy8lp: mov r3, r7, pull #24
+.Lcfu_3cpy8lp: mov r3, r7, lspull #24
ldmia r1!, {r4 - r7} @ Shouldnt fault
- orr r3, r3, r4, push #8
- mov r4, r4, pull #24
- orr r4, r4, r5, push #8
- mov r5, r5, pull #24
- orr r5, r5, r6, push #8
- mov r6, r6, pull #24
- orr r6, r6, r7, push #8
+ orr r3, r3, r4, lspush #8
+ mov r4, r4, lspull #24
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
stmia r0!, {r3 - r6}
subs ip, ip, #16
bpl .Lcfu_3cpy8lp
.Lcfu_3rem8lp: tst ip, #8
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
ldmneia r1!, {r4, r7} @ Shouldnt fault
- orrne r3, r3, r4, push #8
- movne r4, r4, pull #24
- orrne r4, r4, r7, push #8
+ orrne r3, r3, r4, lspush #8
+ movne r4, r4, lspull #24
+ orrne r4, r4, r7, lspush #8
stmneia r0!, {r3 - r4}
tst ip, #4
- movne r3, r7, pull #24
+ movne r3, r7, lspull #24
USER( TUSER( ldrne) r7, [r1], #4) @ May fault
- orrne r3, r3, r7, push #8
+ orrne r3, r3, r7, lspush #8
strne r3, [r0], #4
ands ip, ip, #3
beq .Lcfu_3fupi
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index c61d2373408c..990837379112 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -10,6 +10,7 @@
#include <asm/system_info.h>
pgd_t *idmap_pgd;
+phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
#ifdef CONFIG_ARM_LPAE
static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
@@ -74,8 +75,8 @@ static void identity_mapping_add(pgd_t *pgd, const char *text_start,
unsigned long addr, end;
unsigned long next;
- addr = virt_to_phys(text_start);
- end = virt_to_phys(text_end);
+ addr = virt_to_idmap(text_start);
+ end = virt_to_idmap(text_end);
prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
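
This hunk lets platforms whose identity-map alias differs from the linear mapping supply their own translation through the new arch_virt_to_idmap hook; virt_to_idmap() is expected to fall back to virt_to_phys() when the hook is unset. A sketch of the helper (the real definition lives in the asm/memory.h change listed in the diffstat):

	static inline phys_addr_t virt_to_idmap(unsigned long x)
	{
		if (arch_virt_to_idmap)
			return arch_virt_to_idmap(x);

		return virt_to_phys(x);
	}
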
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e0a9b48b02c3..279594b83781 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -443,6 +443,8 @@ source "drivers/firmware/Kconfig"
source "fs/Kconfig"
+source "arch/arm64/kvm/Kconfig"
+
source "arch/arm64/Kconfig.debug"
source "security/Kconfig"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 8f63c8a21b7e..8a311839e2d7 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -43,6 +43,7 @@ TEXT_OFFSET := 0x00080000
export TEXT_OFFSET GZFLAGS
core-y += arch/arm64/kernel/ arch/arm64/mm/
+core-$(CONFIG_KVM) += arch/arm64/kvm/
core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
libs-y := arch/arm64/lib/ $(libs-y)
libs-y += $(LIBGCC)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index c404fb0df3a6..27f54a7cc81b 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -41,6 +41,7 @@
#define ARM_CPU_PART_AEM_V8 0xD0F0
#define ARM_CPU_PART_FOUNDATION 0xD000
+#define ARM_CPU_PART_CORTEX_A53 0xD030
#define ARM_CPU_PART_CORTEX_A57 0xD070
#define APM_CPU_PART_POTENZA 0x0000
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 7c951a510b54..aab72ce22348 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -18,6 +18,15 @@
#ifdef __KERNEL__
+/* Low-level stepping controls. */
+#define DBG_MDSCR_SS (1 << 0)
+#define DBG_SPSR_SS (1 << 21)
+
+/* MDSCR_EL1 enabling bits */
+#define DBG_MDSCR_KDE (1 << 13)
+#define DBG_MDSCR_MDE (1 << 15)
+#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
+
#define DBG_ESR_EVT(x) (((x) >> 27) & 0x7)
/* AArch64 */
@@ -73,11 +82,6 @@
#define CACHE_FLUSH_IS_SAFE 1
-enum debug_el {
- DBG_ACTIVE_EL0 = 0,
- DBG_ACTIVE_EL1,
-};
-
/* AArch32 */
#define DBG_ESR_EVT_BKPT 0x4
#define DBG_ESR_EVT_VECC 0x5
@@ -115,6 +119,11 @@ void unregister_break_hook(struct break_hook *hook);
u8 debug_monitors_arch(void);
+enum debug_el {
+ DBG_ACTIVE_EL0 = 0,
+ DBG_ACTIVE_EL1,
+};
+
void enable_debug_monitors(enum debug_el el);
void disable_debug_monitors(enum debug_el el);
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
new file mode 100644
index 000000000000..7fd3e27e3ccc
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -0,0 +1,259 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_ARM_H__
+#define __ARM64_KVM_ARM_H__
+
+#include <asm/types.h>
+
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_ID (UL(1) << 33)
+#define HCR_CD (UL(1) << 32)
+#define HCR_RW_SHIFT 31
+#define HCR_RW (UL(1) << HCR_RW_SHIFT)
+#define HCR_TRVM (UL(1) << 30)
+#define HCR_HCD (UL(1) << 29)
+#define HCR_TDZ (UL(1) << 28)
+#define HCR_TGE (UL(1) << 27)
+#define HCR_TVM (UL(1) << 26)
+#define HCR_TTLB (UL(1) << 25)
+#define HCR_TPU (UL(1) << 24)
+#define HCR_TPC (UL(1) << 23)
+#define HCR_TSW (UL(1) << 22)
+#define HCR_TAC (UL(1) << 21)
+#define HCR_TIDCP (UL(1) << 20)
+#define HCR_TSC (UL(1) << 19)
+#define HCR_TID3 (UL(1) << 18)
+#define HCR_TID2 (UL(1) << 17)
+#define HCR_TID1 (UL(1) << 16)
+#define HCR_TID0 (UL(1) << 15)
+#define HCR_TWE (UL(1) << 14)
+#define HCR_TWI (UL(1) << 13)
+#define HCR_DC (UL(1) << 12)
+#define HCR_BSU (3 << 10)
+#define HCR_BSU_IS (UL(1) << 10)
+#define HCR_FB (UL(1) << 9)
+#define HCR_VA (UL(1) << 8)
+#define HCR_VI (UL(1) << 7)
+#define HCR_VF (UL(1) << 6)
+#define HCR_AMO (UL(1) << 5)
+#define HCR_IMO (UL(1) << 4)
+#define HCR_FMO (UL(1) << 3)
+#define HCR_PTW (UL(1) << 2)
+#define HCR_SWIO (UL(1) << 1)
+#define HCR_VM (UL(1) << 0)
+
+/*
+ * The bits we set in HCR:
+ * RW:		64bit by default, can be overridden for 32bit VMs
+ * TAC: Trap ACTLR
+ * TSC: Trap SMC
+ * TVM: Trap VM ops (until M+C set in SCTLR_EL1)
+ * TSW: Trap cache operations by set/way
+ * TWE: Trap WFE
+ * TWI: Trap WFI
+ * TIDCP: Trap L2CTLR/L2ECTLR
+ * BSU_IS: Upgrade barriers to the inner shareable domain
+ * FB:		Force broadcast of all maintenance operations
+ * AMO: Override CPSR.A and enable signaling with VA
+ * IMO: Override CPSR.I and enable signaling with VI
+ * FMO: Override CPSR.F and enable signaling with VF
+ * SWIO: Turn set/way invalidates into set/way clean+invalidate
+ */
+#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
+ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
+ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+#define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO)
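
Of the guest flags, HCR_RW is the one expected to vary per VM; a sketch of dropping it for an AArch32 EL1 guest (assuming the KVM_ARM_VCPU_EL1_32BIT feature bit from the uapi header in this series):

	u64 hcr = HCR_GUEST_FLAGS;		/* 64-bit guest defaults */

	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
		hcr &= ~HCR_RW;			/* EL1 runs in AArch32 */
	vcpu->arch.hcr_el2 = hcr;
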
+
+
+/* Hyp System Control Register (SCTLR_EL2) bits */
+#define SCTLR_EL2_EE (1 << 25)
+#define SCTLR_EL2_WXN (1 << 19)
+#define SCTLR_EL2_I (1 << 12)
+#define SCTLR_EL2_SA (1 << 3)
+#define SCTLR_EL2_C (1 << 2)
+#define SCTLR_EL2_A (1 << 1)
+#define SCTLR_EL2_M 1
+#define SCTLR_EL2_FLAGS (SCTLR_EL2_M | SCTLR_EL2_A | SCTLR_EL2_C | \
+ SCTLR_EL2_SA | SCTLR_EL2_I)
+
+/* TCR_EL2 Registers bits */
+#define TCR_EL2_TBI (1 << 20)
+#define TCR_EL2_PS (7 << 16)
+#define TCR_EL2_PS_40B (2 << 16)
+#define TCR_EL2_TG0 (1 << 14)
+#define TCR_EL2_SH0 (3 << 12)
+#define TCR_EL2_ORGN0 (3 << 10)
+#define TCR_EL2_IRGN0 (3 << 8)
+#define TCR_EL2_T0SZ 0x3f
+#define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \
+ TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+
+#define TCR_EL2_FLAGS (TCR_EL2_PS_40B)
+
+/* VTCR_EL2 Registers bits */
+#define VTCR_EL2_PS_MASK (7 << 16)
+#define VTCR_EL2_TG0_MASK (1 << 14)
+#define VTCR_EL2_TG0_4K (0 << 14)
+#define VTCR_EL2_TG0_64K (1 << 14)
+#define VTCR_EL2_SH0_MASK (3 << 12)
+#define VTCR_EL2_SH0_INNER (3 << 12)
+#define VTCR_EL2_ORGN0_MASK (3 << 10)
+#define VTCR_EL2_ORGN0_WBWA (1 << 10)
+#define VTCR_EL2_IRGN0_MASK (3 << 8)
+#define VTCR_EL2_IRGN0_WBWA (1 << 8)
+#define VTCR_EL2_SL0_MASK (3 << 6)
+#define VTCR_EL2_SL0_LVL1 (1 << 6)
+#define VTCR_EL2_T0SZ_MASK 0x3f
+#define VTCR_EL2_T0SZ_40B 24
+
+/*
+ * We configure the Stage-2 page tables to always restrict the IPA space to be
+ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
+ * not known to exist and will break with this configuration.
+ *
+ * Note that when using 4K pages, we concatenate two first level page tables
+ * together.
+ *
+ * The magic numbers used for VTTBR_X in this patch can be found in Tables
+ * D4-23 and D4-25 in ARM DDI 0487A.b.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+/*
+ * Stage2 translation configuration:
+ * 40bits output (PS = 2)
+ * 40bits input (T0SZ = 24)
+ * 64kB pages (TG0 = 1)
+ * 2 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
+ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+ VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
+#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B)
+#else
+/*
+ * Stage2 translation configuration:
+ * 40bits output (PS = 2)
+ * 40bits input (T0SZ = 24)
+ * 4kB pages (TG0 = 0)
+ * 3 level page tables (SL = 1)
+ */
+#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
+ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+ VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
+#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
+#endif
+
+#define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
+#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT (48LLU)
+#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT)
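
Worked through for the 4K configuration: T0SZ = 24 gives VTTBR_X = 37 - 24 = 13, the architecture then wants the stage-2 pgd base aligned to 2^13 bytes, and the 8-bit VMID sits in bits [55:48]. A sketch of composing the register (hypothetical helper, mirroring what a VMID update path would do):

	static void sketch_set_vttbr(struct kvm *kvm, phys_addr_t pgd_phys, u8 vmid)
	{
		u64 vttbr = pgd_phys & VTTBR_BADDR_MASK;	/* baddr field */

		vttbr |= (u64)vmid << VTTBR_VMID_SHIFT;		/* VMID [55:48] */
		kvm->arch.vttbr = vttbr;
	}
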
+
+/* Hyp System Trap Register */
+#define HSTR_EL2_TTEE (1 << 16)
+#define HSTR_EL2_T(x) (1 << x)
+
+/* Hyp Coprocessor Trap Register */
+#define CPTR_EL2_TCPAC (1 << 31)
+#define CPTR_EL2_TTA (1 << 20)
+#define CPTR_EL2_TFP (1 << 10)
+
+/* Hyp Debug Configuration Register bits */
+#define MDCR_EL2_TDRA (1 << 11)
+#define MDCR_EL2_TDOSA (1 << 10)
+#define MDCR_EL2_TDA (1 << 9)
+#define MDCR_EL2_TDE (1 << 8)
+#define MDCR_EL2_HPME (1 << 7)
+#define MDCR_EL2_TPM (1 << 6)
+#define MDCR_EL2_TPMCR (1 << 5)
+#define MDCR_EL2_HPMN_MASK (0x1F)
+
+/* Exception Syndrome Register (ESR) bits */
+#define ESR_EL2_EC_SHIFT (26)
+#define ESR_EL2_EC (0x3fU << ESR_EL2_EC_SHIFT)
+#define ESR_EL2_IL (1U << 25)
+#define ESR_EL2_ISS (ESR_EL2_IL - 1)
+#define ESR_EL2_ISV_SHIFT (24)
+#define ESR_EL2_ISV (1U << ESR_EL2_ISV_SHIFT)
+#define ESR_EL2_SAS_SHIFT (22)
+#define ESR_EL2_SAS (3U << ESR_EL2_SAS_SHIFT)
+#define ESR_EL2_SSE (1 << 21)
+#define ESR_EL2_SRT_SHIFT (16)
+#define ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT)
+#define ESR_EL2_SF (1 << 15)
+#define ESR_EL2_AR (1 << 14)
+#define ESR_EL2_EA (1 << 9)
+#define ESR_EL2_CM (1 << 8)
+#define ESR_EL2_S1PTW (1 << 7)
+#define ESR_EL2_WNR (1 << 6)
+#define ESR_EL2_FSC (0x3f)
+#define ESR_EL2_FSC_TYPE (0x3c)
+
+#define ESR_EL2_CV_SHIFT (24)
+#define ESR_EL2_CV (1U << ESR_EL2_CV_SHIFT)
+#define ESR_EL2_COND_SHIFT (20)
+#define ESR_EL2_COND (0xfU << ESR_EL2_COND_SHIFT)
+
+
+#define FSC_FAULT (0x04)
+#define FSC_PERM (0x0c)
+
+/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
+#define HPFAR_MASK (~0xFUL)
+
+#define ESR_EL2_EC_UNKNOWN (0x00)
+#define ESR_EL2_EC_WFI (0x01)
+#define ESR_EL2_EC_CP15_32 (0x03)
+#define ESR_EL2_EC_CP15_64 (0x04)
+#define ESR_EL2_EC_CP14_MR (0x05)
+#define ESR_EL2_EC_CP14_LS (0x06)
+#define ESR_EL2_EC_FP_ASIMD (0x07)
+#define ESR_EL2_EC_CP10_ID (0x08)
+#define ESR_EL2_EC_CP14_64 (0x0C)
+#define ESR_EL2_EC_ILL_ISS (0x0E)
+#define ESR_EL2_EC_SVC32 (0x11)
+#define ESR_EL2_EC_HVC32 (0x12)
+#define ESR_EL2_EC_SMC32 (0x13)
+#define ESR_EL2_EC_SVC64 (0x15)
+#define ESR_EL2_EC_HVC64 (0x16)
+#define ESR_EL2_EC_SMC64 (0x17)
+#define ESR_EL2_EC_SYS64 (0x18)
+#define ESR_EL2_EC_IABT (0x20)
+#define ESR_EL2_EC_IABT_HYP (0x21)
+#define ESR_EL2_EC_PC_ALIGN (0x22)
+#define ESR_EL2_EC_DABT (0x24)
+#define ESR_EL2_EC_DABT_HYP (0x25)
+#define ESR_EL2_EC_SP_ALIGN (0x26)
+#define ESR_EL2_EC_FP_EXC32 (0x28)
+#define ESR_EL2_EC_FP_EXC64 (0x2C)
+#define ESR_EL2_EC_SERROR (0x2F)
+#define ESR_EL2_EC_BREAKPT (0x30)
+#define ESR_EL2_EC_BREAKPT_HYP (0x31)
+#define ESR_EL2_EC_SOFTSTP (0x32)
+#define ESR_EL2_EC_SOFTSTP_HYP (0x33)
+#define ESR_EL2_EC_WATCHPT (0x34)
+#define ESR_EL2_EC_WATCHPT_HYP (0x35)
+#define ESR_EL2_EC_BKPT32 (0x38)
+#define ESR_EL2_EC_VECTOR32 (0x3A)
+#define ESR_EL2_EC_BRK64 (0x3C)
+
+#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10
+
+#define ESR_EL2_EC_WFI_ISS_WFE (1 << 0)
+
+#endif /* __ARM64_KVM_ARM_H__ */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
new file mode 100644
index 000000000000..483842180f8f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_ASM_H__
+#define __ARM_KVM_ASM_H__
+
+#include <asm/virt.h>
+
+/*
+ * 0 is reserved as an invalid value.
+ * Order *must* be kept in sync with the hyp switch code.
+ */
+#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */
+#define CSSELR_EL1 2 /* Cache Size Selection Register */
+#define SCTLR_EL1 3 /* System Control Register */
+#define	ACTLR_EL1	4	/* Auxiliary Control Register */
+#define CPACR_EL1 5 /* Coprocessor Access Control */
+#define TTBR0_EL1 6 /* Translation Table Base Register 0 */
+#define TTBR1_EL1 7 /* Translation Table Base Register 1 */
+#define TCR_EL1 8 /* Translation Control Register */
+#define ESR_EL1 9 /* Exception Syndrome Register */
+#define	AFSR0_EL1	10	/* Auxiliary Fault Status Register 0 */
+#define	AFSR1_EL1	11	/* Auxiliary Fault Status Register 1 */
+#define FAR_EL1 12 /* Fault Address Register */
+#define MAIR_EL1 13 /* Memory Attribute Indirection Register */
+#define VBAR_EL1 14 /* Vector Base Address Register */
+#define CONTEXTIDR_EL1 15 /* Context ID Register */
+#define TPIDR_EL0 16 /* Thread ID, User R/W */
+#define TPIDRRO_EL0 17 /* Thread ID, User R/O */
+#define TPIDR_EL1 18 /* Thread ID, Privileged */
+#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */
+#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
+#define PAR_EL1 21 /* Physical Address Register */
+#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
+#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */
+#define DBGBCR15_EL1 38
+#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */
+#define DBGBVR15_EL1 54
+#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */
+#define DBGWCR15_EL1 70
+#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */
+#define DBGWVR15_EL1 86
+#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */
+
+/* 32bit specific registers. Keep them at the end of the range */
+#define DACR32_EL2 88 /* Domain Access Control Register */
+#define IFSR32_EL2 89 /* Instruction Fault Status Register */
+#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */
+#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */
+#define TEECR32_EL1 92 /* ThumbEE Configuration Register */
+#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */
+#define NR_SYS_REGS 94
+
+/* 32bit mapping */
+#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
+#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */
+#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */
+#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */
+#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */
+#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */
+#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */
+#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */
+#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */
+#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */
+#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */
+#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */
+#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */
+#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */
+#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */
+#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */
+#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */
+#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */
+#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */
+#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */
+#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */
+#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */
+#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */
+#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */
+#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
+#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */
+#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
+#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+
+#define cp14_DBGDSCRext (MDSCR_EL1 * 2)
+#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2)
+#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2)
+#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1)
+#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2)
+#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2)
+#define cp14_DBGDCCINT (MDCCINT_EL1 * 2)
+
+#define NR_COPRO_REGS (NR_SYS_REGS * 2)
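
These AArch32 indices are twice their AArch64 counterparts because one allocation backs both views: the vcpu context (see the kvm_host.h addition later in this diff) places u64 sys_regs[NR_SYS_REGS] in a union with u32 copro[NR_COPRO_REGS], so 32-bit register r occupies one half of sys_regs[r / 2]. Schematically, on a little-endian host:

	union {
		u64 sys_regs[NR_SYS_REGS];
		u32 copro[NR_COPRO_REGS];	/* NR_SYS_REGS * 2 */
	} regs;

	/* regs.copro[c2_TTBR0]      aliases the low  32 bits of TTBR0_EL1 */
	/* regs.copro[c2_TTBR0_high] aliases the high 32 bits of TTBR0_EL1 */
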
+
+#define ARM_EXCEPTION_IRQ 0
+#define ARM_EXCEPTION_TRAP 1
+
+#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
+#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+
+#ifndef __ASSEMBLY__
+struct kvm;
+struct kvm_vcpu;
+
+extern char __kvm_hyp_init[];
+extern char __kvm_hyp_init_end[];
+
+extern char __kvm_hyp_vector[];
+
+#define __kvm_hyp_code_start __hyp_text_start
+#define __kvm_hyp_code_end __hyp_text_end
+
+extern void __kvm_flush_vm_context(void);
+extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+extern u64 __vgic_v3_get_ich_vtr_el2(void);
+
+extern char __save_vgic_v2_state[];
+extern char __restore_vgic_v2_state[];
+extern char __save_vgic_v3_state[];
+extern char __restore_vgic_v3_state[];
+
+#endif
+
+#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h
new file mode 100644
index 000000000000..0b52377a6c11
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_coproc.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_coproc.h
+ * Copyright (C) 2012 Rusty Russell IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_COPROC_H__
+#define __ARM64_KVM_COPROC_H__
+
+#include <linux/kvm_host.h>
+
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
+
+struct kvm_sys_reg_table {
+ const struct sys_reg_desc *table;
+ size_t num;
+};
+
+struct kvm_sys_reg_target_table {
+ struct kvm_sys_reg_table table64;
+ struct kvm_sys_reg_table table32;
+};
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+ struct kvm_sys_reg_target_table *table);
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+#define kvm_coproc_table_init kvm_sys_reg_table_init
+void kvm_sys_reg_table_init(void);
+
+struct kvm_one_reg;
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices);
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_COPROC_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
new file mode 100644
index 000000000000..5674a55b5518
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -0,0 +1,268 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/kvm_emulate.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_EMULATE_H__
+#define __ARM64_KVM_EMULATE_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmio.h>
+#include <asm/ptrace.h>
+
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
+
+void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
+
+static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
+{
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
+}
+
+static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+{
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
+}
+
+static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
+{
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
+}
+
+static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
+{
+ return !!(*vcpu_cpsr(vcpu) & PSR_MODE32_BIT);
+}
+
+static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return kvm_condition_valid32(vcpu);
+
+ return true;
+}
+
+static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ kvm_skip_instr32(vcpu, is_wide_instr);
+ else
+ *vcpu_pc(vcpu) += 4;
+}
+
+static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu)
+{
+ *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT;
+}
+
+static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return vcpu_reg32(vcpu, reg_num);
+
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num];
+}
+
+/* Get vcpu SPSR for current mode */
+static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return vcpu_spsr32(vcpu);
+
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+}
+
+static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
+{
+ u32 mode = *vcpu_cpsr(vcpu) & PSR_MODE_MASK;
+
+ if (vcpu_mode_is_32bit(vcpu))
+ return mode > COMPAT_PSR_MODE_USR;
+
+ return mode != PSR_MODE_EL0t;
+}
+
+static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.fault.esr_el2;
+}
+
+static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.fault.far_el2;
+}
+
+static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
+{
+ return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+}
+
+static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV);
+}
+
+static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR);
+}
+
+static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE);
+}
+
+static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu)
+{
+ return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT;
+}
+
+static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA);
+}
+
+static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW);
+}
+
+static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu)
+{
+ return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT);
+}
+
+/* This one is not specific to Data Abort */
+static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu)
+{
+ return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL);
+}
+
+static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT;
+}
+
+static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC;
+}
+
+static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE;
+}
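
A sketch of how a stage-2 abort handler would branch on these accessors (hypothetical shape; the real logic belongs to the fault-handling path):

	switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
	case FSC_FAULT:		/* translation fault: establish the mapping */
		break;
	case FSC_PERM:		/* permission fault: fix up or inject */
		break;
	default:		/* unexpected syndrome: bail out */
		break;
	}

Note that ESR_EL2_FSC_TYPE masks out the level bits, so translation faults at any level all compare equal to FSC_FAULT.
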
+
+static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu)
+{
+ return vcpu_sys_reg(vcpu, MPIDR_EL1);
+}
+
+static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ *vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
+ else
+ vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
+}
+
+static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_mode_is_32bit(vcpu))
+ return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
+
+ return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+}
+
+static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
+ unsigned long data,
+ unsigned int len)
+{
+ if (kvm_vcpu_is_be(vcpu)) {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return be16_to_cpu(data & 0xffff);
+ case 4:
+ return be32_to_cpu(data & 0xffffffff);
+ default:
+ return be64_to_cpu(data);
+ }
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return le16_to_cpu(data & 0xffff);
+ case 4:
+ return le32_to_cpu(data & 0xffffffff);
+ default:
+ return le64_to_cpu(data);
+ }
+ }
+
+ return data; /* Leave LE untouched */
+}
+
+static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu,
+ unsigned long data,
+ unsigned int len)
+{
+ if (kvm_vcpu_is_be(vcpu)) {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_be16(data & 0xffff);
+ case 4:
+ return cpu_to_be32(data & 0xffffffff);
+ default:
+ return cpu_to_be64(data);
+ }
+ } else {
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return cpu_to_le16(data & 0xffff);
+ case 4:
+ return cpu_to_le32(data & 0xffffffff);
+ default:
+ return cpu_to_le64(data);
+ }
+ }
+
+ return data; /* Leave LE untouched */
+}
+
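
The two conversion helpers above are inverses and pair up on the MMIO path; a usage sketch with hypothetical variable names:

	/* guest store: register value -> bus data */
	data_out = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);

	/* guest load: bus data -> register value */
	*vcpu_reg(vcpu, rt) = vcpu_data_host_to_guest(vcpu, data_in, len);
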
+#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
new file mode 100644
index 000000000000..bcde41905746
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/asm/kvm_host.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_HOST_H__
+#define __ARM64_KVM_HOST_H__
+
+#include <linux/types.h>
+#include <linux/kvm_types.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmio.h>
+
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
+#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
+#define KVM_USER_MEM_SLOTS 32
+#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#include <kvm/arm_vgic.h>
+#include <kvm/arm_arch_timer.h>
+
+#define KVM_VCPU_MAX_FEATURES 3
+
+int __attribute_const__ kvm_target_cpu(void);
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+int kvm_arch_dev_ioctl_check_extension(long ext);
+
+struct kvm_arch {
+ /* The VMID generation used for the virt. memory system */
+ u64 vmid_gen;
+ u32 vmid;
+
+ /* 1-level 2nd stage table and lock */
+ spinlock_t pgd_lock;
+ pgd_t *pgd;
+
+ /* VTTBR value associated with above pgd and vmid */
+ u64 vttbr;
+
+ /* Interrupt controller */
+ struct vgic_dist vgic;
+
+ /* Timer */
+ struct arch_timer_kvm timer;
+};
+
+#define KVM_NR_MEM_OBJS 40
+
+/*
+ * We don't want allocation failures within the mmu code, so we preallocate
+ * enough memory for a single page fault in a cache.
+ */
+struct kvm_mmu_memory_cache {
+ int nobjs;
+ void *objects[KVM_NR_MEM_OBJS];
+};
+
+struct kvm_vcpu_fault_info {
+ u32 esr_el2; /* Hyp Syndrome Register */
+ u64 far_el2; /* Hyp Fault Address Register */
+ u64 hpfar_el2; /* Hyp IPA Fault Address Register */
+};
+
+struct kvm_cpu_context {
+ struct kvm_regs gp_regs;
+ union {
+ u64 sys_regs[NR_SYS_REGS];
+ u32 copro[NR_COPRO_REGS];
+ };
+};
+
+typedef struct kvm_cpu_context kvm_cpu_context_t;
+
+struct kvm_vcpu_arch {
+ struct kvm_cpu_context ctxt;
+
+ /* HYP configuration */
+ u64 hcr_el2;
+
+ /* Exception Information */
+ struct kvm_vcpu_fault_info fault;
+
+ /* Debug state */
+ u64 debug_flags;
+
+ /* Pointer to host CPU context */
+ kvm_cpu_context_t *host_cpu_context;
+
+ /* VGIC state */
+ struct vgic_cpu vgic_cpu;
+ struct arch_timer_cpu timer_cpu;
+
+ /*
+ * Anything that is not used directly from assembly code goes
+ * here.
+ */
+ int last_pcpu;
+
+ /* dcache set/way operation pending */
+ cpumask_t require_dcache_flush;
+
+ /* Don't run the guest */
+ bool pause;
+
+ /* IO related fields */
+ struct kvm_decode mmio_decode;
+
+ /* Interrupt related fields */
+ u64 irq_lines; /* IRQ and FIQ levels */
+
+ /* Cache some mmu pages needed inside spinlock regions */
+ struct kvm_mmu_memory_cache mmu_page_cache;
+
+ /* Target CPU and feature flags */
+ int target;
+ DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
+
+ /* Detect first run of a vcpu */
+ bool has_run_once;
+};
+
+#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
+#define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)])
+/*
+ * CP14 and CP15 live in the same array, as they are backed by the
+ * same system registers.
+ */
+#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r)])
+#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r)])
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r))
+#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r) + 1)
+#else
+#define vcpu_cp15_64_high(v,r) vcpu_cp15((v),(r) + 1)
+#define vcpu_cp15_64_low(v,r) vcpu_cp15((v),(r))
+#endif
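+
+/*
+ * Editorial example: the copro[] array aliases sys_regs[], so a
+ * 64-bit CP15 register spans two consecutive u32 slots. On a
+ * little-endian host the low word lives at index r and the high word
+ * at r + 1; a big-endian host lays them out the other way around,
+ * which is what the two macro variants above encode.
+ */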
+
+struct kvm_vm_stat {
+ u32 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+ u32 halt_wakeup;
+};
+
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+ const struct kvm_vcpu_init *init);
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm,
+ unsigned long start, unsigned long end);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
+struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
+
+u64 kvm_call_hyp(void *hypfn, ...);
+
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index);
+
+int kvm_perf_init(void);
+int kvm_perf_teardown(void);
+
+static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
+ phys_addr_t pgd_ptr,
+ unsigned long hyp_stack_ptr,
+ unsigned long vector_ptr)
+{
+ /*
+ * Call initialization code, and switch to the full-blown
+ * HYP code.
+ */
+ kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+ hyp_stack_ptr, vector_ptr);
+}
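+
+/*
+ * Editorial note: the four arguments above land in x0-x3 and are
+ * consumed by __do_hyp_init in hyp-init.S as boot pgd, runtime pgd,
+ * HYP stack and HYP vectors, in that order.
+ */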
+
+struct vgic_sr_vectors {
+ void *save_vgic;
+ void *restore_vgic;
+};
+
+static inline void vgic_arch_setup(const struct vgic_params *vgic)
+{
+ extern struct vgic_sr_vectors __vgic_sr_vectors;
+
+ switch (vgic->type) {
+ case VGIC_V2:
+ __vgic_sr_vectors.save_vgic = __save_vgic_v2_state;
+ __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state;
+ break;
+
+#ifdef CONFIG_ARM_GIC_V3
+ case VGIC_V3:
+ __vgic_sr_vectors.save_vgic = __save_vgic_v3_state;
+ __vgic_sr_vectors.restore_vgic = __restore_vgic_v3_state;
+ break;
+#endif
+
+ default:
+ BUG();
+ }
+}
+
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+
+#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
new file mode 100644
index 000000000000..fc2f689c0694
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMIO_H__
+#define __ARM64_KVM_MMIO_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+/*
+ * This is annoying. The mmio code requires this, even if we don't
+ * need any decoding. To be fixed.
+ */
+struct kvm_decode {
+ unsigned long rt;
+ bool sign_extend;
+};
+
+/*
+ * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
+ * which is an anonymous type. Use our own type instead.
+ */
+struct kvm_exit_mmio {
+ phys_addr_t phys_addr;
+ u8 data[8];
+ u32 len;
+ bool is_write;
+};
+
+static inline void kvm_prepare_mmio(struct kvm_run *run,
+ struct kvm_exit_mmio *mmio)
+{
+ run->mmio.phys_addr = mmio->phys_addr;
+ run->mmio.len = mmio->len;
+ run->mmio.is_write = mmio->is_write;
+ memcpy(run->mmio.data, mmio->data, mmio->len);
+ run->exit_reason = KVM_EXIT_MMIO;
+}
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ phys_addr_t fault_ipa);
+
+#endif /* __ARM64_KVM_MMIO_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
new file mode 100644
index 000000000000..a030d163840b
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_MMU_H__
+#define __ARM64_KVM_MMU_H__
+
+#include <asm/page.h>
+#include <asm/memory.h>
+
+/*
+ * As we only have the TTBR0_EL2 register, we cannot express
+ * "negative" addresses. This makes it impossible to directly share
+ * mappings with the kernel.
+ *
+ * Instead, give the HYP mode its own VA region at a fixed offset from
+ * the kernel by just masking the top bits (which are all ones for a
+ * kernel address).
+ */
+#define HYP_PAGE_OFFSET_SHIFT VA_BITS
+#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1)
+#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK)
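+
+/*
+ * Editorial example, assuming VA_BITS == 39: the mask is
+ * (1UL << 39) - 1 == 0x7fffffffff, so the kernel VA
+ * 0xffffffc000080000 turns into the HYP VA 0x0000004000080000.
+ */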
+
+/*
+ * Our virtual mapping for the idmap-ed MMU-enable code. Must be
+ * shared across all the page-tables. Conveniently, we use the last
+ * possible page, where no kernel mapping will ever exist.
+ */
+#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK)
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Convert a kernel VA into a HYP VA.
+ * reg: VA to be converted.
+ */
+.macro kern_hyp_va reg
+ and \reg, \reg, #HYP_PAGE_OFFSET_MASK
+.endm
+
+#else
+
+#include <asm/cachetype.h>
+#include <asm/cacheflush.h>
+
+#define KERN_TO_HYP(kva) ((unsigned long)(kva) - PAGE_OFFSET + HYP_PAGE_OFFSET)
+
+/*
+ * We currently only support a 40bit IPA.
+ */
+#define KVM_PHYS_SHIFT (40)
+#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
+#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
+
+/* Make sure we get the right size, and thus the right alignment */
+#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - PGDIR_SHIFT))
+#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
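+
+/*
+ * Editorial example: with 4K pages and PGDIR_SHIFT == 30, a 40-bit
+ * IPA needs 1 << (40 - 30) == 1024 PGD entries. At 8 bytes each that
+ * is 8KB, so S2_PGD_ORDER evaluates to 1 (an order-1, two-page
+ * allocation).
+ */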
+
+int create_hyp_mappings(void *from, void *to);
+int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+void free_boot_hyp_pgd(void);
+void free_hyp_pgds(void);
+
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
+ phys_addr_t pa, unsigned long size);
+
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu);
+
+phys_addr_t kvm_mmu_get_httbr(void);
+phys_addr_t kvm_mmu_get_boot_httbr(void);
+phys_addr_t kvm_get_idmap_vector(void);
+int kvm_mmu_init(void);
+void kvm_clear_hyp_idmap(void);
+
+#define kvm_set_pte(ptep, pte) set_pte(ptep, pte)
+#define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd)
+
+static inline void kvm_clean_pgd(pgd_t *pgd) {}
+static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
+static inline void kvm_clean_pte(pte_t *pte) {}
+static inline void kvm_clean_pte_entry(pte_t *pte) {}
+
+static inline void kvm_set_s2pte_writable(pte_t *pte)
+{
+ pte_val(*pte) |= PTE_S2_RDWR;
+}
+
+static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+{
+ pmd_val(*pmd) |= PMD_S2_RDWR;
+}
+
+#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end)
+#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end)
+#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end)
+
+static inline bool kvm_page_empty(void *ptr)
+{
+ struct page *ptr_page = virt_to_page(ptr);
+ return page_count(ptr_page) == 1;
+}
+
+#define kvm_pte_table_empty(ptep) kvm_page_empty(ptep)
+#ifndef CONFIG_ARM64_64K_PAGES
+#define kvm_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#else
+#define kvm_pmd_table_empty(pmdp) (0)
+#endif
+#define kvm_pud_table_empty(pudp) (0)
+
+
+struct kvm;
+
+#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
+{
+ /* SCTLR_EL1.M and SCTLR_EL1.C must both be set */
+ return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+}
+
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+ unsigned long size)
+{
+ if (!vcpu_has_cache_enabled(vcpu))
+ kvm_flush_dcache_to_poc((void *)hva, size);
+
+ if (!icache_is_aliasing()) { /* PIPT */
+ flush_icache_range(hva, hva + size);
+ } else if (!icache_is_aivivt()) {
+ /* neither PIPT nor ASID-tagged VIVT: some flavour of VIPT */
+ __flush_icache_all();
+ }
+}
+
+#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x))
+
+void stage2_flush_vm(struct kvm *kvm);
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/kvm_psci.h b/arch/arm64/include/asm/kvm_psci.h
new file mode 100644
index 000000000000..bc39e557c56c
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_psci.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_PSCI_H__
+#define __ARM64_KVM_PSCI_H__
+
+#define KVM_ARM_PSCI_0_1 1
+#define KVM_ARM_PSCI_0_2 2
+
+int kvm_psci_version(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
+
+#endif /* __ARM64_KVM_PSCI_H__ */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 2e9d83673ef6..f7af66b54cb2 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -85,6 +85,8 @@
#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */
#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */
+#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */
+
/*
* Memory Attribute override for Stage-2 (MemAttr[3:0])
*/
@@ -98,9 +100,9 @@
#define PTE_HYP PTE_USER
/*
- * 40-bit physical address supported.
+ * Highest possible physical address supported.
*/
-#define PHYS_MASK_SHIFT (40)
+#define PHYS_MASK_SHIFT (48)
#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
/*
@@ -120,7 +122,6 @@
#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28))
#define TCR_TG0_64K (UL(1) << 14)
#define TCR_TG1_64K (UL(1) << 30)
-#define TCR_IPS_40BIT (UL(2) << 32)
#define TCR_ASID16 (UL(1) << 36)
#define TCR_TBI0 (UL(1) << 37)
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 215ad4649dd7..7a5df5252dd7 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -50,6 +50,10 @@ static inline bool is_hyp_mode_mismatched(void)
return __boot_cpu_mode[0] != __boot_cpu_mode[1];
}
+/* The section containing the hypervisor text */
+extern char __hyp_text_start[];
+extern char __hyp_text_end[];
+
#endif /* __ASSEMBLY__ */
#endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
new file mode 100644
index 000000000000..8e38878c87c6
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/uapi/asm/kvm.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#define KVM_SPSR_EL1 0
+#define KVM_SPSR_SVC KVM_SPSR_EL1
+#define KVM_SPSR_ABT 1
+#define KVM_SPSR_UND 2
+#define KVM_SPSR_IRQ 3
+#define KVM_SPSR_FIQ 4
+#define KVM_NR_SPSR 5
+
+#ifndef __ASSEMBLY__
+#include <linux/psci.h>
+#include <asm/types.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
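+
+/*
+ * Editorial example: a KVM_REG_SIZE_U64 id has 3 in its size field,
+ * so KVM_REG_SIZE() yields 1U << 3 == 8 bytes; a KVM_REG_SIZE_U32 id
+ * decodes to 4 bytes.
+ */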
+
+struct kvm_regs {
+ struct user_pt_regs regs; /* sp = sp_el0 */
+
+ __u64 sp_el1;
+ __u64 elr_el1;
+
+ __u64 spsr[KVM_NR_SPSR];
+
+ struct user_fpsimd_state fp_regs;
+};
+
+/* Supported Processor Types */
+#define KVM_ARM_TARGET_AEM_V8 0
+#define KVM_ARM_TARGET_FOUNDATION_V8 1
+#define KVM_ARM_TARGET_CORTEX_A57 2
+#define KVM_ARM_TARGET_XGENE_POTENZA 3
+#define KVM_ARM_TARGET_CORTEX_A53 4
+
+#define KVM_ARM_NUM_TARGETS 5
+
+/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
+#define KVM_ARM_DEVICE_TYPE_SHIFT 0
+#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_ID_SHIFT 16
+#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2 0
+
+/* Supported VGIC address types */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST 0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU 1
+
+#define KVM_VGIC_V2_DIST_SIZE 0x1000
+#define KVM_VGIC_V2_CPU_SIZE 0x2000
+
+#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
+#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
+
+struct kvm_vcpu_init {
+ __u32 target;
+ __u32 features[7];
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+struct kvm_sync_regs {
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
+#define KVM_REG_ARM_COPROC_SHIFT 16
+
+/* Normal registers are mapped as coprocessor 16. */
+#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / sizeof(__u32))
+
+/* Some registers need more space to represent values. */
+#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00
+#define KVM_REG_ARM_DEMUX_ID_SHIFT 8
+#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
+#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF
+#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0
+
+/* AArch64 system registers */
+#define KVM_REG_ARM64_SYSREG (0x0013 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM64_SYSREG_OP0_MASK 0x000000000000c000
+#define KVM_REG_ARM64_SYSREG_OP0_SHIFT 14
+#define KVM_REG_ARM64_SYSREG_OP1_MASK 0x0000000000003800
+#define KVM_REG_ARM64_SYSREG_OP1_SHIFT 11
+#define KVM_REG_ARM64_SYSREG_CRN_MASK 0x0000000000000780
+#define KVM_REG_ARM64_SYSREG_CRN_SHIFT 7
+#define KVM_REG_ARM64_SYSREG_CRM_MASK 0x0000000000000078
+#define KVM_REG_ARM64_SYSREG_CRM_SHIFT 3
+#define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007
+#define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0
+
+#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
+ (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
+ KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
+
+#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
+ (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
+ ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+ ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+ ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+ ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+ ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+
+#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
+#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
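+
+/*
+ * Editorial note: KVM_REG_ARM_TIMER_CTL carries the architectural
+ * encoding of CNTV_CTL_EL0. The CNT and CVAL ids appear to carry
+ * each other's encodings (CNT uses CNTV_CVAL_EL0's, CVAL uses
+ * CNTVCT_EL0's), but as part of the user ABI they cannot change.
+ */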
+
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
+#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
+#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
+#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT 24
+#define KVM_ARM_IRQ_TYPE_MASK 0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT 16
+#define KVM_ARM_IRQ_VCPU_MASK 0xff
+#define KVM_ARM_IRQ_NUM_SHIFT 0
+#define KVM_ARM_IRQ_NUM_MASK 0xffff
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU 0
+#define KVM_ARM_IRQ_TYPE_SPI 1
+#define KVM_ARM_IRQ_TYPE_PPI 2
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_ARM_IRQ_CPU_IRQ 0
+#define KVM_ARM_IRQ_CPU_FIQ 1
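+
+/*
+ * Editorial example: to raise SPI 32 from userspace, the KVM_IRQ_LINE
+ * irq field would be assembled as
+ * (KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT) | 32, the
+ * vcpu_index field being unused for SPIs.
+ */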
+
+/* Highest supported SPI, from VGIC_NR_IRQS */
+#define KVM_ARM_IRQ_GIC_MAX 127
+
+/* PSCI interface */
+#define KVM_PSCI_FN_BASE 0x95c1ba5e
+#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
+
+#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0)
+#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1)
+#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
+#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
+
+#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
+
+#endif
+
+#endif /* __ARM_KVM_H__ */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index c481a119b98a..9a9fce090d58 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -21,6 +21,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
+#include <linux/kvm_host.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/cputable.h>
@@ -119,6 +120,7 @@ int main(void)
DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2));
DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
+ DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context));
@@ -128,13 +130,24 @@ int main(void)
DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
- DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr));
- DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr));
- DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr));
- DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr));
- DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr));
- DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr));
- DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr));
+ DEFINE(VGIC_SAVE_FN, offsetof(struct vgic_sr_vectors, save_vgic));
+ DEFINE(VGIC_RESTORE_FN, offsetof(struct vgic_sr_vectors, restore_vgic));
+ DEFINE(VGIC_SR_VECTOR_SZ, sizeof(struct vgic_sr_vectors));
+ DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
+ DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
+ DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
+ DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
+ DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
+ DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
+ DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
+ DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
+ DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
+ DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
+ DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
+ DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
+ DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
+ DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
+ DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 7f66fe150265..b33051d501e6 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -31,15 +31,6 @@
#include <asm/cputype.h>
#include <asm/system_misc.h>
-/* Low-level stepping controls. */
-#define DBG_MDSCR_SS (1 << 0)
-#define DBG_SPSR_SS (1 << 21)
-
-/* MDSCR_EL1 enabling bits */
-#define DBG_MDSCR_KDE (1 << 13)
-#define DBG_MDSCR_MDE (1 << 15)
-#define DBG_MDSCR_MASK ~(DBG_MDSCR_KDE | DBG_MDSCR_MDE)
-
/* Determine debug architecture. */
u8 debug_monitors_arch(void)
{
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index ce2d97255ba9..55d0e035205f 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -17,6 +17,19 @@ ENTRY(stext)
jiffies = jiffies_64;
+#define HYPERVISOR_TEXT \
+ /* \
+ * Force the alignment to be compatible with \
+ * the vectors requirements \
+ */ \
+ . = ALIGN(2048); \
+ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
+ *(.hyp.idmap.text) \
+ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \
+ VMLINUX_SYMBOL(__hyp_text_start) = .; \
+ *(.hyp.text) \
+ VMLINUX_SYMBOL(__hyp_text_end) = .;
+
SECTIONS
{
/*
@@ -48,6 +61,7 @@ SECTIONS
TEXT_TEXT
SCHED_TEXT
LOCK_TEXT
+ HYPERVISOR_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
@@ -102,3 +116,9 @@ SECTIONS
STABS_DEBUG
.comment 0 : { *(.comment) }
}
+
+/*
+ * The HYP init code can't be more than a page long.
+ */
+ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end),
+ "HYP init code too big")
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
new file mode 100644
index 000000000000..8ba85e9ea388
--- /dev/null
+++ b/arch/arm64/kvm/Kconfig
@@ -0,0 +1,63 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ ---help---
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ bool "Kernel-based Virtual Machine (KVM) support"
+ select MMU_NOTIFIER
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select KVM_MMIO
+ select KVM_ARM_HOST
+ select KVM_ARM_VGIC
+ select KVM_ARM_TIMER
+ ---help---
+ Support hosting virtualized guest machines.
+
+ If unsure, say N.
+
+config KVM_ARM_HOST
+ bool
+ ---help---
+ Provides host support for ARM processors.
+
+config KVM_ARM_MAX_VCPUS
+ int "Number maximum supported virtual CPUs per VM"
+ depends on KVM_ARM_HOST
+ default 4
+ help
+ Static number of maximum supported virtual CPUs per VM.
+
+ If you choose a high number, the vcpu structures will be quite
+ large, so only choose a reasonable number that you expect to
+ actually use.
+
+config KVM_ARM_VGIC
+ bool
+ depends on KVM_ARM_HOST && OF
+ select HAVE_KVM_IRQCHIP
+ ---help---
+ Adds support for a hardware assisted, in-kernel GIC emulation.
+
+config KVM_ARM_TIMER
+ bool
+ depends on KVM_ARM_VGIC
+ ---help---
+ Adds support for the Architected Timers in virtual machines.
+
+endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
new file mode 100644
index 000000000000..32a096174b94
--- /dev/null
+++ b/arch/arm64/kvm/Makefile
@@ -0,0 +1,27 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+CFLAGS_arm.o := -I.
+CFLAGS_mmu.o := -I.
+
+KVM=../../../virt/kvm
+ARM=../../../arch/arm/kvm
+
+obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
+
+kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
+kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
+kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
+
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
+kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c
new file mode 100644
index 000000000000..124418d17049
--- /dev/null
+++ b/arch/arm64/kvm/emulate.c
@@ -0,0 +1,158 @@
+/*
+ * (not much of an) Emulation layer for 32bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+
+/*
+ * stolen from arch/arm/kernel/opcodes.c
+ *
+ * condition code lookup table
+ * index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ *
+ * bit position in short is condition code: NZCV
+ */
+static const unsigned short cc_map[16] = {
+ 0xF0F0, /* EQ == Z set */
+ 0x0F0F, /* NE */
+ 0xCCCC, /* CS == C set */
+ 0x3333, /* CC */
+ 0xFF00, /* MI == N set */
+ 0x00FF, /* PL */
+ 0xAAAA, /* VS == V set */
+ 0x5555, /* VC */
+ 0x0C0C, /* HI == C set && Z clear */
+ 0xF3F3, /* LS == C clear || Z set */
+ 0xAA55, /* GE == (N==V) */
+ 0x55AA, /* LT == (N!=V) */
+ 0x0A05, /* GT == (!Z && (N==V)) */
+ 0xF5FA, /* LE == (Z || (N!=V)) */
+ 0xFFFF, /* AL always */
+ 0 /* NV */
+};
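+
+/*
+ * Editorial example: for a trapped instruction with condition EQ
+ * (cond == 0) and the Z flag set, cpsr >> 28 gives the NZCV nibble
+ * 0b0100, and (cc_map[0] >> 4) & 1 == 1, i.e. the instruction would
+ * have executed.
+ */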
+
+static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
+{
+ u32 esr = kvm_vcpu_get_hsr(vcpu);
+
+ if (esr & ESR_EL2_CV)
+ return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT;
+
+ return -1;
+}
+
+/*
+ * Check if a trapped instruction should have been executed or not.
+ */
+bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+{
+ unsigned long cpsr;
+ u32 cpsr_cond;
+ int cond;
+
+ /* Top two bits non-zero? Unconditional. */
+ if (kvm_vcpu_get_hsr(vcpu) >> 30)
+ return true;
+
+ /* Is condition field valid? */
+ cond = kvm_vcpu_get_condition(vcpu);
+ if (cond == 0xE)
+ return true;
+
+ cpsr = *vcpu_cpsr(vcpu);
+
+ if (cond < 0) {
+ /* This can happen in Thumb mode: examine IT state. */
+ unsigned long it;
+
+ it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3);
+
+ /* it == 0 => unconditional. */
+ if (it == 0)
+ return true;
+
+ /* The cond for this insn works out as the top 4 bits. */
+ cond = (it >> 4);
+ }
+
+ cpsr_cond = cpsr >> 28;
+
+ if (!((cc_map[cond] >> cpsr_cond) & 1))
+ return false;
+
+ return true;
+}
+
+/**
+ * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block
+ * @vcpu: The VCPU pointer
+ *
+ * When exceptions occur while instructions are executed in Thumb IF-THEN
+ * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
+ * to do this little bit of work manually. The fields map like this:
+ *
+ * IT[7:0] -> CPSR[26:25],CPSR[15:10]
+ */
+static void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long itbits, cond;
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ bool is_arm = !(cpsr & COMPAT_PSR_T_BIT);
+
+ BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK));
+
+ if (!(cpsr & COMPAT_PSR_IT_MASK))
+ return;
+
+ cond = (cpsr & 0xe000) >> 13;
+ itbits = (cpsr & 0x1c00) >> (10 - 2);
+ itbits |= (cpsr & (0x3 << 25)) >> 25;
+
+ /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */
+ if ((itbits & 0x7) == 0)
+ itbits = cond = 0;
+ else
+ itbits = (itbits << 1) & 0x1f;
+
+ cpsr &= ~COMPAT_PSR_IT_MASK;
+ cpsr |= cond << 13;
+ cpsr |= (itbits & 0x1c) << (10 - 2);
+ cpsr |= (itbits & 0x3) << 25;
+ *vcpu_cpsr(vcpu) = cpsr;
+}
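+
+/*
+ * Editorial example of the advance: IT[4:0] == 0b00100 shifts to
+ * 0b01000 (more conditional instructions follow); the next advance
+ * finds (0b01000 & 0x7) == 0 and clears cond and itbits, ending the
+ * IT block.
+ */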
+
+/**
+ * kvm_skip_instr32 - skip a trapped instruction and proceed to the next
+ * @vcpu: The vcpu pointer
+ */
+void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr)
+{
+ bool is_thumb;
+
+ is_thumb = !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_T_BIT);
+ if (is_thumb && !is_wide_instr)
+ *vcpu_pc(vcpu) += 2;
+ else
+ *vcpu_pc(vcpu) += 4;
+ kvm_adjust_itstate(vcpu);
+}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
new file mode 100644
index 000000000000..76794692c20b
--- /dev/null
+++ b/arch/arm64/kvm/guest.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/guest.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputype.h>
+#include <asm/uaccess.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { NULL }
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+ return 0;
+}
+
+static u64 core_reg_offset_from_id(u64 id)
+{
+ return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
+}
+
+static int get_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ /*
+ * Because the kvm_regs structure is a mix of 32, 64 and
+ * 128bit fields, we index it as if it was a 32bit
+ * array. Hence below, nr_regs is the number of entries and off
+ * is the index into that "array".
+ */
+ __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+ struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+ int nr_regs = sizeof(*regs) / sizeof(__u32);
+ u32 off;
+
+ /* Our ID is an index into the kvm_regs struct. */
+ off = core_reg_offset_from_id(reg->id);
+ if (off >= nr_regs ||
+ (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+ return -ENOENT;
+
+ if (copy_to_user(uaddr, ((u32 *)regs) + off, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
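+
+/*
+ * Editorial example: userspace reaches X1 via
+ * KVM_REG_ARM_CORE_REG(regs.regs[1]), i.e. offset 8 into struct
+ * kvm_regs divided by sizeof(__u32) == index 2, OR-ed with
+ * KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE.
+ */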
+
+static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ __u32 __user *uaddr = (__u32 __user *)(unsigned long)reg->addr;
+ struct kvm_regs *regs = vcpu_gp_regs(vcpu);
+ int nr_regs = sizeof(*regs) / sizeof(__u32);
+ __uint128_t tmp;
+ void *valp = &tmp;
+ u64 off;
+ int err = 0;
+
+ /* Our ID is an index into the kvm_regs struct. */
+ off = core_reg_offset_from_id(reg->id);
+ if (off >= nr_regs ||
+ (off + (KVM_REG_SIZE(reg->id) / sizeof(__u32))) >= nr_regs)
+ return -ENOENT;
+
+ if (KVM_REG_SIZE(reg->id) > sizeof(tmp))
+ return -EINVAL;
+
+ if (copy_from_user(valp, uaddr, KVM_REG_SIZE(reg->id))) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ if (off == KVM_REG_ARM_CORE_REG(regs.pstate)) {
+ u32 mode = (*(u32 *)valp) & COMPAT_PSR_MODE_MASK;
+ switch (mode) {
+ case COMPAT_PSR_MODE_USR:
+ case COMPAT_PSR_MODE_FIQ:
+ case COMPAT_PSR_MODE_IRQ:
+ case COMPAT_PSR_MODE_SVC:
+ case COMPAT_PSR_MODE_ABT:
+ case COMPAT_PSR_MODE_UND:
+ case PSR_MODE_EL0t:
+ case PSR_MODE_EL1t:
+ case PSR_MODE_EL1h:
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ memcpy((u32 *)regs + off, valp, KVM_REG_SIZE(reg->id));
+out:
+ return err;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ return -EINVAL;
+}
+
+static unsigned long num_core_regs(void)
+{
+ return sizeof(struct kvm_regs) / sizeof(__u32);
+}
+
+/*
+ * ARM64 versions of the TIMER registers, always available on arm64
+ */
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+ switch (index) {
+ case KVM_REG_ARM_TIMER_CTL:
+ case KVM_REG_ARM_TIMER_CNT:
+ case KVM_REG_ARM_TIMER_CVAL:
+ return true;
+ }
+ return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ int ret;
+
+ ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+ if (ret != 0)
+ return -EFAULT;
+
+ return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+
+ val = kvm_arm_timer_get_reg(vcpu, reg->id);
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
+/**
+ * kvm_arm_num_regs - how many registers do we present via KVM_GET_ONE_REG
+ *
+ * This is for all registers.
+ */
+unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
+{
+ return num_core_regs() + kvm_arm_num_sys_reg_descs(vcpu)
+ + NUM_TIMER_REGS;
+}
+
+/**
+ * kvm_arm_copy_reg_indices - get indices of all registers.
+ *
+ * We do core registers right here, then we append system regs.
+ */
+int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ unsigned int i;
+ const u64 core_reg = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE;
+ int ret;
+
+ for (i = 0; i < sizeof(struct kvm_regs) / sizeof(__u32); i++) {
+ if (put_user(core_reg | i, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ ret = copy_timer_indices(vcpu, uindices);
+ if (ret)
+ return ret;
+ uindices += NUM_TIMER_REGS;
+
+ return kvm_arm_copy_sys_reg_indices(vcpu, uindices);
+}
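+
+/*
+ * Editorial sketch of the userspace side (hypothetical snippet, not
+ * part of this patch): the indices written above are what
+ * KVM_GET_REG_LIST returns, and each one can be fed straight into
+ * KVM_GET_ONE_REG:
+ *
+ *	struct kvm_reg_list *list;	// list->n ids follow in list->reg[]
+ *	ioctl(vcpu_fd, KVM_GET_REG_LIST, list);
+ *	struct kvm_one_reg reg = { .id = list->reg[0],
+ *				   .addr = (__u64)(unsigned long)&val };
+ *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ */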
+
+int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ /* We currently use nothing arch-specific in upper 32 bits */
+ if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+ return -EINVAL;
+
+ /* Register group 16 means we want a core register. */
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+ return get_core_reg(vcpu, reg);
+
+ if (is_timer_reg(reg->id))
+ return get_timer_reg(vcpu, reg);
+
+ return kvm_arm_sys_reg_get_reg(vcpu, reg);
+}
+
+int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ /* We currently use nothing arch-specific in upper 32 bits */
+ if ((reg->id & ~KVM_REG_SIZE_MASK) >> 32 != KVM_REG_ARM64 >> 32)
+ return -EINVAL;
+
+ /* Register group 16 means we set a core register. */
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
+ return set_core_reg(vcpu, reg);
+
+ if (is_timer_reg(reg->id))
+ return set_timer_reg(vcpu, reg);
+
+ return kvm_arm_sys_reg_set_reg(vcpu, reg);
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int __attribute_const__ kvm_target_cpu(void)
+{
+ unsigned long implementor = read_cpuid_implementor();
+ unsigned long part_number = read_cpuid_part_number();
+
+ switch (implementor) {
+ case ARM_CPU_IMP_ARM:
+ switch (part_number) {
+ case ARM_CPU_PART_AEM_V8:
+ return KVM_ARM_TARGET_AEM_V8;
+ case ARM_CPU_PART_FOUNDATION:
+ return KVM_ARM_TARGET_FOUNDATION_V8;
+ case ARM_CPU_PART_CORTEX_A53:
+ return KVM_ARM_TARGET_CORTEX_A53;
+ case ARM_CPU_PART_CORTEX_A57:
+ return KVM_ARM_TARGET_CORTEX_A57;
+ }
+ break;
+ case ARM_CPU_IMP_APM:
+ switch (part_number) {
+ case APM_CPU_PART_POTENZA:
+ return KVM_ARM_TARGET_XGENE_POTENZA;
+ }
+ break;
+ }
+
+ return -EINVAL;
+}
+
+int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+ const struct kvm_vcpu_init *init)
+{
+ unsigned int i;
+ int phys_target = kvm_target_cpu();
+
+ if (init->target != phys_target)
+ return -EINVAL;
+
+ vcpu->arch.target = phys_target;
+ bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
+
+ /* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+ for (i = 0; i < sizeof(init->features) * 8; i++) {
+ if (init->features[i / 32] & (1 << (i % 32))) {
+ if (i >= KVM_VCPU_MAX_FEATURES)
+ return -ENOENT;
+ set_bit(i, vcpu->arch.features);
+ }
+ }
+
+ /* Now we know what it is, we can reset it. */
+ return kvm_reset_vcpu(vcpu);
+}
+
+int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+{
+ int target = kvm_target_cpu();
+
+ if (target < 0)
+ return -ENODEV;
+
+ memset(init, 0, sizeof(*init));
+
+ /*
+ * For now, we don't return any features.
+ * In future, we might use features to return target
+ * specific features available for the preferred
+ * target type.
+ */
+ init->target = (__u32)target;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL;
+}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
new file mode 100644
index 000000000000..e28be510380c
--- /dev/null
+++ b/arch/arm64/kvm/handle_exit.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/handle_exit.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_psci.h>
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int ret;
+
+ ret = kvm_psci_call(vcpu);
+ if (ret < 0) {
+ kvm_inject_undefined(vcpu);
+ return 1;
+ }
+
+ return ret;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+/**
+ * kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
+ * instruction executed by a guest
+ *
+ * @vcpu: the vcpu pointer
+ *
+ * WFE: Yield the CPU and come back to this vcpu when the scheduler
+ * decides to.
+ * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * world-switches and schedule other host processes until there is an
+ * incoming IRQ or FIQ to the VM.
+ */
+static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE)
+ kvm_vcpu_on_spin(vcpu);
+ else
+ kvm_vcpu_block(vcpu);
+
+ return 1;
+}
+
+static exit_handle_fn arm_exit_handlers[] = {
+ [ESR_EL2_EC_WFI] = kvm_handle_wfx,
+ [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32,
+ [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64,
+ [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_32,
+ [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store,
+ [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_64,
+ [ESR_EL2_EC_HVC32] = handle_hvc,
+ [ESR_EL2_EC_SMC32] = handle_smc,
+ [ESR_EL2_EC_HVC64] = handle_hvc,
+ [ESR_EL2_EC_SMC64] = handle_smc,
+ [ESR_EL2_EC_SYS64] = kvm_handle_sys_reg,
+ [ESR_EL2_EC_IABT] = kvm_handle_guest_abort,
+ [ESR_EL2_EC_DABT] = kvm_handle_guest_abort,
+};
+
+static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
+{
+ u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+
+ if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) ||
+ !arm_exit_handlers[hsr_ec]) {
+ kvm_err("Unknown exception class: hsr: %#08x\n",
+ (unsigned int)kvm_vcpu_get_hsr(vcpu));
+ BUG();
+ }
+
+ return arm_exit_handlers[hsr_ec];
+}
+
+/*
+ * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
+ * proper exit to userspace.
+ */
+int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index)
+{
+ exit_handle_fn exit_handler;
+
+ switch (exception_index) {
+ case ARM_EXCEPTION_IRQ:
+ return 1;
+ case ARM_EXCEPTION_TRAP:
+ /*
+ * See ARM ARM B1.14.1: "Hyp traps on instructions
+ * that fail their condition code check"
+ */
+ if (!kvm_condition_valid(vcpu)) {
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ return 1;
+ }
+
+ exit_handler = kvm_get_exit_handler(vcpu);
+
+ return exit_handler(vcpu, run);
+ default:
+ kvm_pr_unimpl("Unsupported exception type: %d",
+ exception_index);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return 0;
+ }
+}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
new file mode 100644
index 000000000000..d968796f4b2d
--- /dev/null
+++ b/arch/arm64/kvm/hyp-init.S
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+ .text
+ .pushsection .hyp.idmap.text, "ax"
+
+ .align 11
+
+ENTRY(__kvm_hyp_init)
+ ventry __invalid // Synchronous EL2t
+ ventry __invalid // IRQ EL2t
+ ventry __invalid // FIQ EL2t
+ ventry __invalid // Error EL2t
+
+ ventry __invalid // Synchronous EL2h
+ ventry __invalid // IRQ EL2h
+ ventry __invalid // FIQ EL2h
+ ventry __invalid // Error EL2h
+
+ ventry __do_hyp_init // Synchronous 64-bit EL1
+ ventry __invalid // IRQ 64-bit EL1
+ ventry __invalid // FIQ 64-bit EL1
+ ventry __invalid // Error 64-bit EL1
+
+ ventry __invalid // Synchronous 32-bit EL1
+ ventry __invalid // IRQ 32-bit EL1
+ ventry __invalid // FIQ 32-bit EL1
+ ventry __invalid // Error 32-bit EL1
+
+__invalid:
+ b .
+
+ /*
+ * x0: HYP boot pgd
+ * x1: HYP pgd
+ * x2: HYP stack
+ * x3: HYP vectors
+ */
+__do_hyp_init:
+
+ msr ttbr0_el2, x0
+
+ mrs x4, tcr_el1
+ ldr x5, =TCR_EL2_MASK
+ and x4, x4, x5
+ ldr x5, =TCR_EL2_FLAGS
+ orr x4, x4, x5
+ msr tcr_el2, x4
+
+ ldr x4, =VTCR_EL2_FLAGS
+ /*
+ * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
+ * VTCR_EL2.
+ */
+ mrs x5, ID_AA64MMFR0_EL1
+ bfi x4, x5, #16, #3
+ msr vtcr_el2, x4
+
+ mrs x4, mair_el1
+ msr mair_el2, x4
+ isb
+
+ mrs x4, sctlr_el2
+ and x4, x4, #SCTLR_EL2_EE // preserve endianness of EL2
+ ldr x5, =SCTLR_EL2_FLAGS
+ orr x4, x4, x5
+ msr sctlr_el2, x4
+ isb
+
+ /* MMU is now enabled. Get ready for the trampoline dance */
+ ldr x4, =TRAMPOLINE_VA
+ adr x5, target
+ bfi x4, x5, #0, #PAGE_SHIFT
+ br x4
+
+target: /* We're now in the trampoline code, switch page tables */
+ msr ttbr0_el2, x1
+ isb
+
+ /* Invalidate the old TLBs */
+ tlbi alle2
+ dsb sy
+
+ /* Set the stack and new vectors */
+ kern_hyp_va x2
+ mov sp, x2
+ kern_hyp_va x3
+ msr vbar_el2, x3
+
+ /* Hello, World! */
+ eret
+ENDPROC(__kvm_hyp_init)
+
+ .ltorg
+
+ .popsection
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
new file mode 100644
index 000000000000..b72aa9f9215c
--- /dev/null
+++ b/arch/arm64/kvm/hyp.S
@@ -0,0 +1,1274 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/debug-monitors.h>
+#include <asm/fpsimdmacros.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x)
+#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x)
+#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x)
+
+ .text
+ .pushsection .hyp.text, "ax"
+ .align PAGE_SHIFT
+
+.macro save_common_regs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ add x3, x2, #CPU_XREG_OFFSET(19)
+ stp x19, x20, [x3]
+ stp x21, x22, [x3, #16]
+ stp x23, x24, [x3, #32]
+ stp x25, x26, [x3, #48]
+ stp x27, x28, [x3, #64]
+ stp x29, lr, [x3, #80]
+
+ mrs x19, sp_el0
+ mrs x20, elr_el2 // EL1 PC
+ mrs x21, spsr_el2 // EL1 pstate
+
+ stp x19, x20, [x3, #96]
+ str x21, [x3, #112]
+
+ mrs x22, sp_el1
+ mrs x23, elr_el1
+ mrs x24, spsr_el1
+
+ str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+ str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+ str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+.endm
+
+.macro restore_common_regs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
+ ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
+ ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
+
+ msr sp_el1, x22
+ msr elr_el1, x23
+ msr spsr_el1, x24
+
+ add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0
+ ldp x19, x20, [x3]
+ ldr x21, [x3, #16]
+
+ msr sp_el0, x19
+ msr elr_el2, x20 // EL1 PC
+ msr spsr_el2, x21 // EL1 pstate
+
+ add x3, x2, #CPU_XREG_OFFSET(19)
+ ldp x19, x20, [x3]
+ ldp x21, x22, [x3, #16]
+ ldp x23, x24, [x3, #32]
+ ldp x25, x26, [x3, #48]
+ ldp x27, x28, [x3, #64]
+ ldp x29, lr, [x3, #80]
+.endm
+
+.macro save_host_regs
+ save_common_regs
+.endm
+
+.macro restore_host_regs
+ restore_common_regs
+.endm
+
+.macro save_fpsimd
+ // x2: cpu context address
+ // x3, x4: tmp regs
+ add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+ fpsimd_save x3, 4
+.endm
+
+.macro restore_fpsimd
+ // x2: cpu context address
+ // x3, x4: tmp regs
+ add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+ fpsimd_restore x3, 4
+.endm
+
+.macro save_guest_regs
+ // x0 is the vcpu address
+ // x1 is the return code, do not corrupt!
+ // x2 is the cpu context
+ // x3 is a tmp register
+ // Guest's x0-x3 are on the stack
+
+ // Compute base to save registers
+ add x3, x2, #CPU_XREG_OFFSET(4)
+ stp x4, x5, [x3]
+ stp x6, x7, [x3, #16]
+ stp x8, x9, [x3, #32]
+ stp x10, x11, [x3, #48]
+ stp x12, x13, [x3, #64]
+ stp x14, x15, [x3, #80]
+ stp x16, x17, [x3, #96]
+ str x18, [x3, #112]
+
+ pop x6, x7 // x2, x3
+ pop x4, x5 // x0, x1
+
+ add x3, x2, #CPU_XREG_OFFSET(0)
+ stp x4, x5, [x3]
+ stp x6, x7, [x3, #16]
+
+ save_common_regs
+.endm
+
+.macro restore_guest_regs
+ // x0 is the vcpu address.
+ // x2 is the cpu context
+ // x3 is a tmp register
+
+ // Prepare x0-x3 for later restore
+ add x3, x2, #CPU_XREG_OFFSET(0)
+ ldp x4, x5, [x3]
+ ldp x6, x7, [x3, #16]
+ push x4, x5 // Push x0-x3 on the stack
+ push x6, x7
+
+ // x4-x18
+ ldp x4, x5, [x3, #32]
+ ldp x6, x7, [x3, #48]
+ ldp x8, x9, [x3, #64]
+ ldp x10, x11, [x3, #80]
+ ldp x12, x13, [x3, #96]
+ ldp x14, x15, [x3, #112]
+ ldp x16, x17, [x3, #128]
+ ldr x18, [x3, #144]
+
+ // x19-x29, lr, sp*, elr*, spsr*
+ restore_common_regs
+
+ // Last bits of the 64bit state
+ pop x2, x3
+ pop x0, x1
+
+ // Do not touch any register after this!
+.endm
+
+/*
+ * Macros to perform system register save/restore.
+ *
+ * Ordering here is absolutely critical, and must be kept consistent
+ * in {save,restore}_sysregs, {save,restore}_guest_32bit_state,
+ * and in kvm_asm.h.
+ *
+ * In other words, don't touch any of these unless you know what
+ * you are doing.
+ */
+.macro save_sysregs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+ mrs x4, vmpidr_el2
+ mrs x5, csselr_el1
+ mrs x6, sctlr_el1
+ mrs x7, actlr_el1
+ mrs x8, cpacr_el1
+ mrs x9, ttbr0_el1
+ mrs x10, ttbr1_el1
+ mrs x11, tcr_el1
+ mrs x12, esr_el1
+ mrs x13, afsr0_el1
+ mrs x14, afsr1_el1
+ mrs x15, far_el1
+ mrs x16, mair_el1
+ mrs x17, vbar_el1
+ mrs x18, contextidr_el1
+ mrs x19, tpidr_el0
+ mrs x20, tpidrro_el0
+ mrs x21, tpidr_el1
+ mrs x22, amair_el1
+ mrs x23, cntkctl_el1
+ mrs x24, par_el1
+ mrs x25, mdscr_el1
+
+ stp x4, x5, [x3]
+ stp x6, x7, [x3, #16]
+ stp x8, x9, [x3, #32]
+ stp x10, x11, [x3, #48]
+ stp x12, x13, [x3, #64]
+ stp x14, x15, [x3, #80]
+ stp x16, x17, [x3, #96]
+ stp x18, x19, [x3, #112]
+ stp x20, x21, [x3, #128]
+ stp x22, x23, [x3, #144]
+ stp x24, x25, [x3, #160]
+.endm
+
+.macro save_debug
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ mrs x26, id_aa64dfr0_el1
+ ubfx x24, x26, #12, #4 // Extract BRPs
+ ubfx x25, x26, #20, #4 // Extract WRPs
+ mov w26, #15
+ sub w24, w26, w24 // How many BPs to skip
+ sub w25, w26, w25 // How many WPs to skip
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ mrs x20, dbgbcr15_el1
+ mrs x19, dbgbcr14_el1
+ mrs x18, dbgbcr13_el1
+ mrs x17, dbgbcr12_el1
+ mrs x16, dbgbcr11_el1
+ mrs x15, dbgbcr10_el1
+ mrs x14, dbgbcr9_el1
+ mrs x13, dbgbcr8_el1
+ mrs x12, dbgbcr7_el1
+ mrs x11, dbgbcr6_el1
+ mrs x10, dbgbcr5_el1
+ mrs x9, dbgbcr4_el1
+ mrs x8, dbgbcr3_el1
+ mrs x7, dbgbcr2_el1
+ mrs x6, dbgbcr1_el1
+ mrs x5, dbgbcr0_el1
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ mrs x20, dbgbvr15_el1
+ mrs x19, dbgbvr14_el1
+ mrs x18, dbgbvr13_el1
+ mrs x17, dbgbvr12_el1
+ mrs x16, dbgbvr11_el1
+ mrs x15, dbgbvr10_el1
+ mrs x14, dbgbvr9_el1
+ mrs x13, dbgbvr8_el1
+ mrs x12, dbgbvr7_el1
+ mrs x11, dbgbvr6_el1
+ mrs x10, dbgbvr5_el1
+ mrs x9, dbgbvr4_el1
+ mrs x8, dbgbvr3_el1
+ mrs x7, dbgbvr2_el1
+ mrs x6, dbgbvr1_el1
+ mrs x5, dbgbvr0_el1
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ mrs x20, dbgwcr15_el1
+ mrs x19, dbgwcr14_el1
+ mrs x18, dbgwcr13_el1
+ mrs x17, dbgwcr12_el1
+ mrs x16, dbgwcr11_el1
+ mrs x15, dbgwcr10_el1
+ mrs x14, dbgwcr9_el1
+ mrs x13, dbgwcr8_el1
+ mrs x12, dbgwcr7_el1
+ mrs x11, dbgwcr6_el1
+ mrs x10, dbgwcr5_el1
+ mrs x9, dbgwcr4_el1
+ mrs x8, dbgwcr3_el1
+ mrs x7, dbgwcr2_el1
+ mrs x6, dbgwcr1_el1
+ mrs x5, dbgwcr0_el1
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ mrs x20, dbgwvr15_el1
+ mrs x19, dbgwvr14_el1
+ mrs x18, dbgwvr13_el1
+ mrs x17, dbgwvr12_el1
+ mrs x16, dbgwvr11_el1
+ mrs x15, dbgwvr10_el1
+ mrs x14, dbgwvr9_el1
+ mrs x13, dbgwvr8_el1
+ mrs x12, dbgwvr7_el1
+ mrs x11, dbgwvr6_el1
+ mrs x10, dbgwvr5_el1
+ mrs x9, dbgwvr4_el1
+ mrs x8, dbgwvr3_el1
+ mrs x7, dbgwvr2_el1
+ mrs x6, dbgwvr1_el1
+ mrs x5, dbgwvr0_el1
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+
+1:
+ str x20, [x3, #(15 * 8)]
+ str x19, [x3, #(14 * 8)]
+ str x18, [x3, #(13 * 8)]
+ str x17, [x3, #(12 * 8)]
+ str x16, [x3, #(11 * 8)]
+ str x15, [x3, #(10 * 8)]
+ str x14, [x3, #(9 * 8)]
+ str x13, [x3, #(8 * 8)]
+ str x12, [x3, #(7 * 8)]
+ str x11, [x3, #(6 * 8)]
+ str x10, [x3, #(5 * 8)]
+ str x9, [x3, #(4 * 8)]
+ str x8, [x3, #(3 * 8)]
+ str x7, [x3, #(2 * 8)]
+ str x6, [x3, #(1 * 8)]
+ str x5, [x3, #(0 * 8)]
+
+ mrs x21, mdccint_el1
+ str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+.endm
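
The adr/add/br triplets above implement a computed jump into a 16-entry instruction ladder: the BRPs field (bits [15:12]) and WRPs field (bits [23:20]) of ID_AA64DFR0_EL1 each encode the number of implemented units minus one, so jumping over (15 - field) entries touches exactly the registers that exist. A sketch of the arithmetic, under those field definitions:

    #include <stdint.h>

    /* Number of 4-byte instructions to jump over in a 16-entry
     * ladder covering regs 15 down to 0: the field encodes
     * (units - 1), so (15 - field) skips leave (field + 1) accesses. */
    static unsigned int dbg_ladder_skip(uint64_t dfr0, unsigned int shift)
    {
            unsigned int field = (dfr0 >> shift) & 0xf;     /* the ubfx */
            return 15 - field;
    }

    /* Byte offset added to the 1f label, matching "lsl #2". */
    static unsigned int dbg_ladder_offset(uint64_t dfr0, unsigned int shift)
    {
            return dbg_ladder_skip(dfr0, shift) << 2;
    }

    /* shift is 12 for breakpoints (BRPs), 20 for watchpoints (WRPs) */
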
+
+.macro restore_sysregs
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
+
+ ldp x4, x5, [x3]
+ ldp x6, x7, [x3, #16]
+ ldp x8, x9, [x3, #32]
+ ldp x10, x11, [x3, #48]
+ ldp x12, x13, [x3, #64]
+ ldp x14, x15, [x3, #80]
+ ldp x16, x17, [x3, #96]
+ ldp x18, x19, [x3, #112]
+ ldp x20, x21, [x3, #128]
+ ldp x22, x23, [x3, #144]
+ ldp x24, x25, [x3, #160]
+
+ msr vmpidr_el2, x4
+ msr csselr_el1, x5
+ msr sctlr_el1, x6
+ msr actlr_el1, x7
+ msr cpacr_el1, x8
+ msr ttbr0_el1, x9
+ msr ttbr1_el1, x10
+ msr tcr_el1, x11
+ msr esr_el1, x12
+ msr afsr0_el1, x13
+ msr afsr1_el1, x14
+ msr far_el1, x15
+ msr mair_el1, x16
+ msr vbar_el1, x17
+ msr contextidr_el1, x18
+ msr tpidr_el0, x19
+ msr tpidrro_el0, x20
+ msr tpidr_el1, x21
+ msr amair_el1, x22
+ msr cntkctl_el1, x23
+ msr par_el1, x24
+ msr mdscr_el1, x25
+.endm
+
+.macro restore_debug
+ // x2: base address for cpu context
+ // x3: tmp register
+
+ mrs x26, id_aa64dfr0_el1
+ ubfx x24, x26, #12, #4 // Extract BRPs
+ ubfx x25, x26, #20, #4 // Extract WRPs
+ mov w26, #15
+ sub w24, w26, w24 // How many BPs to skip
+ sub w25, w26, w25 // How many WPs to skip
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ msr dbgbcr15_el1, x20
+ msr dbgbcr14_el1, x19
+ msr dbgbcr13_el1, x18
+ msr dbgbcr12_el1, x17
+ msr dbgbcr11_el1, x16
+ msr dbgbcr10_el1, x15
+ msr dbgbcr9_el1, x14
+ msr dbgbcr8_el1, x13
+ msr dbgbcr7_el1, x12
+ msr dbgbcr6_el1, x11
+ msr dbgbcr5_el1, x10
+ msr dbgbcr4_el1, x9
+ msr dbgbcr3_el1, x8
+ msr dbgbcr2_el1, x7
+ msr dbgbcr1_el1, x6
+ msr dbgbcr0_el1, x5
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x24, lsl #2
+ br x26
+1:
+ msr dbgbvr15_el1, x20
+ msr dbgbvr14_el1, x19
+ msr dbgbvr13_el1, x18
+ msr dbgbvr12_el1, x17
+ msr dbgbvr11_el1, x16
+ msr dbgbvr10_el1, x15
+ msr dbgbvr9_el1, x14
+ msr dbgbvr8_el1, x13
+ msr dbgbvr7_el1, x12
+ msr dbgbvr6_el1, x11
+ msr dbgbvr5_el1, x10
+ msr dbgbvr4_el1, x9
+ msr dbgbvr3_el1, x8
+ msr dbgbvr2_el1, x7
+ msr dbgbvr1_el1, x6
+ msr dbgbvr0_el1, x5
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWCR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ msr dbgwcr15_el1, x20
+ msr dbgwcr14_el1, x19
+ msr dbgwcr13_el1, x18
+ msr dbgwcr12_el1, x17
+ msr dbgwcr11_el1, x16
+ msr dbgwcr10_el1, x15
+ msr dbgwcr9_el1, x14
+ msr dbgwcr8_el1, x13
+ msr dbgwcr7_el1, x12
+ msr dbgwcr6_el1, x11
+ msr dbgwcr5_el1, x10
+ msr dbgwcr4_el1, x9
+ msr dbgwcr3_el1, x8
+ msr dbgwcr2_el1, x7
+ msr dbgwcr1_el1, x6
+ msr dbgwcr0_el1, x5
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DBGWVR0_EL1)
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ ldr x20, [x3, #(15 * 8)]
+ ldr x19, [x3, #(14 * 8)]
+ ldr x18, [x3, #(13 * 8)]
+ ldr x17, [x3, #(12 * 8)]
+ ldr x16, [x3, #(11 * 8)]
+ ldr x15, [x3, #(10 * 8)]
+ ldr x14, [x3, #(9 * 8)]
+ ldr x13, [x3, #(8 * 8)]
+ ldr x12, [x3, #(7 * 8)]
+ ldr x11, [x3, #(6 * 8)]
+ ldr x10, [x3, #(5 * 8)]
+ ldr x9, [x3, #(4 * 8)]
+ ldr x8, [x3, #(3 * 8)]
+ ldr x7, [x3, #(2 * 8)]
+ ldr x6, [x3, #(1 * 8)]
+ ldr x5, [x3, #(0 * 8)]
+
+ adr x26, 1f
+ add x26, x26, x25, lsl #2
+ br x26
+1:
+ msr dbgwvr15_el1, x20
+ msr dbgwvr14_el1, x19
+ msr dbgwvr13_el1, x18
+ msr dbgwvr12_el1, x17
+ msr dbgwvr11_el1, x16
+ msr dbgwvr10_el1, x15
+ msr dbgwvr9_el1, x14
+ msr dbgwvr8_el1, x13
+ msr dbgwvr7_el1, x12
+ msr dbgwvr6_el1, x11
+ msr dbgwvr5_el1, x10
+ msr dbgwvr4_el1, x9
+ msr dbgwvr3_el1, x8
+ msr dbgwvr2_el1, x7
+ msr dbgwvr1_el1, x6
+ msr dbgwvr0_el1, x5
+
+ ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
+ msr mdccint_el1, x21
+.endm
+
+.macro skip_32bit_state tmp, target
+ // Skip 32bit state if not needed
+ mrs \tmp, hcr_el2
+ tbnz \tmp, #HCR_RW_SHIFT, \target
+.endm
+
+.macro skip_tee_state tmp, target
+ // Skip ThumbEE state if not needed
+ mrs \tmp, id_pfr0_el1
+ tbz \tmp, #12, \target
+.endm
+
+.macro skip_debug_state tmp, target
+ ldr \tmp, [x0, #VCPU_DEBUG_FLAGS]
+ tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
+.endm
+
+.macro compute_debug_state target
+ // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
+ // is set, we do a full save/restore cycle and disable trapping.
+ add x25, x0, #VCPU_CONTEXT
+
+ // Check the state of MDSCR_EL1
+ ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)]
+ and x26, x25, #DBG_MDSCR_KDE
+ and x25, x25, #DBG_MDSCR_MDE
+ adds xzr, x25, x26
+ b.eq 9998f // Nothing to see there
+
+ // If any interesting bits were set, we must set the flag
+ mov x26, #KVM_ARM64_DEBUG_DIRTY
+ str x26, [x0, #VCPU_DEBUG_FLAGS]
+ b 9999f // Don't skip restore
+
+9998:
+ // Otherwise load the flags from memory in case we recently
+ // trapped
+ skip_debug_state x25, \target
+9999:
+.endm
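
The same decision written out in C for readability; a sketch assuming MDSCR_EL1.KDE at bit 13 and MDSCR_EL1.MDE at bit 15 (the DBG_MDSCR_* constants), with the vcpu's flag word reduced to a single dirty bit:

    #include <stdbool.h>
    #include <stdint.h>

    #define MDSCR_KDE       (UINT64_C(1) << 13)
    #define MDSCR_MDE       (UINT64_C(1) << 15)
    #define DEBUG_DIRTY     (UINT64_C(1) << 0)

    /* Returns true when a full debug save/restore cycle is needed. */
    static bool need_debug_switch(uint64_t mdscr, uint64_t *debug_flags)
    {
            if (mdscr & (MDSCR_KDE | MDSCR_MDE)) {
                    *debug_flags = DEBUG_DIRTY;     /* force the full cycle */
                    return true;
            }
            /* otherwise honour whatever a previous trap left behind */
            return (*debug_flags & DEBUG_DIRTY) != 0;
    }
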
+
+.macro save_guest_32bit_state
+ skip_32bit_state x3, 1f
+
+ add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+ mrs x4, spsr_abt
+ mrs x5, spsr_und
+ mrs x6, spsr_irq
+ mrs x7, spsr_fiq
+ stp x4, x5, [x3]
+ stp x6, x7, [x3, #16]
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+ mrs x4, dacr32_el2
+ mrs x5, ifsr32_el2
+ mrs x6, fpexc32_el2
+ stp x4, x5, [x3]
+ str x6, [x3, #16]
+
+ skip_debug_state x8, 2f
+ mrs x7, dbgvcr32_el2
+ str x7, [x3, #24]
+2:
+ skip_tee_state x8, 1f
+
+ add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+ mrs x4, teecr32_el1
+ mrs x5, teehbr32_el1
+ stp x4, x5, [x3]
+1:
+.endm
+
+.macro restore_guest_32bit_state
+ skip_32bit_state x3, 1f
+
+ add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
+ ldp x4, x5, [x3]
+ ldp x6, x7, [x3, #16]
+ msr spsr_abt, x4
+ msr spsr_und, x5
+ msr spsr_irq, x6
+ msr spsr_fiq, x7
+
+ add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
+ ldp x4, x5, [x3]
+ ldr x6, [x3, #16]
+ msr dacr32_el2, x4
+ msr ifsr32_el2, x5
+ msr fpexc32_el2, x6
+
+ skip_debug_state x8, 2f
+ ldr x7, [x3, #24]
+ msr dbgvcr32_el2, x7
+2:
+ skip_tee_state x8, 1f
+
+ add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
+ ldp x4, x5, [x3]
+ msr teecr32_el1, x4
+ msr teehbr32_el1, x5
+1:
+.endm
+
+.macro activate_traps
+ ldr x2, [x0, #VCPU_HCR_EL2]
+ msr hcr_el2, x2
+ ldr x2, =(CPTR_EL2_TTA)
+ msr cptr_el2, x2
+
+ ldr x2, =(1 << 15) // Trap CP15 Cr=15
+ msr hstr_el2, x2
+
+ mrs x2, mdcr_el2
+ and x2, x2, #MDCR_EL2_HPMN_MASK
+ orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
+ orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
+
+ // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
+ // if not dirty.
+ ldr x3, [x0, #VCPU_DEBUG_FLAGS]
+ tbnz x3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
+ orr x2, x2, #MDCR_EL2_TDA
+1:
+ msr mdcr_el2, x2
+.endm
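
Equivalently in C: the value programmed into MDCR_EL2 preserves the host's HPMN field, always traps the PMU and the debug ROM/OS-lock regions, and adds full debug-register trapping (TDA) only while the vcpu's debug state is clean. The bit positions below follow the ARMv8 definitions of the MDCR_EL2_* constants used above:

    #include <stdint.h>

    #define HPMN_MASK       UINT64_C(0x1f)          /* MDCR_EL2[4:0] */
    #define TPMCR           (UINT64_C(1) << 5)      /* trap PMCR */
    #define TPM             (UINT64_C(1) << 6)      /* trap PMU registers */
    #define TDA             (UINT64_C(1) << 9)      /* trap debug registers */
    #define TDOSA           (UINT64_C(1) << 10)     /* trap OS lock registers */
    #define TDRA            (UINT64_C(1) << 11)     /* trap DBGDRAR/DBGDSAR */

    static uint64_t guest_mdcr_el2(uint64_t host_mdcr, int debug_dirty)
    {
            uint64_t mdcr = host_mdcr & HPMN_MASK;

            mdcr |= TPM | TPMCR | TDRA | TDOSA;
            if (!debug_dirty)
                    mdcr |= TDA;    /* keep trapping until first debug access */
            return mdcr;
    }
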
+
+.macro deactivate_traps
+ mov x2, #HCR_RW
+ msr hcr_el2, x2
+ msr cptr_el2, xzr
+ msr hstr_el2, xzr
+
+ mrs x2, mdcr_el2
+ and x2, x2, #MDCR_EL2_HPMN_MASK
+ msr mdcr_el2, x2
+.endm
+
+.macro activate_vm
+ ldr x1, [x0, #VCPU_KVM]
+ kern_hyp_va x1
+ ldr x2, [x1, #KVM_VTTBR]
+ msr vttbr_el2, x2
+.endm
+
+.macro deactivate_vm
+ msr vttbr_el2, xzr
+.endm
+
+/*
+ * Call into the vgic backend for state saving
+ */
+.macro save_vgic_state
+ adr x24, __vgic_sr_vectors
+ ldr x24, [x24, VGIC_SAVE_FN]
+ kern_hyp_va x24
+ blr x24
+ mrs x24, hcr_el2
+ mov x25, #HCR_INT_OVERRIDE
+ neg x25, x25
+ and x24, x24, x25
+ msr hcr_el2, x24
+.endm
+
+/*
+ * Call into the vgic backend for state restoring
+ */
+.macro restore_vgic_state
+ mrs x24, hcr_el2
+ ldr x25, [x0, #VCPU_IRQ_LINES]
+ orr x24, x24, #HCR_INT_OVERRIDE
+ orr x24, x24, x25
+ msr hcr_el2, x24
+ adr x24, __vgic_sr_vectors
+ ldr x24, [x24, #VGIC_RESTORE_FN]
+ kern_hyp_va x24
+ blr x24
+.endm
+
+.macro save_timer_state
+ // x0: vcpu pointer
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr w3, [x2, #KVM_TIMER_ENABLED]
+ cbz w3, 1f
+
+ mrs x3, cntv_ctl_el0
+ and x3, x3, #3
+ str w3, [x0, #VCPU_TIMER_CNTV_CTL]
+ bic x3, x3, #1 // Clear Enable
+ msr cntv_ctl_el0, x3
+
+ isb
+
+ mrs x3, cntv_cval_el0
+ str x3, [x0, #VCPU_TIMER_CNTV_CVAL]
+
+1:
+ // Allow physical timer/counter access for the host
+ mrs x2, cnthctl_el2
+ orr x2, x2, #3
+ msr cnthctl_el2, x2
+
+ // Clear cntvoff for the host
+ msr cntvoff_el2, xzr
+.endm
+
+.macro restore_timer_state
+ // x0: vcpu pointer
+ // Disallow physical timer access for the guest
+ // Physical counter access is allowed
+ mrs x2, cnthctl_el2
+ orr x2, x2, #1
+ bic x2, x2, #2
+ msr cnthctl_el2, x2
+
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr w3, [x2, #KVM_TIMER_ENABLED]
+ cbz w3, 1f
+
+ ldr x3, [x2, #KVM_TIMER_CNTVOFF]
+ msr cntvoff_el2, x3
+ ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL]
+ msr cntv_cval_el0, x2
+ isb
+
+ ldr w2, [x0, #VCPU_TIMER_CNTV_CTL]
+ and x2, x2, #3
+ msr cntv_ctl_el0, x2
+1:
+.endm
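
The CNTHCTL_EL2 handling in these two macros reduces to two bits, EL1PCTEN (bit 0, physical counter access) and EL1PCEN (bit 1, physical timer access); a C rendering of the guest and host settings:

    #include <stdint.h>

    #define EL1PCTEN        (UINT64_C(1) << 0)
    #define EL1PCEN         (UINT64_C(1) << 1)

    /* Guest: may read the physical counter, may not program the timer. */
    static uint64_t cnthctl_for_guest(uint64_t cnthctl)
    {
            return (cnthctl | EL1PCTEN) & ~EL1PCEN;  /* orr #1, bic #2 */
    }

    /* Host: gets both back on exit. */
    static uint64_t cnthctl_for_host(uint64_t cnthctl)
    {
            return cnthctl | EL1PCTEN | EL1PCEN;     /* orr #3 */
    }
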
+
+__save_sysregs:
+ save_sysregs
+ ret
+
+__restore_sysregs:
+ restore_sysregs
+ ret
+
+__save_debug:
+ save_debug
+ ret
+
+__restore_debug:
+ restore_debug
+ ret
+
+__save_fpsimd:
+ save_fpsimd
+ ret
+
+__restore_fpsimd:
+ restore_fpsimd
+ ret
+
+/*
+ * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+ *
+ * This is the world switch. The first half of the function
+ * deals with entering the guest, and anything from __kvm_vcpu_return
+ * to the end of the function deals with reentering the host.
+ * On the enter path, only x0 (vcpu pointer) must be preserved until
+ * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception
+ * code) must both be preserved until the epilogue.
+ * In both cases, x2 points to the CPU context we're saving/restoring from/to.
+ */
+ENTRY(__kvm_vcpu_run)
+ kern_hyp_va x0
+ msr tpidr_el2, x0 // Save the vcpu register
+
+ // Host context
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ kern_hyp_va x2
+
+ save_host_regs
+ bl __save_fpsimd
+ bl __save_sysregs
+
+ compute_debug_state 1f
+ bl __save_debug
+1:
+ activate_traps
+ activate_vm
+
+ restore_vgic_state
+ restore_timer_state
+
+ // Guest context
+ add x2, x0, #VCPU_CONTEXT
+
+ bl __restore_sysregs
+ bl __restore_fpsimd
+
+ skip_debug_state x3, 1f
+ bl __restore_debug
+1:
+ restore_guest_32bit_state
+ restore_guest_regs
+
+ // That's it, no more messing around.
+ eret
+
+__kvm_vcpu_return:
+ // Assume x0 is the vcpu pointer, x1 the return code
+ // Guest's x0-x3 are on the stack
+
+ // Guest context
+ add x2, x0, #VCPU_CONTEXT
+
+ save_guest_regs
+ bl __save_fpsimd
+ bl __save_sysregs
+
+ skip_debug_state x3, 1f
+ bl __save_debug
+1:
+ save_guest_32bit_state
+
+ save_timer_state
+ save_vgic_state
+
+ deactivate_traps
+ deactivate_vm
+
+ // Host context
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ kern_hyp_va x2
+
+ bl __restore_sysregs
+ bl __restore_fpsimd
+
+ skip_debug_state x3, 1f
+ // Clear the dirty flag for the next run, as all the state has
+ // already been saved. Note that we nuke the whole 64bit word.
+ // If we ever add more flags, we'll have to be more careful...
+ str xzr, [x0, #VCPU_DEBUG_FLAGS]
+ bl __restore_debug
+1:
+ restore_host_regs
+
+ mov x0, x1
+ ret
+END(__kvm_vcpu_run)
+
+// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
+ENTRY(__kvm_tlb_flush_vmid_ipa)
+ dsb ishst
+
+ kern_hyp_va x0
+ ldr x2, [x0, #KVM_VTTBR]
+ msr vttbr_el2, x2
+ isb
+
+ /*
+ * We could do so much better if we had the VA as well.
+ * Instead, we invalidate Stage-2 for this IPA, and the
+ * whole of Stage-1. Weep...
+ */
+ tlbi ipas2e1is, x1
+ /*
+ * We have to ensure completion of the invalidation at Stage-2,
+ * since a table walk on another CPU could refill a TLB with a
+ * complete (S1 + S2) walk based on the old Stage-2 mapping if
+ * the Stage-1 invalidation happened first.
+ */
+ dsb ish
+ tlbi vmalle1is
+ dsb ish
+ isb
+
+ msr vttbr_el2, xzr
+ ret
+ENDPROC(__kvm_tlb_flush_vmid_ipa)
+
+ENTRY(__kvm_flush_vm_context)
+ dsb ishst
+ tlbi alle1is
+ ic ialluis
+ dsb ish
+ ret
+ENDPROC(__kvm_flush_vm_context)
+
+ // struct vgic_sr_vectors __vgic_sr_vectors;
+ .align 3
+ENTRY(__vgic_sr_vectors)
+ .skip VGIC_SR_VECTOR_SZ
+ENDPROC(__vgic_sr_vectors)
+
+__kvm_hyp_panic:
+ // Guess the context by looking at VTTBR:
+ // If zero, then we're already a host.
+ // Otherwise restore a minimal host context before panicking.
+ mrs x0, vttbr_el2
+ cbz x0, 1f
+
+ mrs x0, tpidr_el2
+
+ deactivate_traps
+ deactivate_vm
+
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ kern_hyp_va x2
+
+ bl __restore_sysregs
+
+1: adr x0, __hyp_panic_str
+ adr x1, 2f
+ ldp x2, x3, [x1]
+ sub x0, x0, x2
+ add x0, x0, x3
+ mrs x1, spsr_el2
+ mrs x2, elr_el2
+ mrs x3, esr_el2
+ mrs x4, far_el2
+ mrs x5, hpfar_el2
+ mrs x6, par_el1
+ mrs x7, tpidr_el2
+
+ mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+ PSR_MODE_EL1h)
+ msr spsr_el2, lr
+ ldr lr, =panic
+ msr elr_el2, lr
+ eret
+
+ .align 3
+2: .quad HYP_PAGE_OFFSET
+ .quad PAGE_OFFSET
+ENDPROC(__kvm_hyp_panic)
+
+__hyp_panic_str:
+ .ascii "HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0"
+
+ .align 2
+
+/*
+ * u64 kvm_call_hyp(void *hypfn, ...);
+ *
+ * This is not really a variadic function in the classic C-way and care must
+ * be taken when calling this to ensure parameters are passed in registers
+ * only, since the stack will change between the caller and the callee.
+ *
+ * Call the function with the first argument containing a pointer to the
+ * function you wish to call in Hyp mode, and subsequent arguments will be
+ * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the
+ * function pointer can be passed). The function being called must be mapped
+ * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are
+ * passed in x0 and x1.
+ *
+ * A function pointer with a value of 0 has a special meaning, and is
+ * used to implement __hyp_get_vectors in the same way as in
+ * arch/arm64/kernel/hyp_stub.S.
+ */
+ENTRY(kvm_call_hyp)
+ hvc #0
+ ret
+ENDPROC(kvm_call_hyp)
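
A sketch of how callers are expected to use this, in the style of the users in arch/arm/kvm/arm.c; the phys_addr_t typedef and the two wrapper names are illustrative, while the prototypes come from this file:

    #include <stdint.h>

    struct kvm;
    struct kvm_vcpu;
    typedef uint64_t phys_addr_t;   /* stand-in for the kernel typedef */

    extern uint64_t kvm_call_hyp(void *hypfn, ...);
    extern uint64_t __kvm_vcpu_run(struct kvm_vcpu *vcpu);
    extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);

    /* Arguments after the function pointer travel in x0-x2. */
    static uint64_t run_vcpu_sketch(struct kvm_vcpu *vcpu)
    {
            return kvm_call_hyp(__kvm_vcpu_run, vcpu);
    }

    static void flush_ipa_sketch(struct kvm *kvm, phys_addr_t ipa)
    {
            kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
    }
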
+
+.macro invalid_vector label, target
+ .align 2
+\label:
+ b \target
+ENDPROC(\label)
+.endm
+
+ /* None of these should ever happen */
+ invalid_vector el2t_sync_invalid, __kvm_hyp_panic
+ invalid_vector el2t_irq_invalid, __kvm_hyp_panic
+ invalid_vector el2t_fiq_invalid, __kvm_hyp_panic
+ invalid_vector el2t_error_invalid, __kvm_hyp_panic
+ invalid_vector el2h_sync_invalid, __kvm_hyp_panic
+ invalid_vector el2h_irq_invalid, __kvm_hyp_panic
+ invalid_vector el2h_fiq_invalid, __kvm_hyp_panic
+ invalid_vector el2h_error_invalid, __kvm_hyp_panic
+ invalid_vector el1_sync_invalid, __kvm_hyp_panic
+ invalid_vector el1_irq_invalid, __kvm_hyp_panic
+ invalid_vector el1_fiq_invalid, __kvm_hyp_panic
+ invalid_vector el1_error_invalid, __kvm_hyp_panic
+
+el1_sync: // Guest trapped into EL2
+ push x0, x1
+ push x2, x3
+
+ mrs x1, esr_el2
+ lsr x2, x1, #ESR_EL2_EC_SHIFT
+
+ cmp x2, #ESR_EL2_EC_HVC64
+ b.ne el1_trap
+
+ mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest
+ cbnz x3, el1_trap // called HVC
+
+ /* Here, we're pretty sure the host called HVC. */
+ pop x2, x3
+ pop x0, x1
+
+ /* Check for __hyp_get_vectors */
+ cbnz x0, 1f
+ mrs x0, vbar_el2
+ b 2f
+
+1: push lr, xzr
+
+ /*
+ * Compute the function address in EL2, and shuffle the parameters.
+ */
+ kern_hyp_va x0
+ mov lr, x0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ blr lr
+
+ pop lr, xzr
+2: eret
+
+el1_trap:
+ /*
+ * x1: ESR
+ * x2: ESR_EC
+ */
+ cmp x2, #ESR_EL2_EC_DABT
+ mov x0, #ESR_EL2_EC_IABT
+ ccmp x2, x0, #4, ne
+ b.ne 1f // Not an abort we care about
+
+ /* This is an abort. Check for permission fault */
+ and x2, x1, #ESR_EL2_FSC_TYPE
+ cmp x2, #FSC_PERM
+ b.ne 1f // Not a permission fault
+
+ /*
+ * Check for Stage-1 page table walk, which is guaranteed
+ * to give a valid HPFAR_EL2.
+ */
+ tbnz x1, #7, 1f // S1PTW is set
+
+ /* Preserve PAR_EL1 */
+ mrs x3, par_el1
+ push x3, xzr
+
+ /*
+ * Permission fault, HPFAR_EL2 is invalid.
+ * Resolve the IPA the hard way using the guest VA.
+ * Stage-1 translation already validated the memory access rights.
+ * As such, we can use the EL1 translation regime, and don't have
+ * to distinguish between EL0 and EL1 access.
+ */
+ mrs x2, far_el2
+ at s1e1r, x2
+ isb
+
+ /* Read result */
+ mrs x3, par_el1
+ pop x0, xzr // Restore PAR_EL1 from the stack
+ msr par_el1, x0
+ tbnz x3, #0, 3f // Bail out if we failed the translation
+ ubfx x3, x3, #12, #36 // Extract IPA
+ lsl x3, x3, #4 // and present it like HPFAR
+ b 2f
+
+1: mrs x3, hpfar_el2
+ mrs x2, far_el2
+
+2: mrs x0, tpidr_el2
+ str w1, [x0, #VCPU_ESR_EL2]
+ str x2, [x0, #VCPU_FAR_EL2]
+ str x3, [x0, #VCPU_HPFAR_EL2]
+
+ mov x1, #ARM_EXCEPTION_TRAP
+ b __kvm_vcpu_return
+
+ /*
+ * Translation failed. Just return to the guest and
+ * let it fault again. Another CPU is probably playing
+ * behind our back.
+ */
+3: pop x2, x3
+ pop x0, x1
+
+ eret
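
The PAR massaging above, restated in C: bit 0 of PAR_EL1 is the F (fault) bit, PAR[47:12] holds the output address on success, and HPFAR_EL2 wants that same field placed at bits [39:4]. A sketch:

    #include <stdint.h>

    /* Returns -1 when the AT walk faulted, mirroring the tbnz to 3:. */
    static int par_to_hpfar(uint64_t par, uint64_t *hpfar)
    {
            if (par & 1)
                    return -1;
            /* ubfx #12, #36 followed by lsl #4 */
            *hpfar = ((par >> 12) & ((UINT64_C(1) << 36) - 1)) << 4;
            return 0;
    }
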
+
+el1_irq:
+ push x0, x1
+ push x2, x3
+ mrs x0, tpidr_el2
+ mov x1, #ARM_EXCEPTION_IRQ
+ b __kvm_vcpu_return
+
+ .ltorg
+
+ .align 11
+
+ENTRY(__kvm_hyp_vector)
+ ventry el2t_sync_invalid // Synchronous EL2t
+ ventry el2t_irq_invalid // IRQ EL2t
+ ventry el2t_fiq_invalid // FIQ EL2t
+ ventry el2t_error_invalid // Error EL2t
+
+ ventry el2h_sync_invalid // Synchronous EL2h
+ ventry el2h_irq_invalid // IRQ EL2h
+ ventry el2h_fiq_invalid // FIQ EL2h
+ ventry el2h_error_invalid // Error EL2h
+
+ ventry el1_sync // Synchronous 64-bit EL1
+ ventry el1_irq // IRQ 64-bit EL1
+ ventry el1_fiq_invalid // FIQ 64-bit EL1
+ ventry el1_error_invalid // Error 64-bit EL1
+
+ ventry el1_sync // Synchronous 32-bit EL1
+ ventry el1_irq // IRQ 32-bit EL1
+ ventry el1_fiq_invalid // FIQ 32-bit EL1
+ ventry el1_error_invalid // Error 32-bit EL1
+ENDPROC(__kvm_hyp_vector)
+
+ .popsection
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
new file mode 100644
index 000000000000..81a02a8762b0
--- /dev/null
+++ b/arch/arm64/kvm/inject_fault.c
@@ -0,0 +1,203 @@
+/*
+ * Fault injection for both 32 and 64bit guests.
+ *
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/emulate.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/esr.h>
+
+#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \
+ PSR_I_BIT | PSR_D_BIT)
+#define EL1_EXCEPT_SYNC_OFFSET 0x200
+
+static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
+{
+ unsigned long cpsr;
+ unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
+ bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT);
+ u32 return_offset = (is_thumb) ? 4 : 0;
+ u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
+
+ cpsr = mode | COMPAT_PSR_I_BIT;
+
+ if (sctlr & (1 << 30))
+ cpsr |= COMPAT_PSR_T_BIT;
+ if (sctlr & (1 << 25))
+ cpsr |= COMPAT_PSR_E_BIT;
+
+ *vcpu_cpsr(vcpu) = cpsr;
+
+ /* Note: These now point to the banked copies */
+ *vcpu_spsr(vcpu) = new_spsr_value;
+ *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
+
+ /* Branch to exception vector */
+ if (sctlr & (1 << 13))
+ vect_offset += 0xffff0000;
+ else /* always have security exceptions */
+ vect_offset += vcpu_cp15(vcpu, c12_VBAR);
+
+ *vcpu_pc(vcpu) = vect_offset;
+}
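
The raw shifts tested in prepare_fault32() above correspond to well-known SCTLR bits; a sketch naming them (TE, EE, V) and showing the vector-base selection, under those ARMv7 bit assignments:

    #include <stdint.h>

    #define SCTLR_V         (UINT32_C(1) << 13)     /* high vectors */
    #define SCTLR_EE        (UINT32_C(1) << 25)     /* exception endianness */
    #define SCTLR_TE        (UINT32_C(1) << 30)     /* Thumb exceptions */

    /* Where the 32-bit guest's exception vector ends up. */
    static uint32_t vect_address(uint32_t sctlr, uint32_t vbar,
                                 uint32_t vect_offset)
    {
            return vect_offset + ((sctlr & SCTLR_V) ? 0xffff0000U : vbar);
    }
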
+
+static void inject_undef32(struct kvm_vcpu *vcpu)
+{
+ prepare_fault32(vcpu, COMPAT_PSR_MODE_UND, 4);
+}
+
+/*
+ * Modelled after the TakeDataAbortException() and
+ * TakePrefetchAbortException() pseudocode.
+ */
+static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt,
+ unsigned long addr)
+{
+ u32 vect_offset;
+ u32 *far, *fsr;
+ bool is_lpae;
+
+ if (is_pabt) {
+ vect_offset = 12;
+ far = &vcpu_cp15(vcpu, c6_IFAR);
+ fsr = &vcpu_cp15(vcpu, c5_IFSR);
+	} else { /* !pabt */
+ vect_offset = 16;
+ far = &vcpu_cp15(vcpu, c6_DFAR);
+ fsr = &vcpu_cp15(vcpu, c5_DFSR);
+ }
+
+ prepare_fault32(vcpu, COMPAT_PSR_MODE_ABT | COMPAT_PSR_A_BIT, vect_offset);
+
+ *far = addr;
+
+ /* Give the guest an IMPLEMENTATION DEFINED exception */
+ is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31);
+ if (is_lpae)
+ *fsr = 1 << 9 | 0x34;
+ else
+ *fsr = 0x14;
+}
+
+static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
+{
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ bool is_aarch32;
+ u32 esr = 0;
+
+ is_aarch32 = vcpu_mode_is_32bit(vcpu);
+
+ *vcpu_spsr(vcpu) = cpsr;
+ *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+ *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+ vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+
+ /*
+ * Build an {i,d}abort, depending on the level and the
+ * instruction set. Report an external synchronous abort.
+ */
+ if (kvm_vcpu_trap_il_is32bit(vcpu))
+ esr |= ESR_EL1_IL;
+
+ /*
+ * Here, the guest runs in AArch64 mode when in EL1. If we get
+ * an AArch32 fault, it means we managed to trap an EL0 fault.
+ */
+ if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t)
+ esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT);
+ else
+ esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT);
+
+	if (!is_iabt)
+		esr |= (ESR_EL1_EC_DABT_EL0 << ESR_EL1_EC_SHIFT);
+
+ vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT;
+}
+
+static void inject_undef64(struct kvm_vcpu *vcpu)
+{
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ u32 esr = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT);
+
+ *vcpu_spsr(vcpu) = cpsr;
+ *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+
+ *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+ *vcpu_pc(vcpu) = vcpu_sys_reg(vcpu, VBAR_EL1) + EL1_EXCEPT_SYNC_OFFSET;
+
+ /*
+ * Build an unknown exception, depending on the instruction
+ * set.
+ */
+ if (kvm_vcpu_trap_il_is32bit(vcpu))
+ esr |= ESR_EL1_IL;
+
+ vcpu_sys_reg(vcpu, ESR_EL1) = esr;
+}
+
+/**
+ * kvm_inject_dabt - inject a data abort into the guest
+ * @vcpu: The VCPU to receive the data abort
+ * @addr: The address to report in the DFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_abt32(vcpu, false, addr);
+	else
+		inject_abt64(vcpu, false, addr);
+}
+
+/**
+ * kvm_inject_pabt - inject a prefetch abort into the guest
+ * @vcpu: The VCPU to receive the prefetch abort
+ * @addr: The address to report in the IFAR
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_abt32(vcpu, true, addr);
+	else
+		inject_abt64(vcpu, true, addr);
+}
+
+/**
+ * kvm_inject_undefined - inject an undefined instruction into the guest
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ */
+void kvm_inject_undefined(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+		inject_undef32(vcpu);
+	else
+		inject_undef64(vcpu);
+}
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
new file mode 100644
index 000000000000..bbc6ae32e4af
--- /dev/null
+++ b/arch/arm64/kvm/regmap.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/emulate.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/ptrace.h>
+
+#define VCPU_NR_MODES 6
+#define REG_OFFSET(_reg) \
+ (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long))
+
+#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R))
+
+static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = {
+ /* USR Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14),
+ REG_OFFSET(pc)
+ },
+
+ /* FIQ Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7),
+ REG_OFFSET(compat_r8_fiq), /* r8 */
+ REG_OFFSET(compat_r9_fiq), /* r9 */
+ REG_OFFSET(compat_r10_fiq), /* r10 */
+ REG_OFFSET(compat_r11_fiq), /* r11 */
+ REG_OFFSET(compat_r12_fiq), /* r12 */
+ REG_OFFSET(compat_sp_fiq), /* r13 */
+ REG_OFFSET(compat_lr_fiq), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* IRQ Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_irq), /* r13 */
+ REG_OFFSET(compat_lr_irq), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* SVC Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_svc), /* r13 */
+ REG_OFFSET(compat_lr_svc), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* ABT Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_abt), /* r13 */
+ REG_OFFSET(compat_lr_abt), /* r14 */
+ REG_OFFSET(pc)
+ },
+
+ /* UND Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(compat_sp_und), /* r13 */
+ REG_OFFSET(compat_lr_und), /* r14 */
+ REG_OFFSET(pc)
+ },
+};
+
+/*
+ * Return a pointer to the storage backing register reg_num, as banked
+ * for the current mode of the virtual CPU.
+ */
+unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
+{
+ unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.gp_regs.regs;
+ unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+
+ switch (mode) {
+ case COMPAT_PSR_MODE_USR ... COMPAT_PSR_MODE_SVC:
+ mode &= ~PSR_MODE32_BIT; /* 0 ... 3 */
+ break;
+
+ case COMPAT_PSR_MODE_ABT:
+ mode = 4;
+ break;
+
+ case COMPAT_PSR_MODE_UND:
+ mode = 5;
+ break;
+
+ case COMPAT_PSR_MODE_SYS:
+ mode = 0; /* SYS maps to USR */
+ break;
+
+ default:
+ BUG();
+ }
+
+ return reg_array + vcpu_reg_offsets[mode][reg_num];
+}
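
Illustrative use (the helper below is hypothetical): writing register 14 while the guest's CPSR says ABT mode must land in the banked compat_lr_abt slot, which is what prepare_fault32() in inject_fault.c relies on when it stores the return address:

    #include <stdint.h>

    struct kvm_vcpu;
    unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, uint8_t reg_num);

    /* Resolved through the ABT row of vcpu_reg_offsets[][] above. */
    static void set_banked_lr(struct kvm_vcpu *vcpu, unsigned long ret_addr)
    {
            *vcpu_reg32(vcpu, 14) = ret_addr;
    }
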
+
+/*
+ * Return the SPSR for the current mode of the virtual CPU.
+ */
+unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+{
+ unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
+ switch (mode) {
+ case COMPAT_PSR_MODE_SVC:
+ mode = KVM_SPSR_SVC;
+ break;
+ case COMPAT_PSR_MODE_ABT:
+ mode = KVM_SPSR_ABT;
+ break;
+ case COMPAT_PSR_MODE_UND:
+ mode = KVM_SPSR_UND;
+ break;
+ case COMPAT_PSR_MODE_IRQ:
+ mode = KVM_SPSR_IRQ;
+ break;
+ case COMPAT_PSR_MODE_FIQ:
+ mode = KVM_SPSR_FIQ;
+ break;
+ default:
+ BUG();
+ }
+
+ return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
+}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
new file mode 100644
index 000000000000..70a7816535cd
--- /dev/null
+++ b/arch/arm64/kvm/reset.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/reset.c
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include <kvm/arm_arch_timer.h>
+
+#include <asm/cputype.h>
+#include <asm/ptrace.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_coproc.h>
+
+/*
+ * ARMv8 Reset Values
+ */
+static const struct kvm_regs default_regs_reset = {
+ .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
+ PSR_F_BIT | PSR_D_BIT),
+};
+
+static const struct kvm_regs default_regs_reset32 = {
+ .regs.pstate = (COMPAT_PSR_MODE_SVC | COMPAT_PSR_A_BIT |
+ COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT),
+};
+
+static const struct kvm_irq_level default_vtimer_irq = {
+ .irq = 27,
+ .level = 1,
+};
+
+static bool cpu_has_32bit_el1(void)
+{
+ u64 pfr0;
+
+ pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+ return !!(pfr0 & 0x20);
+}
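
The 0x20 test is compact but cryptic: the EL1 field of ID_AA64PFR0_EL1 lives at bits [7:4], where only the values 1 (AArch64 only) and 2 (AArch64 and AArch32) are defined, so testing bit 5 of the whole register is enough. A spelled-out equivalent:

    #include <stdbool.h>
    #include <stdint.h>

    static bool el1_supports_aarch32(uint64_t pfr0)
    {
            return ((pfr0 >> 4) & 0xf) == 2;        /* same as pfr0 & 0x20 */
    }
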
+
+int kvm_arch_dev_ioctl_check_extension(long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_ARM_EL1_32BIT:
+ r = cpu_has_32bit_el1();
+ break;
+ default:
+ r = 0;
+ }
+
+ return r;
+}
+
+/**
+ * kvm_reset_vcpu - sets core registers and sys_regs to reset value
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on
+ * the virtual CPU struct to their architecturally defined reset
+ * values.
+ */
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+ const struct kvm_irq_level *cpu_vtimer_irq;
+ const struct kvm_regs *cpu_reset;
+
+ switch (vcpu->arch.target) {
+ default:
+ if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
+ if (!cpu_has_32bit_el1())
+ return -EINVAL;
+ cpu_reset = &default_regs_reset32;
+ vcpu->arch.hcr_el2 &= ~HCR_RW;
+ } else {
+ cpu_reset = &default_regs_reset;
+ }
+
+ cpu_vtimer_irq = &default_vtimer_irq;
+ break;
+ }
+
+ /* Reset core registers */
+ memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));
+
+ /* Reset system registers */
+ kvm_reset_sys_regs(vcpu);
+
+ /* Reset timer */
+ kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
+
+ return 0;
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
new file mode 100644
index 000000000000..4cc3b719208e
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.c
@@ -0,0 +1,1528 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.com.au>
+ * Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/mm.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/debug-monitors.h>
+#include <trace/events/kvm.h>
+
+#include "sys_regs.h"
+
+/*
+ * All of this file is extremely similar to the ARM coproc.c, but the
+ * types are different. My gut feeling is that it should be pretty
+ * easy to merge, but that would be an ABI breakage -- again. VFP
+ * would also need to be abstracted.
+ *
+ * For AArch32, we only take care of what is being trapped. Anything
+ * that has to do with init and userspace access has to go via the
+ * 64bit interface.
+ */
+
+/* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
+static u32 cache_levels;
+
+/* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
+#define CSSELR_MAX 12
+
+/* Which cache CCSIDR represents depends on CSSELR value. */
+static u32 get_ccsidr(u32 csselr)
+{
+ u32 ccsidr;
+
+	/* Make sure no one else changes CSSELR during this! */
+ local_irq_disable();
+ /* Put value into CSSELR */
+ asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+ isb();
+ /* Read result out of CCSIDR */
+ asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr));
+ local_irq_enable();
+
+ return ccsidr;
+}
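
Once read, CCSIDR decodes per the ARMv8 layout: LineSize (bits [2:0]) encodes log2(words per line) minus 2, and Associativity (bits [12:3]) is stored minus one. A sketch of the decoding a consumer of this demux would apply:

    #include <stdint.h>

    static uint32_t ccsidr_line_bytes(uint32_t ccsidr)
    {
            return 16U << (ccsidr & 0x7);           /* 4 << (LineSize + 2) */
    }

    static uint32_t ccsidr_assoc(uint32_t ccsidr)
    {
            return ((ccsidr >> 3) & 0x3ff) + 1;
    }
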
+
+static void do_dc_cisw(u32 val)
+{
+ asm volatile("dc cisw, %x0" : : "r" (val));
+ dsb(ish);
+}
+
+static void do_dc_csw(u32 val)
+{
+ asm volatile("dc csw, %x0" : : "r" (val));
+ dsb(ish);
+}
+
+/* See note at ARM ARM B1.14.4 */
+static bool access_dcsw(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ unsigned long val;
+ int cpu;
+
+ if (!p->is_write)
+ return read_from_write_only(vcpu, p);
+
+ cpu = get_cpu();
+
+ cpumask_setall(&vcpu->arch.require_dcache_flush);
+ cpumask_clear_cpu(cpu, &vcpu->arch.require_dcache_flush);
+
+ /* If we were already preempted, take the long way around */
+ if (cpu != vcpu->arch.last_pcpu) {
+ flush_cache_all();
+ goto done;
+ }
+
+ val = *vcpu_reg(vcpu, p->Rt);
+
+ switch (p->CRm) {
+ case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
+ case 14: /* DCCISW */
+ do_dc_cisw(val);
+ break;
+
+ case 10: /* DCCSW */
+ do_dc_csw(val);
+ break;
+ }
+
+done:
+ put_cpu();
+
+ return true;
+}
+
+/*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ unsigned long val;
+
+ BUG_ON(!p->is_write);
+
+ val = *vcpu_reg(vcpu, p->Rt);
+ if (!p->is_aarch32) {
+ vcpu_sys_reg(vcpu, r->reg) = val;
+ } else {
+ if (!p->is_32bit)
+ vcpu_cp15_64_high(vcpu, r->reg) = val >> 32;
+ vcpu_cp15_64_low(vcpu, r->reg) = val & 0xffffffffUL;
+ }
+
+ return true;
+}
+
+/*
+ * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set. If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ */
+static bool access_sctlr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ access_vm_reg(vcpu, p, r);
+
+ if (vcpu_has_cache_enabled(vcpu)) { /* MMU+Caches enabled? */
+ vcpu->arch.hcr_el2 &= ~HCR_TVM;
+ stage2_flush_vm(vcpu->kvm);
+ }
+
+ return true;
+}
+
+static bool trap_raz_wi(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+ else
+ return read_zero(vcpu, p);
+}
+
+static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ *vcpu_reg(vcpu, p->Rt) = (1 << 3);
+ return true;
+ }
+}
+
+static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ u32 val;
+ asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
+ *vcpu_reg(vcpu, p->Rt) = val;
+ return true;
+ }
+}
+
+/*
+ * We want to avoid world-switching all the DBG registers all the
+ * time:
+ *
+ * - If we've touched any debug register, it is likely that we're
+ * going to touch more of them. It then makes sense to disable the
+ * traps and start doing the save/restore dance
+ * - If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is
+ * then mandatory to save/restore the registers, as the guest
+ * depends on them.
+ *
+ * For this, we use a DIRTY bit, indicating the guest has modified the
+ * debug registers, used as follows:
+ *
+ * On guest entry:
+ * - If the dirty bit is set (because we're coming back from trapping),
+ * disable the traps, save host registers, restore guest registers.
+ * - If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
+ * set the dirty bit, disable the traps, save host registers,
+ * restore guest registers.
+ * - Otherwise, enable the traps
+ *
+ * On guest exit:
+ * - If the dirty bit is set, save guest registers, restore host
+ * registers and clear the dirty bit. This ensures that the host can
+ * now use the debug registers.
+ */
+static bool trap_debug_regs(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ } else {
+ *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
+ }
+
+ return true;
+}
+
+static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 amair;
+
+ asm volatile("mrs %0, amair_el1\n" : "=r" (amair));
+ vcpu_sys_reg(vcpu, AMAIR_EL1) = amair;
+}
+
+static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ /*
+ * Simply map the vcpu_id into the Aff0 field of the MPIDR.
+ */
+ vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
+}
+
+/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
+#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
+ /* DBGBVRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \
+ trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \
+ /* DBGBCRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \
+ trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \
+ /* DBGWVRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \
+ trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \
+ /* DBGWCRn_EL1 */ \
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
+ trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
+
+/*
+ * Architected system registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ *
+ * We could trap ID_DFR0 and tell the guest we don't support performance
+ * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was
+ * NAKed, so it will read the PMCR anyway.
+ *
+ * Therefore we tell the guest we have 0 counters. Unfortunately, we
+ * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
+ * all PM registers, which at least doesn't crash the guest kernel.
+ *
+ * Debug handling: We do trap most, if not all debug related system
+ * registers. The implementation is good enough to ensure that a guest
+ * can use these with minimal performance degradation. The drawback is
+ * that we don't implement any of the external debug, none of the
+ * OSlock protocol. This should be revisited if we ever encounter a
+ * more demanding guest...
+ */
+static const struct sys_reg_desc sys_reg_descs[] = {
+ /* DC ISW */
+ { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010),
+ access_dcsw },
+ /* DC CSW */
+ { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010),
+ access_dcsw },
+ /* DC CISW */
+ { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
+ access_dcsw },
+
+ DBG_BCR_BVR_WCR_WVR_EL1(0),
+ DBG_BCR_BVR_WCR_WVR_EL1(1),
+ /* MDCCINT_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+ trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
+ /* MDSCR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+ trap_debug_regs, reset_val, MDSCR_EL1, 0 },
+ DBG_BCR_BVR_WCR_WVR_EL1(2),
+ DBG_BCR_BVR_WCR_WVR_EL1(3),
+ DBG_BCR_BVR_WCR_WVR_EL1(4),
+ DBG_BCR_BVR_WCR_WVR_EL1(5),
+ DBG_BCR_BVR_WCR_WVR_EL1(6),
+ DBG_BCR_BVR_WCR_WVR_EL1(7),
+ DBG_BCR_BVR_WCR_WVR_EL1(8),
+ DBG_BCR_BVR_WCR_WVR_EL1(9),
+ DBG_BCR_BVR_WCR_WVR_EL1(10),
+ DBG_BCR_BVR_WCR_WVR_EL1(11),
+ DBG_BCR_BVR_WCR_WVR_EL1(12),
+ DBG_BCR_BVR_WCR_WVR_EL1(13),
+ DBG_BCR_BVR_WCR_WVR_EL1(14),
+ DBG_BCR_BVR_WCR_WVR_EL1(15),
+
+ /* MDRAR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+ trap_raz_wi },
+ /* OSLAR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
+ trap_raz_wi },
+ /* OSLSR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
+ trap_oslsr_el1 },
+ /* OSDLR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
+ trap_raz_wi },
+ /* DBGPRCR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
+ trap_raz_wi },
+ /* DBGCLAIMSET_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
+ trap_raz_wi },
+ /* DBGCLAIMCLR_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
+ trap_raz_wi },
+ /* DBGAUTHSTATUS_EL1 */
+ { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
+ trap_dbgauthstatus_el1 },
+
+ /* TEECR32_EL1 */
+ { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+ NULL, reset_val, TEECR32_EL1, 0 },
+ /* TEEHBR32_EL1 */
+ { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
+ NULL, reset_val, TEEHBR32_EL1, 0 },
+
+ /* MDCCSR_EL1 */
+ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
+ trap_raz_wi },
+ /* DBGDTR_EL0 */
+ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
+ trap_raz_wi },
+ /* DBGDTR[TR]X_EL0 */
+ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
+ trap_raz_wi },
+
+ /* DBGVCR32_EL2 */
+ { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
+ NULL, reset_val, DBGVCR32_EL2, 0 },
+
+ /* MPIDR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101),
+ NULL, reset_mpidr, MPIDR_EL1 },
+ /* SCTLR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
+ access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 },
+ /* CPACR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
+ NULL, reset_val, CPACR_EL1, 0 },
+ /* TTBR0_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
+ access_vm_reg, reset_unknown, TTBR0_EL1 },
+ /* TTBR1_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
+ access_vm_reg, reset_unknown, TTBR1_EL1 },
+ /* TCR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
+ access_vm_reg, reset_val, TCR_EL1, 0 },
+
+ /* AFSR0_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
+ access_vm_reg, reset_unknown, AFSR0_EL1 },
+ /* AFSR1_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
+ access_vm_reg, reset_unknown, AFSR1_EL1 },
+ /* ESR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
+ access_vm_reg, reset_unknown, ESR_EL1 },
+ /* FAR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
+ access_vm_reg, reset_unknown, FAR_EL1 },
+ /* PAR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
+ NULL, reset_unknown, PAR_EL1 },
+
+ /* PMINTENSET_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
+ trap_raz_wi },
+ /* PMINTENCLR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010),
+ trap_raz_wi },
+
+ /* MAIR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
+ access_vm_reg, reset_unknown, MAIR_EL1 },
+ /* AMAIR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
+ access_vm_reg, reset_amair_el1, AMAIR_EL1 },
+
+ /* VBAR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
+ NULL, reset_val, VBAR_EL1, 0 },
+ /* CONTEXTIDR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
+ access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
+ /* TPIDR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
+ NULL, reset_unknown, TPIDR_EL1 },
+
+ /* CNTKCTL_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000),
+ NULL, reset_val, CNTKCTL_EL1, 0},
+
+ /* CSSELR_EL1 */
+ { Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
+ NULL, reset_unknown, CSSELR_EL1 },
+
+ /* PMCR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000),
+ trap_raz_wi },
+ /* PMCNTENSET_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001),
+ trap_raz_wi },
+ /* PMCNTENCLR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010),
+ trap_raz_wi },
+ /* PMOVSCLR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011),
+ trap_raz_wi },
+ /* PMSWINC_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100),
+ trap_raz_wi },
+ /* PMSELR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101),
+ trap_raz_wi },
+ /* PMCEID0_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110),
+ trap_raz_wi },
+ /* PMCEID1_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111),
+ trap_raz_wi },
+ /* PMCCNTR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000),
+ trap_raz_wi },
+ /* PMXEVTYPER_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001),
+ trap_raz_wi },
+ /* PMXEVCNTR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010),
+ trap_raz_wi },
+ /* PMUSERENR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000),
+ trap_raz_wi },
+ /* PMOVSSET_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011),
+ trap_raz_wi },
+
+ /* TPIDR_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010),
+ NULL, reset_unknown, TPIDR_EL0 },
+ /* TPIDRRO_EL0 */
+ { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011),
+ NULL, reset_unknown, TPIDRRO_EL0 },
+
+ /* DACR32_EL2 */
+ { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000),
+ NULL, reset_unknown, DACR32_EL2 },
+ /* IFSR32_EL2 */
+ { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001),
+ NULL, reset_unknown, IFSR32_EL2 },
+ /* FPEXC32_EL2 */
+ { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000),
+ NULL, reset_val, FPEXC32_EL2, 0x70 },
+};
+
+static bool trap_dbgidr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ return ignore_write(vcpu, p);
+ } else {
+ u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
+ u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
+ u32 el3 = !!((pfr >> 12) & 0xf);
+
+ *vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
+ (((dfr >> 12) & 0xf) << 24) |
+ (((dfr >> 28) & 0xf) << 20) |
+ (6 << 16) | (el3 << 14) | (el3 << 12));
+ return true;
+ }
+}
+
+static bool trap_debug32(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write) {
+ vcpu_cp14(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ } else {
+ *vcpu_reg(vcpu, p->Rt) = vcpu_cp14(vcpu, r->reg);
+ }
+
+ return true;
+}
+
+#define DBG_BCR_BVR_WCR_WVR(n) \
+ /* DBGBVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_debug32, \
+ NULL, (cp14_DBGBVR0 + (n) * 2) }, \
+ /* DBGBCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_debug32, \
+ NULL, (cp14_DBGBCR0 + (n) * 2) }, \
+ /* DBGWVRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_debug32, \
+ NULL, (cp14_DBGWVR0 + (n) * 2) }, \
+ /* DBGWCRn */ \
+ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_debug32, \
+ NULL, (cp14_DBGWCR0 + (n) * 2) }
+
+#define DBGBXVR(n) \
+ { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_debug32, \
+ NULL, cp14_DBGBXVR0 + n * 2 }
+
+/*
+ * Trapped cp14 registers. We generally ignore most of the external
+ * debug registers, on the principle that they don't really make sense
+ * to a guest. Revisit this one day, should this principle change.
+ */
+static const struct sys_reg_desc cp14_regs[] = {
+ /* DBGIDR */
+ { Op1( 0), CRn( 0), CRm( 0), Op2( 0), trap_dbgidr },
+ /* DBGDTRRXext */
+ { Op1( 0), CRn( 0), CRm( 0), Op2( 2), trap_raz_wi },
+
+ DBG_BCR_BVR_WCR_WVR(0),
+ /* DBGDSCRint */
+ { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(1),
+ /* DBGDCCINT */
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 },
+ /* DBGDSCRext */
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 },
+ DBG_BCR_BVR_WCR_WVR(2),
+ /* DBGDTR[RT]Xint */
+ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi },
+ /* DBGDTR[RT]Xext */
+ { Op1( 0), CRn( 0), CRm( 3), Op2( 2), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(3),
+ DBG_BCR_BVR_WCR_WVR(4),
+ DBG_BCR_BVR_WCR_WVR(5),
+ /* DBGWFAR */
+ { Op1( 0), CRn( 0), CRm( 6), Op2( 0), trap_raz_wi },
+ /* DBGOSECCR */
+ { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi },
+ DBG_BCR_BVR_WCR_WVR(6),
+ /* DBGVCR */
+ { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 },
+ DBG_BCR_BVR_WCR_WVR(7),
+ DBG_BCR_BVR_WCR_WVR(8),
+ DBG_BCR_BVR_WCR_WVR(9),
+ DBG_BCR_BVR_WCR_WVR(10),
+ DBG_BCR_BVR_WCR_WVR(11),
+ DBG_BCR_BVR_WCR_WVR(12),
+ DBG_BCR_BVR_WCR_WVR(13),
+ DBG_BCR_BVR_WCR_WVR(14),
+ DBG_BCR_BVR_WCR_WVR(15),
+
+ /* DBGDRAR (32bit) */
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 0), trap_raz_wi },
+
+ DBGBXVR(0),
+ /* DBGOSLAR */
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
+ DBGBXVR(1),
+ /* DBGOSLSR */
+ { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
+ DBGBXVR(2),
+ DBGBXVR(3),
+ /* DBGOSDLR */
+ { Op1( 0), CRn( 1), CRm( 3), Op2( 4), trap_raz_wi },
+ DBGBXVR(4),
+ /* DBGPRCR */
+ { Op1( 0), CRn( 1), CRm( 4), Op2( 4), trap_raz_wi },
+ DBGBXVR(5),
+ DBGBXVR(6),
+ DBGBXVR(7),
+ DBGBXVR(8),
+ DBGBXVR(9),
+ DBGBXVR(10),
+ DBGBXVR(11),
+ DBGBXVR(12),
+ DBGBXVR(13),
+ DBGBXVR(14),
+ DBGBXVR(15),
+
+ /* DBGDSAR (32bit) */
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 0), trap_raz_wi },
+
+ /* DBGDEVID2 */
+ { Op1( 0), CRn( 7), CRm( 0), Op2( 7), trap_raz_wi },
+ /* DBGDEVID1 */
+ { Op1( 0), CRn( 7), CRm( 1), Op2( 7), trap_raz_wi },
+ /* DBGDEVID */
+ { Op1( 0), CRn( 7), CRm( 2), Op2( 7), trap_raz_wi },
+ /* DBGCLAIMSET */
+ { Op1( 0), CRn( 7), CRm( 8), Op2( 6), trap_raz_wi },
+ /* DBGCLAIMCLR */
+ { Op1( 0), CRn( 7), CRm( 9), Op2( 6), trap_raz_wi },
+ /* DBGAUTHSTATUS */
+ { Op1( 0), CRn( 7), CRm(14), Op2( 6), trap_dbgauthstatus_el1 },
+};
+
+/* Trapped cp14 64bit registers */
+static const struct sys_reg_desc cp14_64_regs[] = {
+ /* DBGDRAR (64bit) */
+ { Op1( 0), CRm( 1), .access = trap_raz_wi },
+
+ /* DBGDSAR (64bit) */
+ { Op1( 0), CRm( 2), .access = trap_raz_wi },
+};
+
+/*
+ * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
+ * depending on the way they are accessed (as a 32bit or a 64bit
+ * register).
+ */
+static const struct sys_reg_desc cp15_regs[] = {
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
+ { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
+ { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
+ { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
+ { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
+ { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
+ { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
+ { Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
+ { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
+
+ /*
+ * DC{C,I,CI}SW operations:
+ */
+ { Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
+ { Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
+ { Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+
+ /* PMU */
+ { Op1( 0), CRn( 9), CRm(12), Op2( 0), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 1), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 2), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 3), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 5), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 6), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(12), Op2( 7), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 0), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 1), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(13), Op2( 2), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 0), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 1), trap_raz_wi },
+ { Op1( 0), CRn( 9), CRm(14), Op2( 2), trap_raz_wi },
+
+ { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
+ { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
+ { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
+ { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
+ { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+};
+
+static const struct sys_reg_desc cp15_64_regs[] = {
+ { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+ { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+};
+
+/* Target specific emulation tables */
+static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS];
+
+void kvm_register_target_sys_reg_table(unsigned int target,
+ struct kvm_sys_reg_target_table *table)
+{
+ target_tables[target] = table;
+}
+
+/* Get specific register table for this target. */
+static const struct sys_reg_desc *get_target_table(unsigned target,
+ bool mode_is_64,
+ size_t *num)
+{
+ struct kvm_sys_reg_target_table *table;
+
+ table = target_tables[target];
+ if (mode_is_64) {
+ *num = table->table64.num;
+ return table->table64.table;
+ } else {
+ *num = table->table32.num;
+ return table->table32.table;
+ }
+}
+
+static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params,
+ const struct sys_reg_desc table[],
+ unsigned int num)
+{
+ unsigned int i;
+
+ for (i = 0; i < num; i++) {
+ const struct sys_reg_desc *r = &table[i];
+
+ if (params->Op0 != r->Op0)
+ continue;
+ if (params->Op1 != r->Op1)
+ continue;
+ if (params->CRn != r->CRn)
+ continue;
+ if (params->CRm != r->CRm)
+ continue;
+ if (params->Op2 != r->Op2)
+ continue;
+
+ return r;
+ }
+ return NULL;
+}
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
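+	/*
+	 * Trapped LDC/STC accesses to the CP14 debug registers are not
+	 * emulated, so treat them as undefined.
+	 */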
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+/*
+ * emulate_cp -- tries to match a sys_reg access in a handling table, and
+ * calls the corresponding trap handler.
+ *
+ * @params: pointer to the descriptor of the access
+ * @table: array of trap descriptors
+ * @num: size of the trap descriptor array
+ *
+ * Return 0 if the access has been handled, and -1 if not.
+ */
+static int emulate_cp(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *params,
+ const struct sys_reg_desc *table,
+ size_t num)
+{
+ const struct sys_reg_desc *r;
+
+ if (!table)
+ return -1; /* Not handled */
+
+ r = find_reg(params, table, num);
+
+ if (r) {
+ /*
+ * Not having an accessor means that we have
+ * configured a trap that we don't know how to
+ * handle. This certainly qualifies as a gross bug
+ * that should be fixed right away.
+ */
+ BUG_ON(!r->access);
+
+ if (likely(r->access(vcpu, params, r))) {
+ /* Skip instruction, since it was emulated */
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ }
+
+ /* Handled */
+ return 0;
+ }
+
+ /* Not handled */
+ return -1;
+}
+
+static void unhandled_cp_access(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *params)
+{
+ u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+ int cp;
+
+	switch (hsr_ec) {
+	case ESR_EL2_EC_CP15_32:
+	case ESR_EL2_EC_CP15_64:
+		cp = 15;
+		break;
+	case ESR_EL2_EC_CP14_MR:
+	case ESR_EL2_EC_CP14_64:
+		cp = 14;
+		break;
+	default:
+		/* Unreachable: only CP14/CP15 traps land here. */
+		cp = -1;
+		WARN_ON(1);
+	}
+
+ kvm_err("Unsupported guest CP%d access at: %08lx\n",
+ cp, *vcpu_pc(vcpu));
+ print_sys_reg_instr(params);
+ kvm_inject_undefined(vcpu);
+}
+
+/**
+ * kvm_handle_cp_64 -- handles a mrrc/mcrr trap on a guest CP14/CP15 access
+ * @vcpu: The VCPU pointer
+ * @global: The global trap descriptor table
+ * @nr_global: The number of entries in @global
+ * @target_specific: The target-specific trap descriptor table
+ * @nr_specific: The number of entries in @target_specific
+ */
+static int kvm_handle_cp_64(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *global,
+ size_t nr_global,
+ const struct sys_reg_desc *target_specific,
+ size_t nr_specific)
+{
+ struct sys_reg_params params;
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+ int Rt2 = (hsr >> 10) & 0xf;
+
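+	/*
+	 * Decode the ISS for a 64bit CP access: bit 0 is the direction
+	 * (0 = write), then CRm, Rt, Rt2 and Op1. A 4bit mask is enough
+	 * for the register fields, as AArch32 only has 16 GPRs.
+	 */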
+ params.is_aarch32 = true;
+ params.is_32bit = false;
+ params.CRm = (hsr >> 1) & 0xf;
+ params.Rt = (hsr >> 5) & 0xf;
+ params.is_write = ((hsr & 1) == 0);
+
+ params.Op0 = 0;
+ params.Op1 = (hsr >> 16) & 0xf;
+ params.Op2 = 0;
+ params.CRn = 0;
+
+ /*
+	 * Massive hack here. Store Rt2 in the top 32 bits so we only
+ * have one register to deal with. As we use the same trap
+ * backends between AArch32 and AArch64, we get away with it.
+ */
+ if (params.is_write) {
+ u64 val = *vcpu_reg(vcpu, params.Rt);
+ val &= 0xffffffff;
+ val |= *vcpu_reg(vcpu, Rt2) << 32;
+ *vcpu_reg(vcpu, params.Rt) = val;
+ }
+
+ if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+ goto out;
+ if (!emulate_cp(vcpu, &params, global, nr_global))
+ goto out;
+
+ unhandled_cp_access(vcpu, &params);
+
+out:
+ /* Do the opposite hack for the read side */
+ if (!params.is_write) {
+ u64 val = *vcpu_reg(vcpu, params.Rt);
+ val >>= 32;
+ *vcpu_reg(vcpu, Rt2) = val;
+ }
+
+ return 1;
+}
+
+/**
+ * kvm_handle_cp_32 -- handles a mrc/mcr trap on a guest CP14/CP15 access
+ * @vcpu: The VCPU pointer
+ * @global: The global trap descriptor table
+ * @nr_global: The number of entries in @global
+ * @target_specific: The target-specific trap descriptor table
+ * @nr_specific: The number of entries in @target_specific
+ */
+static int kvm_handle_cp_32(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *global,
+ size_t nr_global,
+ const struct sys_reg_desc *target_specific,
+ size_t nr_specific)
+{
+ struct sys_reg_params params;
+ u32 hsr = kvm_vcpu_get_hsr(vcpu);
+
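+	/*
+	 * Decode the ISS for a 32bit CP access: bit 0 is the direction
+	 * (0 = write), then CRm, Rt, CRn, Op1 and Op2.
+	 */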
+ params.is_aarch32 = true;
+ params.is_32bit = true;
+ params.CRm = (hsr >> 1) & 0xf;
+ params.Rt = (hsr >> 5) & 0xf;
+ params.is_write = ((hsr & 1) == 0);
+ params.CRn = (hsr >> 10) & 0xf;
+ params.Op0 = 0;
+ params.Op1 = (hsr >> 14) & 0x7;
+ params.Op2 = (hsr >> 17) & 0x7;
+
+ if (!emulate_cp(vcpu, &params, target_specific, nr_specific))
+ return 1;
+ if (!emulate_cp(vcpu, &params, global, nr_global))
+ return 1;
+
+ unhandled_cp_access(vcpu, &params);
+ return 1;
+}
+
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ const struct sys_reg_desc *target_specific;
+ size_t num;
+
+ target_specific = get_target_table(vcpu->arch.target, false, &num);
+ return kvm_handle_cp_64(vcpu,
+ cp15_64_regs, ARRAY_SIZE(cp15_64_regs),
+ target_specific, num);
+}
+
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ const struct sys_reg_desc *target_specific;
+ size_t num;
+
+ target_specific = get_target_table(vcpu->arch.target, false, &num);
+ return kvm_handle_cp_32(vcpu,
+ cp15_regs, ARRAY_SIZE(cp15_regs),
+ target_specific, num);
+}
+
+int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ return kvm_handle_cp_64(vcpu,
+ cp14_64_regs, ARRAY_SIZE(cp14_64_regs),
+ NULL, 0);
+}
+
+int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ return kvm_handle_cp_32(vcpu,
+ cp14_regs, ARRAY_SIZE(cp14_regs),
+ NULL, 0);
+}
+
+static int emulate_sys_reg(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *params)
+{
+ size_t num;
+ const struct sys_reg_desc *table, *r;
+
+ table = get_target_table(vcpu->arch.target, true, &num);
+
+ /* Search target-specific then generic table. */
+ r = find_reg(params, table, num);
+ if (!r)
+ r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+ if (likely(r)) {
+ /*
+ * Not having an accessor means that we have
+ * configured a trap that we don't know how to
+ * handle. This certainly qualifies as a gross bug
+ * that should be fixed right away.
+ */
+ BUG_ON(!r->access);
+
+ if (likely(r->access(vcpu, params, r))) {
+ /* Skip instruction, since it was emulated */
+ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
+ return 1;
+ }
+ /* If access function fails, it should complain. */
+ } else {
+ kvm_err("Unsupported guest sys_reg access at: %lx\n",
+ *vcpu_pc(vcpu));
+ print_sys_reg_instr(params);
+ }
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
+static void reset_sys_reg_descs(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *table, size_t num)
+{
+ unsigned long i;
+
+ for (i = 0; i < num; i++)
+ if (table[i].reset)
+ table[i].reset(vcpu, &table[i]);
+}
+
+/**
+ * kvm_handle_sys_reg -- handles a mrs/msr trap on a guest sys_reg access
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run struct
+ */
+int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ struct sys_reg_params params;
+ unsigned long esr = kvm_vcpu_get_hsr(vcpu);
+
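+	/*
+	 * Decode the ISS for a trapped MSR/MRS: bit 0 is the direction
+	 * (0 = write), with CRm, Rt, CRn, Op1, Op2 and Op0 following in
+	 * ascending bit order.
+	 */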
+ params.is_aarch32 = false;
+ params.is_32bit = false;
+ params.Op0 = (esr >> 20) & 3;
+ params.Op1 = (esr >> 14) & 0x7;
+ params.CRn = (esr >> 10) & 0xf;
+ params.CRm = (esr >> 1) & 0xf;
+ params.Op2 = (esr >> 17) & 0x7;
+ params.Rt = (esr >> 5) & 0x1f;
+ params.is_write = !(esr & 1);
+
+ return emulate_sys_reg(vcpu, &params);
+}
+
+/******************************************************************************
+ * Userspace API
+ *****************************************************************************/
+
+static bool index_to_params(u64 id, struct sys_reg_params *params)
+{
+ switch (id & KVM_REG_SIZE_MASK) {
+ case KVM_REG_SIZE_U64:
+		/* Any unused index bits mean it's not valid. */
+ if (id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK
+ | KVM_REG_ARM_COPROC_MASK
+ | KVM_REG_ARM64_SYSREG_OP0_MASK
+ | KVM_REG_ARM64_SYSREG_OP1_MASK
+ | KVM_REG_ARM64_SYSREG_CRN_MASK
+ | KVM_REG_ARM64_SYSREG_CRM_MASK
+ | KVM_REG_ARM64_SYSREG_OP2_MASK))
+ return false;
+ params->Op0 = ((id & KVM_REG_ARM64_SYSREG_OP0_MASK)
+ >> KVM_REG_ARM64_SYSREG_OP0_SHIFT);
+ params->Op1 = ((id & KVM_REG_ARM64_SYSREG_OP1_MASK)
+ >> KVM_REG_ARM64_SYSREG_OP1_SHIFT);
+ params->CRn = ((id & KVM_REG_ARM64_SYSREG_CRN_MASK)
+ >> KVM_REG_ARM64_SYSREG_CRN_SHIFT);
+ params->CRm = ((id & KVM_REG_ARM64_SYSREG_CRM_MASK)
+ >> KVM_REG_ARM64_SYSREG_CRM_SHIFT);
+ params->Op2 = ((id & KVM_REG_ARM64_SYSREG_OP2_MASK)
+ >> KVM_REG_ARM64_SYSREG_OP2_SHIFT);
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* Decode an index value, and find the sys_reg_desc entry. */
+static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
+ u64 id)
+{
+ size_t num;
+ const struct sys_reg_desc *table, *r;
+ struct sys_reg_params params;
+
+ /* We only do sys_reg for now. */
+ if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
+ return NULL;
+
+ if (!index_to_params(id, &params))
+ return NULL;
+
+ table = get_target_table(vcpu->arch.target, true, &num);
+ r = find_reg(&params, table, num);
+ if (!r)
+ r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+ /* Not saved in the sys_reg array? */
+ if (r && !r->reg)
+ r = NULL;
+
+ return r;
+}
+
+/*
+ * These are the invariant sys_reg registers: we let the guest see the
+ * host versions of these, so they're part of the guest state.
+ *
+ * A future CPU may provide a mechanism to present different values to
+ * the guest, or a future kvm may trap them.
+ */
+
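+/*
+ * Generate a get_xxx() accessor that samples the host's value of the
+ * named register into the descriptor's ->val field.
+ */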
+#define FUNCTION_INVARIANT(reg) \
+ static void get_##reg(struct kvm_vcpu *v, \
+ const struct sys_reg_desc *r) \
+ { \
+ u64 val; \
+ \
+ asm volatile("mrs %0, " __stringify(reg) "\n" \
+ : "=r" (val)); \
+ ((struct sys_reg_desc *)r)->val = val; \
+ }
+
+FUNCTION_INVARIANT(midr_el1)
+FUNCTION_INVARIANT(ctr_el0)
+FUNCTION_INVARIANT(revidr_el1)
+FUNCTION_INVARIANT(id_pfr0_el1)
+FUNCTION_INVARIANT(id_pfr1_el1)
+FUNCTION_INVARIANT(id_dfr0_el1)
+FUNCTION_INVARIANT(id_afr0_el1)
+FUNCTION_INVARIANT(id_mmfr0_el1)
+FUNCTION_INVARIANT(id_mmfr1_el1)
+FUNCTION_INVARIANT(id_mmfr2_el1)
+FUNCTION_INVARIANT(id_mmfr3_el1)
+FUNCTION_INVARIANT(id_isar0_el1)
+FUNCTION_INVARIANT(id_isar1_el1)
+FUNCTION_INVARIANT(id_isar2_el1)
+FUNCTION_INVARIANT(id_isar3_el1)
+FUNCTION_INVARIANT(id_isar4_el1)
+FUNCTION_INVARIANT(id_isar5_el1)
+FUNCTION_INVARIANT(clidr_el1)
+FUNCTION_INVARIANT(aidr_el1)
+
+/* ->val is filled in by kvm_sys_reg_table_init() */
+static struct sys_reg_desc invariant_sys_regs[] = {
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000),
+ NULL, get_midr_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110),
+ NULL, get_revidr_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000),
+ NULL, get_id_pfr0_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001),
+ NULL, get_id_pfr1_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010),
+ NULL, get_id_dfr0_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011),
+ NULL, get_id_afr0_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100),
+ NULL, get_id_mmfr0_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101),
+ NULL, get_id_mmfr1_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110),
+ NULL, get_id_mmfr2_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111),
+ NULL, get_id_mmfr3_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
+ NULL, get_id_isar0_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001),
+ NULL, get_id_isar1_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
+ NULL, get_id_isar2_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011),
+ NULL, get_id_isar3_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100),
+ NULL, get_id_isar4_el1 },
+ { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101),
+ NULL, get_id_isar5_el1 },
+ { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001),
+ NULL, get_clidr_el1 },
+ { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111),
+ NULL, get_aidr_el1 },
+ { Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001),
+ NULL, get_ctr_el0 },
+};
+
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id)
+{
+ if (copy_from_user(val, uaddr, KVM_REG_SIZE(id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id)
+{
+ if (copy_to_user(uaddr, val, KVM_REG_SIZE(id)) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int get_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+ struct sys_reg_params params;
+ const struct sys_reg_desc *r;
+
+ if (!index_to_params(id, &params))
+ return -ENOENT;
+
+ r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+ if (!r)
+ return -ENOENT;
+
+ return reg_to_user(uaddr, &r->val, id);
+}
+
+static int set_invariant_sys_reg(u64 id, void __user *uaddr)
+{
+ struct sys_reg_params params;
+ const struct sys_reg_desc *r;
+ int err;
+ u64 val = 0; /* Make sure high bits are 0 for 32-bit regs */
+
+ if (!index_to_params(id, &params))
+ return -ENOENT;
+ r = find_reg(&params, invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs));
+ if (!r)
+ return -ENOENT;
+
+ err = reg_from_user(&val, uaddr, id);
+ if (err)
+ return err;
+
+ /* This is what we mean by invariant: you can't change it. */
+ if (r->val != val)
+ return -EINVAL;
+
+ return 0;
+}
+
+static bool is_valid_cache(u32 val)
+{
+ u32 level, ctype;
+
+ if (val >= CSSELR_MAX)
+ return false;
+
+ /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */
+ level = (val >> 1);
+ ctype = (cache_levels >> (level * 3)) & 7;
+
+ switch (ctype) {
+ case 0: /* No cache */
+ return false;
+ case 1: /* Instruction cache only */
+ return (val & 1);
+ case 2: /* Data cache only */
+ case 4: /* Unified cache */
+ return !(val & 1);
+ case 3: /* Separate instruction and data caches */
+ return true;
+ default: /* Reserved: we can't know instruction or data. */
+ return false;
+ }
+}
+
+static int demux_c15_get(u64 id, void __user *uaddr)
+{
+ u32 val;
+ u32 __user *uval = uaddr;
+
+ /* Fail if we have unknown bits set. */
+ if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+ | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+ return -ENOENT;
+
+ switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+ case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+ if (KVM_REG_SIZE(id) != 4)
+ return -ENOENT;
+ val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+ >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+ if (!is_valid_cache(val))
+ return -ENOENT;
+
+ return put_user(get_ccsidr(val), uval);
+ default:
+ return -ENOENT;
+ }
+}
+
+static int demux_c15_set(u64 id, void __user *uaddr)
+{
+ u32 val, newval;
+ u32 __user *uval = uaddr;
+
+ /* Fail if we have unknown bits set. */
+ if (id & ~(KVM_REG_ARCH_MASK|KVM_REG_SIZE_MASK|KVM_REG_ARM_COPROC_MASK
+ | ((1 << KVM_REG_ARM_COPROC_SHIFT)-1)))
+ return -ENOENT;
+
+ switch (id & KVM_REG_ARM_DEMUX_ID_MASK) {
+ case KVM_REG_ARM_DEMUX_ID_CCSIDR:
+ if (KVM_REG_SIZE(id) != 4)
+ return -ENOENT;
+ val = (id & KVM_REG_ARM_DEMUX_VAL_MASK)
+ >> KVM_REG_ARM_DEMUX_VAL_SHIFT;
+ if (!is_valid_cache(val))
+ return -ENOENT;
+
+ if (get_user(newval, uval))
+ return -EFAULT;
+
+ /* This is also invariant: you can't change it. */
+ if (newval != get_ccsidr(val))
+ return -EINVAL;
+ return 0;
+ default:
+ return -ENOENT;
+ }
+}
+
+int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ const struct sys_reg_desc *r;
+ void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+ return demux_c15_get(reg->id, uaddr);
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+ return -ENOENT;
+
+ r = index_to_sys_reg_desc(vcpu, reg->id);
+ if (!r)
+ return get_invariant_sys_reg(reg->id, uaddr);
+
+ return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
+}
+
+int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ const struct sys_reg_desc *r;
+ void __user *uaddr = (void __user *)(unsigned long)reg->addr;
+
+ if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+ return demux_c15_set(reg->id, uaddr);
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(__u64))
+ return -ENOENT;
+
+ r = index_to_sys_reg_desc(vcpu, reg->id);
+ if (!r)
+ return set_invariant_sys_reg(reg->id, uaddr);
+
+ return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+}
+
+static unsigned int num_demux_regs(void)
+{
+ unsigned int i, count = 0;
+
+ for (i = 0; i < CSSELR_MAX; i++)
+ if (is_valid_cache(i))
+ count++;
+
+ return count;
+}
+
+static int write_demux_regids(u64 __user *uindices)
+{
+ u64 val = KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX;
+ unsigned int i;
+
+ val |= KVM_REG_ARM_DEMUX_ID_CCSIDR;
+ for (i = 0; i < CSSELR_MAX; i++) {
+ if (!is_valid_cache(i))
+ continue;
+ if (put_user(val | i, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ return 0;
+}
+
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg)
+{
+ return (KVM_REG_ARM64 | KVM_REG_SIZE_U64 |
+ KVM_REG_ARM64_SYSREG |
+ (reg->Op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT) |
+ (reg->Op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) |
+ (reg->CRn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) |
+ (reg->CRm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) |
+ (reg->Op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT));
+}
+
+static bool copy_reg_to_user(const struct sys_reg_desc *reg, u64 __user **uind)
+{
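+	/* A NULL buffer means the caller only wants a count. */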
+ if (!*uind)
+ return true;
+
+ if (put_user(sys_reg_to_index(reg), *uind))
+ return false;
+
+ (*uind)++;
+ return true;
+}
+
+/* Assumed ordered tables, see kvm_sys_reg_table_init. */
+static int walk_sys_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
+{
+ const struct sys_reg_desc *i1, *i2, *end1, *end2;
+ unsigned int total = 0;
+ size_t num;
+
+ /* We check for duplicates here, to allow arch-specific overrides. */
+ i1 = get_target_table(vcpu->arch.target, true, &num);
+ end1 = i1 + num;
+ i2 = sys_reg_descs;
+ end2 = sys_reg_descs + ARRAY_SIZE(sys_reg_descs);
+
+ BUG_ON(i1 == end1 || i2 == end2);
+
+ /* Walk carefully, as both tables may refer to the same register. */
+ while (i1 || i2) {
+ int cmp = cmp_sys_reg(i1, i2);
+ /* target-specific overrides generic entry. */
+ if (cmp <= 0) {
+ /* Ignore registers we trap but don't save. */
+ if (i1->reg) {
+ if (!copy_reg_to_user(i1, &uind))
+ return -EFAULT;
+ total++;
+ }
+ } else {
+ /* Ignore registers we trap but don't save. */
+ if (i2->reg) {
+ if (!copy_reg_to_user(i2, &uind))
+ return -EFAULT;
+ total++;
+ }
+ }
+
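+		/*
+		 * On a match (cmp == 0) both iterators advance, so a
+		 * register present in both tables is only listed once.
+		 */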
+ if (cmp <= 0 && ++i1 == end1)
+ i1 = NULL;
+ if (cmp >= 0 && ++i2 == end2)
+ i2 = NULL;
+ }
+ return total;
+}
+
+unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu)
+{
+ return ARRAY_SIZE(invariant_sys_regs)
+ + num_demux_regs()
+ + walk_sys_regs(vcpu, (u64 __user *)NULL);
+}
+
+int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ unsigned int i;
+ int err;
+
+	/* Give them all the invariant registers' indices first. */
+ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) {
+ if (put_user(sys_reg_to_index(&invariant_sys_regs[i]), uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ err = walk_sys_regs(vcpu, uindices);
+ if (err < 0)
+ return err;
+ uindices += err;
+
+ return write_demux_regids(uindices);
+}
+
+static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 1; i < n; i++) {
+ if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
+ kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+void kvm_sys_reg_table_init(void)
+{
+ unsigned int i;
+ struct sys_reg_desc clidr;
+
+ /* Make sure tables are unique and in order. */
+ BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs)));
+ BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs)));
+ BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs)));
+ BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs)));
+ BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs)));
+ BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs)));
+
+ /* We abuse the reset function to overwrite the table itself. */
+ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++)
+ invariant_sys_regs[i].reset(NULL, &invariant_sys_regs[i]);
+
+ /*
+ * CLIDR format is awkward, so clean it up. See ARM B4.1.20:
+ *
+ * If software reads the Cache Type fields from Ctype1
+ * upwards, once it has seen a value of 0b000, no caches
+ * exist at further-out levels of the hierarchy. So, for
+ * example, if Ctype3 is the first Cache Type field with a
+ * value of 0b000, the values of Ctype4 to Ctype7 must be
+ * ignored.
+ */
+ get_clidr_el1(NULL, &clidr); /* Ugly... */
+ cache_levels = clidr.val;
+ for (i = 0; i < 7; i++)
+ if (((cache_levels >> (i*3)) & 7) == 0)
+ break;
+ /* Clear all higher bits. */
+ cache_levels &= (1 << (i*3))-1;
+}
+
+/**
+ * kvm_reset_sys_regs - sets system registers to their reset values
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on the
+ * virtual CPU struct to their architecturally defined reset values.
+ */
+void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
+{
+ size_t num;
+ const struct sys_reg_desc *table;
+
+	/* Catch someone adding a register without putting in a reset entry. */
+ memset(&vcpu->arch.ctxt.sys_regs, 0x42, sizeof(vcpu->arch.ctxt.sys_regs));
+
+ /* Generic chip reset first (so target could override). */
+ reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+
+ table = get_target_table(vcpu->arch.target, true, &num);
+ reset_sys_reg_descs(vcpu, table, num);
+
+ for (num = 1; num < NR_SYS_REGS; num++)
+ if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
+ panic("Didn't reset vcpu_sys_reg(%zi)", num);
+}
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
new file mode 100644
index 000000000000..d411e251412c
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/kvm/coproc.h
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_SYS_REGS_LOCAL_H__
+#define __ARM64_KVM_SYS_REGS_LOCAL_H__
+
+struct sys_reg_params {
+ u8 Op0;
+ u8 Op1;
+ u8 CRn;
+ u8 CRm;
+ u8 Op2;
+ u8 Rt;
+ bool is_write;
+ bool is_aarch32;
+ bool is_32bit; /* Only valid if is_aarch32 is true */
+};
+
+struct sys_reg_desc {
+ /* MRS/MSR instruction which accesses it. */
+ u8 Op0;
+ u8 Op1;
+ u8 CRn;
+ u8 CRm;
+ u8 Op2;
+
+ /* Trapped access from guest, if non-NULL. */
+ bool (*access)(struct kvm_vcpu *,
+ const struct sys_reg_params *,
+ const struct sys_reg_desc *);
+
+ /* Initialization for vcpu. */
+ void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc *);
+
+ /* Index into sys_reg[], or 0 if we don't need to save it. */
+ int reg;
+
+ /* Value (usually reset value) */
+ u64 val;
+};
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+ /* Look, we even formatted it for you to paste into the table! */
+ kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+ p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+}
+
+static inline bool ignore_write(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p)
+{
+ return true;
+}
+
+static inline bool read_zero(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p)
+{
+ *vcpu_reg(vcpu, p->Rt) = 0;
+ return true;
+}
+
+static inline bool write_to_read_only(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *params)
+{
+ kvm_debug("sys_reg write to read-only register at: %lx\n",
+ *vcpu_pc(vcpu));
+ print_sys_reg_instr(params);
+ return false;
+}
+
+static inline bool read_from_write_only(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *params)
+{
+ kvm_debug("sys_reg read to write-only register at: %lx\n",
+ *vcpu_pc(vcpu));
+ print_sys_reg_instr(params);
+ return false;
+}
+
+/* Reset functions */
+static inline void reset_unknown(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *r)
+{
+ BUG_ON(!r->reg);
+ BUG_ON(r->reg >= NR_SYS_REGS);
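+	/* Poison with a recognisable value, so a missed reset stands out. */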
+ vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+}
+
+static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ BUG_ON(!r->reg);
+ BUG_ON(r->reg >= NR_SYS_REGS);
+ vcpu_sys_reg(vcpu, r->reg) = r->val;
+}
+
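+/*
+ * Order descriptors by (Op0, Op1, CRn, CRm, Op2), with a NULL descriptor
+ * sorting after everything else; this is what lets walk_sys_regs() merge
+ * the two sorted tables.
+ */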
+static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
+ const struct sys_reg_desc *i2)
+{
+ BUG_ON(i1 == i2);
+ if (!i1)
+ return 1;
+ else if (!i2)
+ return -1;
+ if (i1->Op0 != i2->Op0)
+ return i1->Op0 - i2->Op0;
+ if (i1->Op1 != i2->Op1)
+ return i1->Op1 - i2->Op1;
+ if (i1->CRn != i2->CRn)
+ return i1->CRn - i2->CRn;
+ if (i1->CRm != i2->CRm)
+ return i1->CRm - i2->CRm;
+ return i1->Op2 - i2->Op2;
+}
+
+
+#define Op0(_x) .Op0 = _x
+#define Op1(_x) .Op1 = _x
+#define CRn(_x) .CRn = _x
+#define CRm(_x) .CRm = _x
+#define Op2(_x) .Op2 = _x
+
+#endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
new file mode 100644
index 000000000000..475fd2929310
--- /dev/null
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Based on arch/arm/kvm/coproc_a15.c:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Authors: Rusty Russell <rusty@rustcorp.com.au>
+ * Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kvm_host.h>
+#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_coproc.h>
+#include <linux/init.h>
+
+#include "sys_regs.h"
+
+static bool access_actlr(struct kvm_vcpu *vcpu,
+ const struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ if (p->is_write)
+ return ignore_write(vcpu, p);
+
+ *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, ACTLR_EL1);
+ return true;
+}
+
+static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
+{
+ u64 actlr;
+
+ asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr));
+ vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr;
+}
+
+/*
+ * Implementation specific sys-reg registers.
+ * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
+ */
+static const struct sys_reg_desc genericv8_sys_regs[] = {
+ /* ACTLR_EL1 */
+ { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+ access_actlr, reset_actlr, ACTLR_EL1 },
+};
+
+static const struct sys_reg_desc genericv8_cp15_regs[] = {
+ /* ACTLR */
+ { Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001),
+ access_actlr },
+};
+
+static struct kvm_sys_reg_target_table genericv8_target_table = {
+ .table64 = {
+ .table = genericv8_sys_regs,
+ .num = ARRAY_SIZE(genericv8_sys_regs),
+ },
+ .table32 = {
+ .table = genericv8_cp15_regs,
+ .num = ARRAY_SIZE(genericv8_cp15_regs),
+ },
+};
+
+static int __init sys_reg_genericv8_init(void)
+{
+ unsigned int i;
+
+ for (i = 1; i < ARRAY_SIZE(genericv8_sys_regs); i++)
+ BUG_ON(cmp_sys_reg(&genericv8_sys_regs[i-1],
+ &genericv8_sys_regs[i]) >= 0);
+
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_AEM_V8,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_FOUNDATION_V8,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A53,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
+ &genericv8_target_table);
+ kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
+ &genericv8_target_table);
+
+ return 0;
+}
+late_initcall(sys_reg_genericv8_init);
diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
new file mode 100644
index 000000000000..ae211772f991
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v2-switch.S
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+ENTRY(__save_vgic_v2_state)
+ /* Get VGIC VCTRL base into x2 */
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr x2, [x2, #KVM_VGIC_VCTRL]
+ kern_hyp_va x2
+ cbz x2, 2f // disabled
+
+ /* Compute the address of struct vgic_cpu */
+ add x3, x0, #VCPU_VGIC_CPU
+
+ /* Save all interesting registers */
+ ldr w4, [x2, #GICH_HCR]
+ ldr w5, [x2, #GICH_VMCR]
+ ldr w6, [x2, #GICH_MISR]
+ ldr w7, [x2, #GICH_EISR0]
+ ldr w8, [x2, #GICH_EISR1]
+ ldr w9, [x2, #GICH_ELRSR0]
+ ldr w10, [x2, #GICH_ELRSR1]
+ ldr w11, [x2, #GICH_APR]
+CPU_BE( rev w4, w4 )
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+CPU_BE( rev w8, w8 )
+CPU_BE( rev w9, w9 )
+CPU_BE( rev w10, w10 )
+CPU_BE( rev w11, w11 )
+
+ str w4, [x3, #VGIC_V2_CPU_HCR]
+ str w5, [x3, #VGIC_V2_CPU_VMCR]
+ str w6, [x3, #VGIC_V2_CPU_MISR]
+ str w7, [x3, #VGIC_V2_CPU_EISR]
+ str w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
+ str w9, [x3, #VGIC_V2_CPU_ELRSR]
+ str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
+ str w11, [x3, #VGIC_V2_CPU_APR]
+
+ /* Clear GICH_HCR */
+ str wzr, [x2, #GICH_HCR]
+
+ /* Save list registers */
+ add x2, x2, #GICH_LR0
+ ldr w4, [x3, #VGIC_CPU_NR_LR]
+ add x3, x3, #VGIC_V2_CPU_LR
+1: ldr w5, [x2], #4
+CPU_BE( rev w5, w5 )
+ str w5, [x3], #4
+ sub w4, w4, #1
+ cbnz w4, 1b
+2:
+ ret
+ENDPROC(__save_vgic_v2_state)
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+ENTRY(__restore_vgic_v2_state)
+ /* Get VGIC VCTRL base into x2 */
+ ldr x2, [x0, #VCPU_KVM]
+ kern_hyp_va x2
+ ldr x2, [x2, #KVM_VGIC_VCTRL]
+ kern_hyp_va x2
+ cbz x2, 2f // disabled
+
+ /* Compute the address of struct vgic_cpu */
+ add x3, x0, #VCPU_VGIC_CPU
+
+ /* We only restore a minimal set of registers */
+ ldr w4, [x3, #VGIC_V2_CPU_HCR]
+ ldr w5, [x3, #VGIC_V2_CPU_VMCR]
+ ldr w6, [x3, #VGIC_V2_CPU_APR]
+CPU_BE( rev w4, w4 )
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+
+ str w4, [x2, #GICH_HCR]
+ str w5, [x2, #GICH_VMCR]
+ str w6, [x2, #GICH_APR]
+
+ /* Restore list registers */
+ add x2, x2, #GICH_LR0
+ ldr w4, [x3, #VGIC_CPU_NR_LR]
+ add x3, x3, #VGIC_V2_CPU_LR
+1: ldr w5, [x3], #4
+CPU_BE( rev w5, w5 )
+ str w5, [x2], #4
+ sub w4, w4, #1
+ cbnz w4, 1b
+2:
+ ret
+ENDPROC(__restore_vgic_v2_state)
+
+ .popsection
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
new file mode 100644
index 000000000000..d16046999e06
--- /dev/null
+++ b/arch/arm64/kvm/vgic-v3-switch.S
@@ -0,0 +1,267 @@
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+ .text
+ .pushsection .hyp.text, "ax"
+
+/*
+ * We store LRs in reverse order to let the CPU deal with streaming
+ * access. Use this macro to make it look saner...
+ */
+#define LR_OFFSET(n)	(VGIC_V3_CPU_LR + (15 - (n)) * 8)
+
+/*
+ * Save the VGIC CPU state into memory
+ * x0: Register pointing to VCPU struct
+ * Do not corrupt x1!!!
+ */
+.macro save_vgic_v3_state
+ // Compute the address of struct vgic_cpu
+ add x3, x0, #VCPU_VGIC_CPU
+
+ // Make sure stores to the GIC via the memory mapped interface
+ // are now visible to the system register interface
+ dsb st
+
+ // Save all interesting registers
+ mrs_s x4, ICH_HCR_EL2
+ mrs_s x5, ICH_VMCR_EL2
+ mrs_s x6, ICH_MISR_EL2
+ mrs_s x7, ICH_EISR_EL2
+ mrs_s x8, ICH_ELSR_EL2
+
+ str w4, [x3, #VGIC_V3_CPU_HCR]
+ str w5, [x3, #VGIC_V3_CPU_VMCR]
+ str w6, [x3, #VGIC_V3_CPU_MISR]
+ str w7, [x3, #VGIC_V3_CPU_EISR]
+ str w8, [x3, #VGIC_V3_CPU_ELRSR]
+
+ msr_s ICH_HCR_EL2, xzr
+
+ mrs_s x21, ICH_VTR_EL2
+ mvn w22, w21
+ ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4
+
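+	// Each mrs_s below assembles to a single 4-byte instruction, so
+	// branching w23 bytes into the block skips the list registers
+	// this implementation doesn't provide.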
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ mrs_s x20, ICH_LR15_EL2
+ mrs_s x19, ICH_LR14_EL2
+ mrs_s x18, ICH_LR13_EL2
+ mrs_s x17, ICH_LR12_EL2
+ mrs_s x16, ICH_LR11_EL2
+ mrs_s x15, ICH_LR10_EL2
+ mrs_s x14, ICH_LR9_EL2
+ mrs_s x13, ICH_LR8_EL2
+ mrs_s x12, ICH_LR7_EL2
+ mrs_s x11, ICH_LR6_EL2
+ mrs_s x10, ICH_LR5_EL2
+ mrs_s x9, ICH_LR4_EL2
+ mrs_s x8, ICH_LR3_EL2
+ mrs_s x7, ICH_LR2_EL2
+ mrs_s x6, ICH_LR1_EL2
+ mrs_s x5, ICH_LR0_EL2
+
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ str x20, [x3, #LR_OFFSET(15)]
+ str x19, [x3, #LR_OFFSET(14)]
+ str x18, [x3, #LR_OFFSET(13)]
+ str x17, [x3, #LR_OFFSET(12)]
+ str x16, [x3, #LR_OFFSET(11)]
+ str x15, [x3, #LR_OFFSET(10)]
+ str x14, [x3, #LR_OFFSET(9)]
+ str x13, [x3, #LR_OFFSET(8)]
+ str x12, [x3, #LR_OFFSET(7)]
+ str x11, [x3, #LR_OFFSET(6)]
+ str x10, [x3, #LR_OFFSET(5)]
+ str x9, [x3, #LR_OFFSET(4)]
+ str x8, [x3, #LR_OFFSET(3)]
+ str x7, [x3, #LR_OFFSET(2)]
+ str x6, [x3, #LR_OFFSET(1)]
+ str x5, [x3, #LR_OFFSET(0)]
+
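+	// How many active priority registers exist depends on
+	// ICH_VTR_EL2.PRIbits: 5 priority bits -> AP0R0 only,
+	// 6 bits -> AP0R1 as well, 7 bits -> all four.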
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ mrs_s x20, ICH_AP0R3_EL2
+ str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+ mrs_s x19, ICH_AP0R2_EL2
+ str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+6: mrs_s x18, ICH_AP0R1_EL2
+ str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+5: mrs_s x17, ICH_AP0R0_EL2
+ str w17, [x3, #VGIC_V3_CPU_AP0R]
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ mrs_s x20, ICH_AP1R3_EL2
+ str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+ mrs_s x19, ICH_AP1R2_EL2
+ str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+6: mrs_s x18, ICH_AP1R1_EL2
+ str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+5: mrs_s x17, ICH_AP1R0_EL2
+ str w17, [x3, #VGIC_V3_CPU_AP1R]
+
+ // Restore SRE_EL1 access and re-enable SRE at EL1.
+ mrs_s x5, ICC_SRE_EL2
+ orr x5, x5, #ICC_SRE_EL2_ENABLE
+ msr_s ICC_SRE_EL2, x5
+ isb
+ mov x5, #1
+ msr_s ICC_SRE_EL1, x5
+.endm
+
+/*
+ * Restore the VGIC CPU state from memory
+ * x0: Register pointing to VCPU struct
+ */
+.macro restore_vgic_v3_state
+ // Disable SRE_EL1 access. Necessary, otherwise
+ // ICH_VMCR_EL2.VFIQEn becomes one, and FIQ happens...
+ msr_s ICC_SRE_EL1, xzr
+ isb
+
+ // Compute the address of struct vgic_cpu
+ add x3, x0, #VCPU_VGIC_CPU
+
+ // Restore all interesting registers
+ ldr w4, [x3, #VGIC_V3_CPU_HCR]
+ ldr w5, [x3, #VGIC_V3_CPU_VMCR]
+
+ msr_s ICH_HCR_EL2, x4
+ msr_s ICH_VMCR_EL2, x5
+
+ mrs_s x21, ICH_VTR_EL2
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
+ msr_s ICH_AP1R3_EL2, x20
+ ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
+ msr_s ICH_AP1R2_EL2, x19
+6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
+ msr_s ICH_AP1R1_EL2, x18
+5: ldr w17, [x3, #VGIC_V3_CPU_AP1R]
+ msr_s ICH_AP1R0_EL2, x17
+
+ tbnz w21, #29, 6f // 6 bits
+ tbz w21, #30, 5f // 5 bits
+ // 7 bits
+ ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
+ msr_s ICH_AP0R3_EL2, x20
+ ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
+ msr_s ICH_AP0R2_EL2, x19
+6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
+ msr_s ICH_AP0R1_EL2, x18
+5: ldr w17, [x3, #VGIC_V3_CPU_AP0R]
+ msr_s ICH_AP0R0_EL2, x17
+
+	mvn	w22, w21
+ ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4
+
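+	// Same computed-branch trick as the save path: skip the loads
+	// and stores for unimplemented list registers.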
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ ldr x20, [x3, #LR_OFFSET(15)]
+ ldr x19, [x3, #LR_OFFSET(14)]
+ ldr x18, [x3, #LR_OFFSET(13)]
+ ldr x17, [x3, #LR_OFFSET(12)]
+ ldr x16, [x3, #LR_OFFSET(11)]
+ ldr x15, [x3, #LR_OFFSET(10)]
+ ldr x14, [x3, #LR_OFFSET(9)]
+ ldr x13, [x3, #LR_OFFSET(8)]
+ ldr x12, [x3, #LR_OFFSET(7)]
+ ldr x11, [x3, #LR_OFFSET(6)]
+ ldr x10, [x3, #LR_OFFSET(5)]
+ ldr x9, [x3, #LR_OFFSET(4)]
+ ldr x8, [x3, #LR_OFFSET(3)]
+ ldr x7, [x3, #LR_OFFSET(2)]
+ ldr x6, [x3, #LR_OFFSET(1)]
+ ldr x5, [x3, #LR_OFFSET(0)]
+
+ adr x24, 1f
+ add x24, x24, x23
+ br x24
+
+1:
+ msr_s ICH_LR15_EL2, x20
+ msr_s ICH_LR14_EL2, x19
+ msr_s ICH_LR13_EL2, x18
+ msr_s ICH_LR12_EL2, x17
+ msr_s ICH_LR11_EL2, x16
+ msr_s ICH_LR10_EL2, x15
+ msr_s ICH_LR9_EL2, x14
+ msr_s ICH_LR8_EL2, x13
+ msr_s ICH_LR7_EL2, x12
+ msr_s ICH_LR6_EL2, x11
+ msr_s ICH_LR5_EL2, x10
+ msr_s ICH_LR4_EL2, x9
+ msr_s ICH_LR3_EL2, x8
+ msr_s ICH_LR2_EL2, x7
+ msr_s ICH_LR1_EL2, x6
+ msr_s ICH_LR0_EL2, x5
+
+ // Ensure that the above will have reached the
+	// (re)distributors. This ensures the guest will read
+ // the correct values from the memory-mapped interface.
+ isb
+ dsb sy
+
+ // Prevent the guest from touching the GIC system registers
+ mrs_s x5, ICC_SRE_EL2
+ and x5, x5, #~ICC_SRE_EL2_ENABLE
+ msr_s ICC_SRE_EL2, x5
+.endm
+
+ENTRY(__save_vgic_v3_state)
+ save_vgic_v3_state
+ ret
+ENDPROC(__save_vgic_v3_state)
+
+ENTRY(__restore_vgic_v3_state)
+ restore_vgic_v3_state
+ ret
+ENDPROC(__restore_vgic_v3_state)
+
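+/*
+ * Expose ICH_VTR_EL2 (only accessible at EL2) to the host, which uses
+ * it to discover the number of implemented list registers.
+ */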
+ENTRY(__vgic_v3_get_ich_vtr_el2)
+ mrs_s x0, ICH_VTR_EL2
+ ret
+ENDPROC(__vgic_v3_get_ich_vtr_el2)
+
+ .popsection
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index e0ef63cd05dc..e085ee6ef4e2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -209,8 +209,14 @@ ENTRY(__cpu_setup)
* Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
* both user and kernel.
*/
- ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \
+ ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \
TCR_ASID16 | TCR_TBI0 | (1 << 31)
+ /*
+ * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
+ * TCR_EL1.
+ */
+ mrs x9, ID_AA64MMFR0_EL1
+ bfi x10, x9, #32, #3
#ifdef CONFIG_ARM64_64K_PAGES
orr x10, x10, TCR_TG0_64K
orr x10, x10, TCR_TG1_64K
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 989dd3fe8de1..cf03097176b1 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -238,9 +238,6 @@ struct kvm_vm_data {
#define KVM_NR_PAGE_SIZES 1
#define KVM_PAGES_PER_HPAGE(x) 1
-struct kvm;
-struct kvm_vcpu;
-
struct kvm_mmio_req {
uint64_t addr; /* physical address */
uint64_t size; /* size in bytes */
@@ -599,6 +596,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu);
struct kvm *kvm_arch_alloc_vm(void);
void kvm_arch_free_vm(struct kvm *kvm);
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+
#endif /* __ASSEMBLY__*/
#endif
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 990b86420cc6..3d50ea955c4c 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -25,6 +25,7 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select KVM_APIC_ARCHITECTURE
select KVM_MMIO
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 1a4053789d01..18e45ec49bbf 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -47,12 +47,13 @@ FORCE : $(obj)/$(offsets-file)
ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
+KVM := ../../../virt/kvm
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
- coalesced_mmio.o irq_comm.o)
+common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+ $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
-common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o)
+common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
endif
kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5b2dc0d10c8f..c9aa236dc29b 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
static DEFINE_SPINLOCK(vp_lock);
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
long status;
long tmp_base;
@@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage)
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
{
long status;
@@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn)
*(int *)rtn = 0;
}
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -702,7 +702,7 @@ again:
out:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (r > 0) {
- kvm_resched(vcpu);
+ cond_resched();
idx = srcu_read_lock(&vcpu->kvm->srcu);
goto again;
}
@@ -1363,10 +1363,6 @@ static void kvm_release_vm_pages(struct kvm *kvm)
}
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
@@ -1375,10 +1371,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_release_vm_pages(kvm);
}
-void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
-{
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
if (cpu != vcpu->cpu) {
@@ -1467,7 +1459,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
kfree(vcpu->arch.apic);
}
-
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1550,12 +1541,8 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
return 0;
}
@@ -1591,14 +1578,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return 0;
}
-void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
- enum kvm_mr_change change)
-{
- return;
-}
-
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
kvm_flush_remote_tlbs(kvm);
@@ -1847,10 +1826,6 @@ int kvm_arch_hardware_setup(void)
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
{
return __apic_accept_irq(vcpu, irq->vector);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 4d6fa0bf1305..5e3f4b0f18c8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -71,11 +71,6 @@
#define CAUSEB_DC 27
#define CAUSEF_DC (_ULCAST_(1) << 27)
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-struct kvm_interrupt;
-
extern atomic_t kvm_mips_instance;
extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn);
extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn);
@@ -659,5 +654,16 @@ extern void mips32_SyncICache(unsigned long addr, unsigned long size);
extern int kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
#endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index 2c7b3ade8ec0..5cd48572450e 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -196,16 +196,21 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
return -ENOIOCTLCMD;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
}
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
return 0;
}
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 9b973e0af9cb..d340d53c345b 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -74,6 +74,7 @@
*/
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL_GPL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
/*
* Not static inline because used by IP27 special magic initialization code
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index af326cde7cb6..f391f3fbde8b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -53,7 +53,6 @@
#define KVM_ARCH_WANT_MMU_NOTIFIER
-struct kvm;
extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
@@ -81,10 +80,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
/* Physical Address Mask - allowed range of real mode RAM access */
#define KVM_PAM 0x0fffffffffffffffULL
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-
struct lppaca;
struct slb_shadow;
struct dtl_entry;
@@ -628,4 +623,12 @@ struct kvm_vcpu_arch {
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_hardware_unsetup(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_exit(void) {}
+
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a5287fe03d77..e2dd05c81bc6 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -143,9 +143,11 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
extern void kvm_release_hpt(struct kvmppc_linear_info *li);
extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
-extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+extern void kvmppc_core_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
-extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+extern int kvmppc_core_create_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
unsigned long npages);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index eb643f862579..60019a6fd6bb 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -155,6 +155,7 @@ config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
depends on KVM && E500
select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
help
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 422de3f4d46c..008cd856c5b5 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -5,9 +5,10 @@
subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
+KVM := ../../../virt/kvm
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
- eventfd.o)
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
+ $(KVM)/eventfd.o
CFLAGS_44x_tlb.o := -I.
CFLAGS_e500_mmu.o := -I.
@@ -53,7 +54,7 @@ kvm-e500mc-objs := \
kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
kvm-book3s_64-objs-$(CONFIG_KVM_BOOK3S_64_PR) := \
- ../../../virt/kvm/coalesced_mmio.o \
+ $(KVM)/coalesced_mmio.o \
fpu.o \
book3s_paired_singles.o \
book3s_pr.o \
@@ -86,8 +87,8 @@ kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
kvm-book3s_64-module-objs := \
- ../../../virt/kvm/kvm_main.o \
- ../../../virt/kvm/eventfd.o \
+ $(KVM)/kvm_main.o \
+ $(KVM)/eventfd.o \
powerpc.o \
emulate.o \
book3s.o \
@@ -111,7 +112,7 @@ kvm-book3s_32-objs := \
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
-kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o)
+kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 102ad8a255f3..a54c59db7bd8 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1258,7 +1258,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
kvm_guest_exit();
preempt_enable();
- kvm_resched(vcpu);
+ cond_resched();
spin_lock(&vc->lock);
now = get_tb();
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1a1b51189773..0a91f47e264b 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1592,12 +1592,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return -ENOTSUPP;
}
-void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
}
-int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
{
return 0;
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 2861ae9eaae6..b58d61039015 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -1822,8 +1822,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
return 0;
}
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
- struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
@@ -1835,7 +1834,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
e->irqchip.pin = ue->u.irqchip.pin;
if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
goto out;
- rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6316ee336e88..ea4cfdc991da 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -246,24 +246,16 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
return r;
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
int kvm_arch_hardware_setup(void)
{
return 0;
}
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
void kvm_arch_check_processor_compat(void *rtn)
{
*(int *)rtn = kvmppc_core_check_processor_compat();
@@ -296,11 +288,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
mutex_unlock(&kvm->lock);
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -409,15 +397,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
- kvmppc_core_free_memslot(free, dont);
+ kvmppc_core_free_memslot(kvm, free, dont);
}
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
- return kvmppc_core_create_memslot(slot, npages);
+ return kvmppc_core_create_memslot(kvm, slot, npages);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -436,10 +425,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
kvmppc_core_commit_memory_region(kvm, mem, old);
}
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
@@ -1125,7 +1110,3 @@ int kvm_arch_init(void *opaque)
{
return 0;
}
-
-void kvm_arch_exit(void)
-{
-}
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 16bd5d169cdb..99971dfc6b9a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,8 +13,11 @@
#ifndef ASM_KVM_HOST_H
#define ASM_KVM_HOST_H
+
+#include <linux/types.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
+#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <asm/debug.h>
#include <asm/cpu.h>
@@ -266,4 +269,18 @@ struct kvm_arch{
};
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_check_processor_compat(void *rtn) {}
+static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot) {}
+
#endif
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 8fe9d65a4585..40b4c6470f88 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -6,7 +6,8 @@
# it under the terms of the GNU General Public License (version 2 only)
# as published by the Free Software Foundation.
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o eventfd.o)
+KVM := ../../../virt/kvm
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 698fb826e149..412fbc5dc688 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -86,16 +86,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
static unsigned long long *facilities;
/* Section: not file related */
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
/* every s390 is virtualization enabled ;-) */
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
int kvm_arch_hardware_setup(void)
{
return 0;
@@ -105,19 +101,11 @@ void kvm_arch_hardware_unsetup(void)
{
}
-void kvm_arch_check_processor_compat(void *rtn)
-{
-}
-
int kvm_arch_init(void *opaque)
{
return 0;
}
-void kvm_arch_exit(void)
-{
-}
-
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
@@ -127,7 +115,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -289,10 +277,6 @@ static void kvm_free_vcpus(struct kvm *kvm)
mutex_unlock(&kvm->lock);
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
@@ -320,11 +304,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- /* Nothing todo */
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
save_fp_regs(&vcpu->arch.host_fpregs);
@@ -971,12 +950,8 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
return 0;
}
@@ -1026,15 +1001,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
return;
}
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
-}
-
static int __init kvm_s390_init(void)
{
int ret;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index eba15f18fd38..a4dfc0bd05db 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -43,6 +43,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
static void __init setup_zero_pages(void)
{
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f7f20f7fac3c..1b83952b1106 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -79,6 +79,13 @@
#define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
+static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
+{
+ /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
+ return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
+ (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
+}
+
#define SELECTOR_TI_MASK (1 << 2)
#define SELECTOR_RPL_MASK 0x03
@@ -96,10 +103,6 @@
#define ASYNC_PF_PER_VCPU 64
-struct kvm_vcpu;
-struct kvm;
-struct kvm_async_pf;
-
enum kvm_reg {
VCPU_REGS_RAX = 0,
VCPU_REGS_RCX = 1,
@@ -631,8 +634,8 @@ struct msr_data {
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
- int (*hardware_enable)(void *dummy);
- void (*hardware_disable)(void *dummy);
+ int (*hardware_enable)(void);
+ void (*hardware_disable)(void);
void (*check_processor_compatibility)(void *rtn);
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
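The hunk above relocates gfn_to_index() from the generic header (it is removed from include/linux/kvm_host.h later in this patch) into the x86 header. An illustrative, standalone sketch of the arithmetic, assuming the usual x86 definition KVM_HPAGE_GFN_SHIFT(level) == ((level) - 1) * 9 (that definition is an assumption here, not shown in this diff):

#include <stdio.h>

#define TOY_HPAGE_GFN_SHIFT(level)	(((level) - 1) * 9)

static unsigned long toy_gfn_to_index(unsigned long gfn,
				      unsigned long base_gfn, int level)
{
	return (gfn >> TOY_HPAGE_GFN_SHIFT(level)) -
	       (base_gfn >> TOY_HPAGE_GFN_SHIFT(level));
}

int main(void)
{
	/* Slot based at gfn 0x400: gfn 0x653 lands in 2MB-chunk index 1. */
	printf("%lu\n", toy_gfn_to_index(0x653, 0x400, 2));
	return 0;
}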
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5d9a3033b3d7..d3a87780c70b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 {
__u32 padding[3];
};
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC 2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT 4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
/* for KVM_SET_CPUID2 */
struct kvm_cpuid2 {
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a47a3e54b964..bdccfb62aa0d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -27,6 +27,7 @@ config KVM
select MMU_NOTIFIER
select ANON_INODES
select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
@@ -38,6 +39,7 @@ config KVM
select PERF_EVENTS
select HAVE_KVM_MSI
select HAVE_KVM_CPU_RELAX_INTERCEPT
+ select KVM_VFIO
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d609e1d84048..25d22b2d6509 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -5,12 +5,13 @@ CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
CFLAGS_vmx.o := -I.
-kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
- coalesced_mmio.o irq_comm.o eventfd.o \
- irqchip.o)
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(addprefix ../../../virt/kvm/, \
- assigned-dev.o iommu.o)
-kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)
+KVM := ../../../virt/kvm
+
+kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \
+ $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
+ $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o
+kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o cpuid.o pmu.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a20ecb5b6cbf..89d288237b9c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -187,8 +187,14 @@ static bool supported_xcr0_bit(unsigned bit)
#define F(x) bit(X86_FEATURE_##x)
-static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
- u32 index, int *nent, int maxnent)
+static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
+ u32 func, u32 index, int *nent, int maxnent)
+{
+ return 0;
+}
+
+static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
+ u32 index, int *nent, int maxnent)
{
int r;
unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@@ -480,6 +486,15 @@ out:
return r;
}
+static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
+ u32 idx, int *nent, int maxnent, unsigned int type)
+{
+ if (type == KVM_GET_EMULATED_CPUID)
+ return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
+
+ return __do_cpuid_ent(entry, func, idx, nent, maxnent);
+}
+
#undef F
struct kvm_cpuid_param {
@@ -494,8 +509,34 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
}
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
+ __u32 num_entries, unsigned int ioctl_type)
+{
+ int i;
+
+ if (ioctl_type != KVM_GET_EMULATED_CPUID)
+ return false;
+
+ /*
+ * We want to make sure that ->padding is being passed clean from
+ * userspace in case we want to use it for something in the future.
+ *
+ * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
+	 * have to content ourselves with checking only the emulated side. /me
+ * sheds a tear.
+ */
+ for (i = 0; i < num_entries; i++) {
+ if (entries[i].padding[0] ||
+ entries[i].padding[1] ||
+ entries[i].padding[2])
+ return true;
+ }
+ return false;
+}
+
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries,
+ unsigned int type)
{
struct kvm_cpuid_entry2 *cpuid_entries;
int limit, nent = 0, r = -E2BIG, i;
@@ -512,6 +553,10 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
goto out;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
cpuid->nent = KVM_MAX_CPUID_ENTRIES;
+
+ if (sanity_check_entries(entries, cpuid->nent, type))
+ return -EINVAL;
+
r = -ENOMEM;
cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
if (!cpuid_entries)
@@ -525,7 +570,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
continue;
r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
- &nent, cpuid->nent);
+ &nent, cpuid->nent, type);
if (r)
goto out_free;
@@ -536,7 +581,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
limit = cpuid_entries[nent - 1].eax;
for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
- &nent, cpuid->nent);
+ &nent, cpuid->nent, type);
if (r)
goto out_free;
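The new sanity_check_entries() above rejects KVM_GET_EMULATED_CPUID calls whose entries carry non-zero padding. A hedged userspace sketch of a conforming caller (minimal error handling; assumes a kernel and UAPI headers with this patch applied):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int nent = 64;	/* a guess; the kernel caps at KVM_MAX_CPUID_ENTRIES */
	struct kvm_cpuid2 *cpuid;

	cpuid = calloc(1, sizeof(*cpuid) +
			  nent * sizeof(struct kvm_cpuid_entry2));
	if (kvm < 0 || !cpuid)
		return 1;
	cpuid->nent = nent;	/* calloc keeps every entry's padding[] zero */

	if (ioctl(kvm, KVM_GET_EMULATED_CPUID, cpuid) == 0)
		printf("emulated leaves: %u\n", cpuid->nent);
	return 0;
}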
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index b7fd07984888..f1e4895174b2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -6,8 +6,9 @@
void kvm_update_cpuid(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
-int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries);
+int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries,
+ unsigned int type);
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
struct kvm_cpuid_entry __user *entries);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 711c649f80b7..6d9635d02f85 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3241,7 +3241,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
arch.direct_map = vcpu->arch.mmu.direct_map;
arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
- return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+ return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
}
static bool can_do_async_pf(struct kvm_vcpu *vcpu)
@@ -4229,7 +4229,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
if (nr_to_scan == 0)
goto out;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int idx;
@@ -4265,7 +4265,7 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
break;
}
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
out:
return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7e6090e13237..a6a6370c2010 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -69,6 +69,7 @@ struct guest_walker {
pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
+ bool pte_writable[PT_MAX_FULL_LEVELS];
unsigned pt_access;
unsigned pte_access;
gfn_t gfn;
@@ -130,6 +131,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
if (pte == orig_pte)
continue;
+ /*
+ * If the slot is read-only, simply do not process the accessed
+ * and dirty bits. This is the correct thing to do if the slot
+ * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
+ * are only supported if the accessed and dirty bits are already
+ * set in the ROM (so that MMIO writes are never needed).
+ *
+ * Note that NPT does not allow this at all and faults, since
+ * it always wants nested page table entries for the guest
+ * page tables to be writable. And EPT works but will simply
+ * overwrite the read-only memory to set the accessed and dirty
+ * bits.
+ */
+ if (unlikely(!walker->pte_writable[level - 1]))
+ continue;
+
ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
if (ret)
return ret;
@@ -204,7 +221,8 @@ retry_walk:
goto error;
real_gfn = gpa_to_gfn(real_gfn);
- host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+ host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
+ &walker->pte_writable[walker->level - 1]);
if (unlikely(kvm_is_error_hva(host_addr)))
goto error;
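The comment block above is the heart of this change: accessed/dirty updates must never write through a read-only memslot, so the walker now records per-level writability via the new gfn_to_hva_prot(). A self-contained toy model of that pattern (all types here are stand-ins, not KVM's):

#include <stdbool.h>
#include <stdio.h>

struct toy_slot { unsigned long base, npages, hva; bool writable; };

static unsigned long toy_gfn_to_hva_prot(const struct toy_slot *s,
					 unsigned long gfn, bool *writable)
{
	*writable = s->writable;
	return s->hva + ((gfn - s->base) << 12);	/* 4K pages assumed */
}

int main(void)
{
	struct toy_slot rom = { .base = 0x100, .npages = 16,
				.hva = 0x7f0000000000UL, .writable = false };
	bool writable;
	unsigned long hva = toy_gfn_to_hva_prot(&rom, 0x104, &writable);

	if (!writable)	/* mirrors the walker->pte_writable[] check above */
		printf("skip A/D update for hva %#lx\n", hva);
	return 0;
}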
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 765210d4d925..e00860ec66b5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -606,7 +606,7 @@ static int has_svm(void)
return 1;
}
-static void svm_hardware_disable(void *garbage)
+static void svm_hardware_disable(void)
{
/* Make sure we clean up behind us */
if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
@@ -617,7 +617,7 @@ static void svm_hardware_disable(void *garbage)
amd_pmu_disable_virt();
}
-static int svm_hardware_enable(void *garbage)
+static int svm_hardware_enable(void)
{
struct svm_cpu_data *sd;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7cdafb6dc705..d6c6d1c2e5ce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2566,7 +2566,7 @@ static void kvm_cpu_vmxon(u64 addr)
: "memory", "cc");
}
-static int hardware_enable(void *garbage)
+static int hardware_enable(void)
{
int cpu = raw_smp_processor_id();
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2630,7 +2630,7 @@ static void kvm_cpu_vmxoff(void)
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
}
-static void hardware_disable(void *garbage)
+static void hardware_disable(void)
{
if (vmm_exclusive) {
vmclear_local_loaded_vmcss();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8753555f144..d32ded4703ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -242,7 +242,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
-static void drop_user_return_notifiers(void *ignore)
+static void drop_user_return_notifiers(void)
{
unsigned int cpu = smp_processor_id();
struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
@@ -2520,7 +2520,7 @@ out:
return r;
}
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -2530,6 +2530,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
+ case KVM_CAP_EXT_EMUL_CPUID:
case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
@@ -2639,15 +2640,17 @@ long kvm_arch_dev_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_GET_SUPPORTED_CPUID: {
+ case KVM_GET_SUPPORTED_CPUID:
+ case KVM_GET_EMULATED_CPUID: {
struct kvm_cpuid2 __user *cpuid_arg = argp;
struct kvm_cpuid2 cpuid;
r = -EFAULT;
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
- r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
- cpuid_arg->entries);
+
+ r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
+ ioctl);
if (r)
goto out;
@@ -5123,7 +5126,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != freq->cpu)
@@ -5133,7 +5136,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
send_ipi = 1;
}
}
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
@@ -5280,12 +5283,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
int i;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
atomic_set(&kvm_guest_has_master_clock, 0);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
}
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -5928,7 +5931,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
}
if (need_resched()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- kvm_resched(vcpu);
+ cond_resched();
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
}
}
@@ -6582,7 +6585,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
kvm_rip_write(vcpu, 0);
}
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
struct kvm *kvm;
struct kvm_vcpu *vcpu;
@@ -6593,7 +6596,7 @@ int kvm_arch_hardware_enable(void *garbage)
bool stable, backwards_tsc = false;
kvm_shared_msr_cpu_online();
- ret = kvm_x86_ops->hardware_enable(garbage);
+ ret = kvm_x86_ops->hardware_enable();
if (ret != 0)
return ret;
@@ -6673,10 +6676,10 @@ int kvm_arch_hardware_enable(void *garbage)
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void)
{
- kvm_x86_ops->hardware_disable(garbage);
- drop_user_return_notifiers(garbage);
+ kvm_x86_ops->hardware_disable();
+ drop_user_return_notifiers();
}
int kvm_arch_hardware_setup(void)
@@ -6788,6 +6791,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
static_key_slow_dec(&kvm_no_apic_vcpu);
}
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
if (type)
@@ -6879,7 +6886,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
}
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
int i;
@@ -6900,7 +6907,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
}
}
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages)
{
int i;
@@ -6958,6 +6966,10 @@ out_free:
return -ENOMEM;
}
+void kvm_arch_memslots_updated(struct kvm *kvm)
+{
+}
+
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
@@ -7096,7 +7108,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
int r;
if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
- is_error_page(work->page))
+ work->wakeup_all)
return;
r = kvm_mmu_reload(vcpu);
@@ -7206,7 +7218,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct x86_exception fault;
trace_kvm_async_pf_ready(work->arch.token, work->gva);
- if (is_error_page(work->page))
+ if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
diff --git a/arch/arm/include/asm/kvm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 68cb9e1dfb81..ad9db6045b2f 100644
--- a/arch/arm/include/asm/kvm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -61,10 +61,16 @@ struct arch_timer_cpu {
#ifdef CONFIG_KVM_ARM_TIMER
int kvm_timer_hyp_init(void);
int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+ const struct kvm_irq_level *irq);
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu);
void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
+int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+
#else
static inline int kvm_timer_hyp_init(void)
{
@@ -76,10 +82,22 @@ static inline int kvm_timer_init(struct kvm *kvm)
return 0;
}
+static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+ const struct kvm_irq_level *irq) {}
static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {}
+
+static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+ return 0;
+}
+
+static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+ return 0;
+}
#endif
#endif
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
new file mode 100644
index 000000000000..2f2aac8448a4
--- /dev/null
+++ b/include/kvm/arm_vgic.h
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_ARM_KVM_VGIC_H
+#define __ASM_ARM_KVM_VGIC_H
+
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include <linux/irqreturn.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define VGIC_NR_IRQS_LEGACY 256
+#define VGIC_NR_SGIS 16
+#define VGIC_NR_PPIS 16
+#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
+
+#define VGIC_V2_MAX_LRS (1 << 6)
+#define VGIC_V3_MAX_LRS 16
+#define VGIC_MAX_IRQS 1024
+
+/* Sanity checks... */
+#if (KVM_MAX_VCPUS > 8)
+#error Invalid number of CPU interfaces
+#endif
+
+#if (VGIC_NR_IRQS_LEGACY & 31)
+#error "VGIC_NR_IRQS must be a multiple of 32"
+#endif
+
+#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS)
+#error "VGIC_NR_IRQS must be <= 1024"
+#endif
+
+/*
+ * The GIC distributor registers describing interrupts have two parts:
+ * - 32 per-CPU interrupts (SGI + PPI)
+ * - a bunch of shared interrupts (SPI)
+ */
+struct vgic_bitmap {
+ /*
+ * - One UL per VCPU for private interrupts (assumes UL is at
+ * least 32 bits)
+ * - As many UL as necessary for shared interrupts.
+ *
+ * The private interrupts are accessed via the "private"
+ * field, one UL per vcpu (the state for vcpu n is in
+ * private[n]). The shared interrupts are accessed via the
+ * "shared" pointer (IRQn state is at bit n-32 in the bitmap).
+ */
+ unsigned long *private;
+ unsigned long *shared;
+};
+
+struct vgic_bytemap {
+ /*
+ * - 8 u32 per VCPU for private interrupts
+ * - As many u32 as necessary for shared interrupts.
+ *
+ * The private interrupts are accessed via the "private"
+	 * field (the state for vcpu n is in private[n*8] to
+ * private[n*8 + 7]). The shared interrupts are accessed via
+ * the "shared" pointer (IRQn state is at byte (n-32)%4 of the
+ * shared[(n-32)/4] word).
+ */
+ u32 *private;
+ u32 *shared;
+};
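The two comments above fully determine the indexing scheme. A hedged, compile-only illustration of a vgic_bitmap-style lookup (toy code, not the kernel's accessors):

#include <stdbool.h>

#define TOY_BITS_PER_LONG	(8 * sizeof(unsigned long))

/* One unsigned long per vcpu covers the 32 private IRQs (SGI + PPI). */
static bool toy_irq_is_set(const unsigned long *private,
			   const unsigned long *shared, int vcpu, int irq)
{
	if (irq < 32)
		return private[vcpu] & (1UL << irq);
	return shared[(irq - 32) / TOY_BITS_PER_LONG] &
	       (1UL << ((irq - 32) % TOY_BITS_PER_LONG));
}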
+
+struct kvm_vcpu;
+
+enum vgic_type {
+ VGIC_V2, /* Good ol' GICv2 */
+ VGIC_V3, /* New fancy GICv3 */
+};
+
+#define LR_STATE_PENDING (1 << 0)
+#define LR_STATE_ACTIVE (1 << 1)
+#define LR_STATE_MASK (3 << 0)
+#define LR_EOI_INT (1 << 2)
+
+struct vgic_lr {
+ u16 irq;
+ u8 source;
+ u8 state;
+};
+
+struct vgic_vmcr {
+ u32 ctlr;
+ u32 abpr;
+ u32 bpr;
+ u32 pmr;
+};
+
+struct vgic_ops {
+ struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int);
+ void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
+ void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
+ u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
+ u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
+ u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
+ void (*enable_underflow)(struct kvm_vcpu *vcpu);
+ void (*disable_underflow)(struct kvm_vcpu *vcpu);
+ void (*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ void (*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+ void (*enable)(struct kvm_vcpu *vcpu);
+};
+
+struct vgic_params {
+ /* vgic type */
+ enum vgic_type type;
+ /* Physical address of vgic virtual cpu interface */
+ phys_addr_t vcpu_base;
+ /* Number of list registers */
+ u32 nr_lr;
+ /* Interrupt number */
+ unsigned int maint_irq;
+ /* Virtual control interface base address */
+ void __iomem *vctrl_base;
+};
+
+struct vgic_dist {
+#ifdef CONFIG_KVM_ARM_VGIC
+ spinlock_t lock;
+ bool in_kernel;
+ bool ready;
+
+ int nr_cpus;
+ int nr_irqs;
+
+ /* Virtual control interface mapping */
+ void __iomem *vctrl_base;
+
+ /* Distributor and vcpu interface mapping in the guest */
+ phys_addr_t vgic_dist_base;
+ phys_addr_t vgic_cpu_base;
+
+ /* Distributor enabled */
+ u32 enabled;
+
+ /* Interrupt enabled (one bit per IRQ) */
+ struct vgic_bitmap irq_enabled;
+
+ /* Level-triggered interrupt external input is asserted */
+ struct vgic_bitmap irq_level;
+
+ /*
+ * Interrupt state is pending on the distributor
+ */
+ struct vgic_bitmap irq_pending;
+
+ /*
+ * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered
+ * interrupts. Essentially holds the state of the flip-flop in
+ * Figure 4-10 on page 4-101 in ARM IHI 0048B.b.
+ * Once set, it is only cleared for level-triggered interrupts on
+ * guest ACKs (when we queue it) or writes to GICD_ICPENDRn.
+ */
+ struct vgic_bitmap irq_soft_pend;
+
+ /* Level-triggered interrupt queued on VCPU interface */
+ struct vgic_bitmap irq_queued;
+
+ /* Interrupt priority. Not used yet. */
+ struct vgic_bytemap irq_priority;
+
+ /* Level/edge triggered */
+ struct vgic_bitmap irq_cfg;
+
+ /*
+ * Source CPU per SGI and target CPU:
+ *
+	 * Each byte represents an SGI observable on a VCPU; each bit of
+	 * that byte indicates whether the corresponding VCPU has
+	 * generated this interrupt. This is a GICv2 feature only.
+ *
+ * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are
+ * the SGIs observable on VCPUn.
+ */
+ u8 *irq_sgi_sources;
+
+ /*
+ * Target CPU for each SPI:
+ *
+	 * Array of available SPIs, each byte indicating the target
+	 * VCPU for that SPI. IRQn (n >= 32) is at irq_spi_cpu[n-32].
+ */
+ u8 *irq_spi_cpu;
+
+ /*
+ * Reverse lookup of irq_spi_cpu for faster compute pending:
+ *
+ * Array of bitmaps, one per VCPU, describing if IRQn is
+ * routed to a particular VCPU.
+ */
+ struct vgic_bitmap *irq_spi_target;
+
+ /* Bitmap indicating which CPU has something pending */
+ unsigned long *irq_pending_on_cpu;
+#endif
+};
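As spelled out in the comments above, SGI source bytes and SPI targets are plain flat arrays. A toy rendering of the index math (illustrative only):

/* Bitmask of VCPUs that sent SGI "sgi" to VCPU "vcpu". */
static unsigned char toy_sgi_sources(const unsigned char *irq_sgi_sources,
				     int vcpu, int sgi)
{
	return irq_sgi_sources[vcpu * 16 + sgi];
}

/* Target VCPU id for SPI n, n >= 32. */
static unsigned char toy_spi_target(const unsigned char *irq_spi_cpu, int spi)
{
	return irq_spi_cpu[spi - 32];
}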
+
+struct vgic_v2_cpu_if {
+ u32 vgic_hcr;
+ u32 vgic_vmcr;
+ u32 vgic_misr; /* Saved only */
+ u32 vgic_eisr[2]; /* Saved only */
+ u32 vgic_elrsr[2]; /* Saved only */
+ u32 vgic_apr;
+ u32 vgic_lr[VGIC_V2_MAX_LRS];
+};
+
+struct vgic_v3_cpu_if {
+#ifdef CONFIG_ARM_GIC_V3
+ u32 vgic_hcr;
+ u32 vgic_vmcr;
+ u32 vgic_misr; /* Saved only */
+ u32 vgic_eisr; /* Saved only */
+ u32 vgic_elrsr; /* Saved only */
+ u32 vgic_ap0r[4];
+ u32 vgic_ap1r[4];
+ u64 vgic_lr[VGIC_V3_MAX_LRS];
+#endif
+};
+
+struct vgic_cpu {
+#ifdef CONFIG_KVM_ARM_VGIC
+ /* per IRQ to LR mapping */
+ u8 *vgic_irq_lr_map;
+
+ /* Pending interrupts on this VCPU */
+	DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS);
+ unsigned long *pending_shared;
+
+ /* Bitmap of used/free list registers */
+	DECLARE_BITMAP(lr_used, VGIC_V2_MAX_LRS);
+
+ /* Number of list registers on this CPU */
+ int nr_lr;
+
+ /* CPU vif control registers for world switch */
+ union {
+ struct vgic_v2_cpu_if vgic_v2;
+ struct vgic_v3_cpu_if vgic_v3;
+ };
+#endif
+};
+
+#define LR_EMPTY 0xff
+
+#define INT_STATUS_EOI (1 << 0)
+#define INT_STATUS_UNDERFLOW (1 << 1)
+
+struct kvm;
+struct kvm_vcpu;
+struct kvm_run;
+struct kvm_exit_mmio;
+
+#ifdef CONFIG_KVM_ARM_VGIC
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
+int kvm_vgic_hyp_init(void);
+int kvm_vgic_init(struct kvm *kvm);
+int kvm_vgic_create(struct kvm *kvm);
+void kvm_vgic_destroy(struct kvm *kvm);
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
+void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
+void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
+ bool level);
+int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
+bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_exit_mmio *mmio);
+
+#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
+#define vgic_initialized(k) ((k)->arch.vgic.ready)
+
+int vgic_v2_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params);
+#ifdef CONFIG_ARM_GIC_V3
+int vgic_v3_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params);
+#else
+static inline int vgic_v3_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
+{
+ return -ENODEV;
+}
+#endif
+
+#else
+static inline int kvm_vgic_hyp_init(void)
+{
+ return 0;
+}
+
+static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
+{
+ return 0;
+}
+
+static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
+{
+ return -ENXIO;
+}
+
+static inline int kvm_vgic_init(struct kvm *kvm)
+{
+ return 0;
+}
+
+static inline int kvm_vgic_create(struct kvm *kvm)
+{
+ return 0;
+}
+
+static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
+
+static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
+ unsigned int irq_num, bool level)
+{
+ return 0;
+}
+
+static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_exit_mmio *mmio)
+{
+ return false;
+}
+
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+ return 0;
+}
+
+static inline bool vgic_initialized(struct kvm *kvm)
+{
+ return true;
+}
+#endif
+
+#endif
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 13439bac2967..1f004b16641e 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -17,6 +17,9 @@
#define GIC_CPU_EOI 0x10
#define GIC_CPU_RUNNINGPRI 0x14
#define GIC_CPU_HIGHPRI 0x18
+#define GIC_CPU_ALIAS_BINPOINT 0x1c
+#define GIC_CPU_ACTIVEPRIO 0xd0
+#define GIC_CPU_IDENT 0xfc
#define GICC_IAR_INT_ID_MASK 0x3ff
@@ -58,6 +61,15 @@
#define GICH_LR_ACTIVE_BIT (1 << 29)
#define GICH_LR_EOI (1 << 19)
+#define GICH_VMCR_CTRL_SHIFT 0
+#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
+#define GICH_VMCR_PRIMASK_SHIFT 27
+#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT)
+#define GICH_VMCR_BINPOINT_SHIFT 21
+#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT)
+#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18
+#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT)
+
#define GICH_MISR_EOI (1 << 0)
#define GICH_MISR_U (1 << 1)
@@ -85,12 +97,11 @@ int gic_get_cpu_id(unsigned int cpu);
void gic_migrate_target(unsigned int new_cpu_id);
unsigned long gic_get_sgir_physaddr(void);
-#endif /* __ASSEMBLY */
-
extern const struct irq_domain_ops *gic_routable_irq_domain_ops;
static inline void __init register_routable_domain_ops
(const struct irq_domain_ops *ops)
{
gic_routable_irq_domain_ops = ops;
}
+#endif /* __ASSEMBLY */
#endif
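The VMCR macros added above carve one 32-bit register into control, binary-point and priority-mask fields. A small hedged demo of packing and unpacking the 5-bit priority mask with those shift/mask pairs (masks restated locally so the demo stands alone):

#include <stdio.h>

#define TOY_PRIMASK_SHIFT	27
#define TOY_PRIMASK_MASK	(0x1fU << TOY_PRIMASK_SHIFT)

int main(void)
{
	unsigned int vmcr = 0;

	vmcr |= (0x10U << TOY_PRIMASK_SHIFT) & TOY_PRIMASK_MASK;
	printf("primask = %#x\n",		/* prints 0x10 */
	       (vmcr & TOY_PRIMASK_MASK) >> TOY_PRIMASK_SHIFT);
	return 0;
}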
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8db53cfaccdb..f64e941a4213 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -129,11 +129,9 @@ static inline bool is_error_page(struct page *page)
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
-struct kvm;
-struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
-extern raw_spinlock_t kvm_lock;
+extern spinlock_t kvm_lock;
extern struct list_head vm_list;
struct kvm_io_range {
@@ -175,13 +173,12 @@ struct kvm_async_pf {
gva_t gva;
unsigned long addr;
struct kvm_arch_async_pf arch;
- struct page *page;
- bool done;
+ bool wakeup_all;
};
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch);
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif
@@ -302,25 +299,6 @@ struct kvm_kernel_irq_routing_entry {
struct hlist_node link;
};
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-
-struct kvm_irq_routing_table {
- int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
- struct kvm_kernel_irq_routing_entry *rt_entries;
- u32 nr_rt_entries;
- /*
- * Array indexed by gsi. Each entry contains list of irq chips
- * the gsi is connected to.
- */
- struct hlist_head map[0];
-};
-
-#else
-
-struct kvm_irq_routing_table {};
-
-#endif
-
#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#endif
@@ -347,6 +325,7 @@ struct kvm {
struct mm_struct *mm; /* userspace tied to this vm */
struct kvm_memslots *memslots;
struct srcu_struct srcu;
+ struct srcu_struct irq_srcu;
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
u32 bsp_vcpu_id;
#endif
@@ -377,11 +356,12 @@ struct kvm {
struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
/*
- * Update side is protected by irq_lock and,
- * if configured, irqfds.lock.
+ * Update side is protected by irq_lock.
*/
struct kvm_irq_routing_table __rcu *irq_routing;
struct hlist_head mask_notifier_list;
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
struct hlist_head irq_ack_notifier_list;
#endif
@@ -431,7 +411,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
int __must_check vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
int kvm_irqfd_init(void);
void kvm_irqfd_exit(void);
#else
@@ -450,8 +430,6 @@ void kvm_exit(void);
void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
-void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
- u64 last_generation);
static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
{
@@ -494,9 +472,11 @@ int kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages);
+void kvm_arch_memslots_updated(struct kvm *kvm);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
@@ -518,10 +498,12 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
+ bool *writable);
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
-void kvm_set_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
@@ -533,7 +515,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
-void kvm_release_pfn_dirty(pfn_t pfn);
void kvm_release_pfn_clean(pfn_t pfn);
void kvm_set_pfn_dirty(pfn_t pfn);
void kvm_set_pfn_accessed(pfn_t pfn);
@@ -560,14 +541,11 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
- gfn_t gfn);
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
-void kvm_resched(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
@@ -582,15 +560,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);
-int kvm_dev_ioctl_check_extension(long ext);
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);
int kvm_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log, int *is_dirty);
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log);
-int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem);
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
bool line_status);
long kvm_arch_vm_ioctl(struct file *filp,
@@ -622,6 +598,8 @@ void kvm_arch_exit(void);
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
+
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
@@ -630,16 +608,14 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
-int kvm_arch_hardware_enable(void *garbage);
-void kvm_arch_hardware_disable(void *garbage);
+int kvm_arch_hardware_enable(void);
+void kvm_arch_hardware_disable(void);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
-void kvm_free_physmem(struct kvm *kvm);
-
void *kvm_kvzalloc(unsigned long size);
void kvm_kvfree(const void *addr);
@@ -717,6 +693,10 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask);
+int kvm_irq_map_gsi(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *entries, int gsi);
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
+
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status);
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level);
@@ -773,7 +753,7 @@ static inline void kvm_guest_enter(void)
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
- * is very similar to exiting to userspase from rcu point of view. In
+ * is very similar to exiting to userspace from rcu point of view. In
* addition CPU may stay in a guest mode for quite a long time (up to
* one time slice). Lets treat guest mode as quiescent state, just like
* we do with user-mode execution.
@@ -826,13 +806,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
return gfn_to_memslot(kvm, gfn)->id;
}
-static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
-{
- /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */
- return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
- (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
-}
-
static inline gfn_t
hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
{
@@ -856,6 +829,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn)
return (hpa_t)pfn << PAGE_SHIFT;
}
+static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+{
+ unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+ return kvm_is_error_hva(hva);
+}
+
static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
@@ -906,28 +886,27 @@ int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
unsigned flags);
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
- struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
void kvm_free_irq_routing(struct kvm *kvm);
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
-
#else
static inline void kvm_free_irq_routing(struct kvm *kvm) {}
#endif
+int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
+
#ifdef CONFIG_HAVE_KVM_EVENTFD
void kvm_eventfd_init(struct kvm *kvm);
int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
+#ifdef CONFIG_HAVE_KVM_IRQFD
int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_irqfd_release(struct kvm *kvm);
-void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
+void kvm_irq_routing_update(struct kvm *);
#else
static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
@@ -949,10 +928,8 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
static inline void kvm_irqfd_release(struct kvm *kvm) {}
#ifdef CONFIG_HAVE_KVM_IRQCHIP
-static inline void kvm_irq_routing_update(struct kvm *kvm,
- struct kvm_irq_routing_table *irq_rt)
+static inline void kvm_irq_routing_update(struct kvm *kvm)
{
- rcu_assign_pointer(kvm->irq_routing, irq_rt);
}
#endif
@@ -1013,8 +990,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
extern bool kvm_rebooting;
-struct kvm_device_ops;
-
struct kvm_device {
struct kvm_device_ops *ops;
struct kvm *kvm;
@@ -1047,6 +1022,7 @@ struct kvm_device_ops {
void kvm_device_get(struct kvm_device *dev);
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
@@ -1071,12 +1047,6 @@ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
{
}
-
-static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
-{
- return true;
-}
-
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
#endif
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b0bcce0ddc95..b606bb689a3e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -17,6 +17,20 @@
#ifndef __KVM_TYPES_H__
#define __KVM_TYPES_H__
+struct kvm;
+struct kvm_async_pf;
+struct kvm_device_ops;
+struct kvm_interrupt;
+struct kvm_irq_routing_table;
+struct kvm_memory_slot;
+struct kvm_one_reg;
+struct kvm_run;
+struct kvm_userspace_memory_region;
+struct kvm_vcpu;
+struct kvm_vcpu_init;
+
+enum kvm_mr_change;
+
#include <asm/types.h>
/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a9a48309f045..ca9d31009904 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -52,6 +52,9 @@ extern unsigned long sysctl_admin_reserve_kbytes;
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+/* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */
+#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)addr, PAGE_SIZE)
+
/*
* Linux kernel virtual memory manager primitives.
* The idea being to have a "virtual" mm in the same way
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 7005d1109ec9..908925ace776 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit,
__entry->errno < 0 ? -__entry->errno : __entry->reason)
);
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#if defined(CONFIG_HAVE_KVM_IRQFD)
TRACE_EVENT(kvm_set_irq,
TP_PROTO(unsigned int gsi, int level, int irq_source_id),
TP_ARGS(gsi, level, irq_source_id),
@@ -57,7 +57,7 @@ TRACE_EVENT(kvm_set_irq,
TP_printk("gsi %u level %d source %d",
__entry->gsi, __entry->level, __entry->irq_source_id)
);
-#endif
+#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
#if defined(__KVM_HAVE_IOAPIC)
#define kvm_deliver_mode \
@@ -124,7 +124,7 @@ TRACE_EVENT(kvm_msi_set_irq,
#endif /* defined(__KVM_HAVE_IOAPIC) */
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#if defined(CONFIG_HAVE_KVM_IRQFD)
TRACE_EVENT(kvm_ack_irq,
TP_PROTO(unsigned int irqchip, unsigned int pin),
@@ -149,7 +149,7 @@ TRACE_EVENT(kvm_ack_irq,
#endif
);
-#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
+#endif /* defined(CONFIG_HAVE_KVM_IRQFD) */
@@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready,
TRACE_EVENT(
kvm_async_pf_completed,
- TP_PROTO(unsigned long address, struct page *page, u64 gva),
- TP_ARGS(address, page, gva),
+ TP_PROTO(unsigned long address, u64 gva),
+ TP_ARGS(address, gva),
TP_STRUCT__entry(
__field(unsigned long, address)
- __field(pfn_t, pfn)
__field(u64, gva)
),
TP_fast_assign(
__entry->address = address;
- __entry->pfn = page ? page_to_pfn(page) : 0;
__entry->gva = gva;
),
- TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva,
- __entry->address, __entry->pfn)
+ TP_printk("gva %#llx address %#lx", __entry->gva,
+ __entry->address)
);
#endif
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index bdc6e87ff3eb..405887bec8b3 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -311,6 +311,7 @@ header-y += ppp-ioctl.h
header-y += ppp_defs.h
header-y += pps.h
header-y += prctl.h
+header-y += psci.h
header-y += ptp_clock.h
header-y += ptrace.h
header-y += qnx4_fs.h
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d88c8ee00c8b..00d2c69a3cb6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -171,6 +171,7 @@ struct kvm_pit_config {
#define KVM_EXIT_WATCHDOG 21
#define KVM_EXIT_S390_TSCH 22
#define KVM_EXIT_EPR 23
+#define KVM_EXIT_SYSTEM_EVENT 24
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -301,6 +302,13 @@ struct kvm_run {
struct {
__u32 epr;
} epr;
+ /* KVM_EXIT_SYSTEM_EVENT */
+ struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN 1
+#define KVM_SYSTEM_EVENT_RESET 2
+ __u32 type;
+ __u64 flags;
+ } system_event;
/* Fix the size of the union. */
char padding[256];
};
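KVM_EXIT_SYSTEM_EVENT gives userspace a generic "the guest asked for a machine-level action" exit. A hedged sketch of the dispatch a VMM might add (run is the vcpu's mmap'ed kvm_run area; VM and vcpu setup are assumed):

#include <stdlib.h>
#include <linux/kvm.h>

static void toy_handle_exit(struct kvm_run *run)
{
	if (run->exit_reason != KVM_EXIT_SYSTEM_EVENT)
		return;

	switch (run->system_event.type) {
	case KVM_SYSTEM_EVENT_SHUTDOWN:
		exit(0);	/* guest requested power-off, e.g. PSCI SYSTEM_OFF */
	case KVM_SYSTEM_EVENT_RESET:
		/* tear down and recreate vcpu state, then re-enter the guest */
		break;
	}
}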
@@ -391,8 +399,9 @@ struct kvm_vapic_addr {
__u64 vapic_addr;
};
-/* for KVM_SET_MPSTATE */
+/* for KVM_SET_MP_STATE */
+/* not all states are valid on all architectures */
#define KVM_MP_STATE_RUNNABLE 0
#define KVM_MP_STATE_UNINITIALIZED 1
#define KVM_MP_STATE_INIT_RECEIVED 2
@@ -541,6 +550,7 @@ struct kvm_ppc_smmu_info {
#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
+#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
/*
* Extension capability list.
@@ -568,9 +578,7 @@ struct kvm_ppc_smmu_info {
#endif
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#ifdef __KVM_HAVE_USER_NMI
#define KVM_CAP_USER_NMI 22
-#endif
#ifdef __KVM_HAVE_GUEST_DEBUG
#define KVM_CAP_SET_GUEST_DEBUG 23
#endif
@@ -652,9 +660,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_GET_SMMU_INFO 78
#define KVM_CAP_S390_COW 79
#define KVM_CAP_PPC_ALLOC_HTAB 80
-#ifdef __KVM_HAVE_READONLY_MEM
#define KVM_CAP_READONLY_MEM 81
-#endif
#define KVM_CAP_IRQFD_RESAMPLE 82
#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
#define KVM_CAP_PPC_HTAB_FD 84
@@ -666,6 +672,10 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_IRQ_MPIC 90
#define KVM_CAP_PPC_RTAS 91
#define KVM_CAP_IRQ_XICS 92
+#define KVM_CAP_ARM_EL1_32BIT 93
+#define KVM_CAP_EXT_EMUL_CPUID 95
+#define KVM_CAP_ARM_PSCI_0_2 102
+#define KVM_CAP_CHECK_EXTENSION_VM 105
#ifdef KVM_CAP_IRQ_ROUTING
@@ -783,6 +793,7 @@ struct kvm_dirty_tlb {
#define KVM_REG_IA64 0x3000000000000000ULL
#define KVM_REG_ARM 0x4000000000000000ULL
#define KVM_REG_S390 0x5000000000000000ULL
+#define KVM_REG_ARM64 0x6000000000000000ULL
#define KVM_REG_MIPS 0x7000000000000000ULL
#define KVM_REG_SIZE_SHIFT 52
@@ -837,9 +848,25 @@ struct kvm_device_attr {
__u64 addr; /* userspace address of attr data */
};
-#define KVM_DEV_TYPE_FSL_MPIC_20 1
-#define KVM_DEV_TYPE_FSL_MPIC_42 2
-#define KVM_DEV_TYPE_XICS 3
+#define KVM_DEV_VFIO_GROUP 1
+#define KVM_DEV_VFIO_GROUP_ADD 1
+#define KVM_DEV_VFIO_GROUP_DEL 2
+
+enum kvm_device_type {
+ KVM_DEV_TYPE_FSL_MPIC_20 = 1,
+#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20
+ KVM_DEV_TYPE_FSL_MPIC_42,
+#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42
+ KVM_DEV_TYPE_XICS,
+#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS
+ KVM_DEV_TYPE_VFIO,
+#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO
+ KVM_DEV_TYPE_ARM_VGIC_V2,
+#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2
+ KVM_DEV_TYPE_FLIC,
+#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC
+ KVM_DEV_TYPE_MAX,
+};
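Each enumerator above is immediately shadowed by a #define of its own name: the values stay a proper enum while userspace can still feature-test them with the preprocessor. A hedged sketch (vm_fd from KVM_CREATE_VM is assumed; KVM_CREATE_DEVICE is the existing device-creation ioctl):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns the new device fd, or -1 if unsupported or on error. */
static int toy_create_vfio_dev(int vm_fd)
{
#ifdef KVM_DEV_TYPE_VFIO	/* usable in #ifdef thanks to the self-#define */
	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) == 0)
		return cd.fd;
#endif
	return -1;
}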
/*
* ioctls for VM fds
@@ -977,7 +1004,7 @@ struct kvm_s390_ucas_mapping {
#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
-/* Available with KVM_CAP_NMI */
+/* Available with KVM_CAP_USER_NMI */
#define KVM_NMI _IO(KVMIO, 0x9a)
/* Available with KVM_CAP_SET_GUEST_DEBUG */
#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
@@ -1009,6 +1036,7 @@ struct kvm_s390_ucas_mapping {
/* VM is being stopped by host */
#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad)
#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init)
+#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init)
#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
new file mode 100644
index 000000000000..310d83e0a91b
--- /dev/null
+++ b/include/uapi/linux/psci.h
@@ -0,0 +1,90 @@
+/*
+ * ARM Power State and Coordination Interface (PSCI) header
+ *
+ * This header holds common PSCI defines and macros shared
+ * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space.
+ *
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Anup Patel <anup.patel@linaro.org>
+ */
+
+#ifndef _UAPI_LINUX_PSCI_H
+#define _UAPI_LINUX_PSCI_H
+
+/*
+ * PSCI v0.1 interface
+ *
+ * The PSCI v0.1 function numbers are implementation defined.
+ *
+ * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED,
+ * INVALID_PARAMS, and DENIED defined below are applicable
+ * to PSCI v0.1.
+ */
+
+/* PSCI v0.2 interface */
+#define PSCI_0_2_FN_BASE 0x84000000
+#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n))
+#define PSCI_0_2_64BIT 0x40000000
+#define PSCI_0_2_FN64_BASE \
+ (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT)
+#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n))
+
+#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0)
+#define PSCI_0_2_FN_CPU_SUSPEND PSCI_0_2_FN(1)
+#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2)
+#define PSCI_0_2_FN_CPU_ON PSCI_0_2_FN(3)
+#define PSCI_0_2_FN_AFFINITY_INFO PSCI_0_2_FN(4)
+#define PSCI_0_2_FN_MIGRATE PSCI_0_2_FN(5)
+#define PSCI_0_2_FN_MIGRATE_INFO_TYPE PSCI_0_2_FN(6)
+#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU PSCI_0_2_FN(7)
+#define PSCI_0_2_FN_SYSTEM_OFF PSCI_0_2_FN(8)
+#define PSCI_0_2_FN_SYSTEM_RESET PSCI_0_2_FN(9)
+
+#define PSCI_0_2_FN64_CPU_SUSPEND PSCI_0_2_FN64(1)
+#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3)
+#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4)
+#define PSCI_0_2_FN64_MIGRATE PSCI_0_2_FN64(5)
+#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7)
+
+/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
+#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff
+#define PSCI_0_2_POWER_STATE_ID_SHIFT 0
+#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16
+#define PSCI_0_2_POWER_STATE_TYPE_MASK \
+ (0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT)
+#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24
+#define PSCI_0_2_POWER_STATE_AFFL_MASK \
+ (0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT)
+
+/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */
+#define PSCI_0_2_AFFINITY_LEVEL_ON 0
+#define PSCI_0_2_AFFINITY_LEVEL_OFF 1
+#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING 2
+
+/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */
+#define PSCI_0_2_TOS_UP_MIGRATE 0
+#define PSCI_0_2_TOS_UP_NO_MIGRATE 1
+#define PSCI_0_2_TOS_MP 2
+
+/* PSCI version decoding (independent of PSCI version) */
+#define PSCI_VERSION_MAJOR_SHIFT 16
+#define PSCI_VERSION_MINOR_MASK \
+ ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1)
+#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK
+#define PSCI_VERSION_MAJOR(ver) \
+ (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT)
+#define PSCI_VERSION_MINOR(ver) \
+ ((ver) & PSCI_VERSION_MINOR_MASK)
+
+/* PSCI return values (inclusive of all PSCI versions) */
+#define PSCI_RET_SUCCESS 0
+#define PSCI_RET_NOT_SUPPORTED -1
+#define PSCI_RET_INVALID_PARAMS -2
+#define PSCI_RET_DENIED -3
+#define PSCI_RET_ALREADY_ON -4
+#define PSCI_RET_ON_PENDING -5
+#define PSCI_RET_INTERNAL_FAILURE -6
+#define PSCI_RET_NOT_PRESENT -7
+#define PSCI_RET_DISABLED -8
+
+#endif /* _UAPI_LINUX_PSCI_H */
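A short hedged demo of the version-decoding macros above: PSCI v0.2 reports major 0, minor 2, so the raw version word is simply 2 (assumes installed UAPI headers containing this new file):

#include <stdio.h>
#include <linux/psci.h>

int main(void)
{
	unsigned int ver = (0 << PSCI_VERSION_MAJOR_SHIFT) | 2;

	printf("PSCI v%u.%u\n", PSCI_VERSION_MAJOR(ver),
	       PSCI_VERSION_MINOR(ver));	/* prints "PSCI v0.2" */
	return 0;
}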
diff --git a/mm/memory.c b/mm/memory.c
index ebe0f285c0e7..8a6ab7be24b6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -118,6 +118,8 @@ __setup("norandmaps", disable_randmaps);
unsigned long zero_pfn __read_mostly;
unsigned long highest_memmap_pfn __read_mostly;
+EXPORT_SYMBOL(zero_pfn);
+
/*
* CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
*/
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 779262f59e25..fc0c5e603eb4 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -6,6 +6,9 @@ config HAVE_KVM
config HAVE_KVM_IRQCHIP
bool
+config HAVE_KVM_IRQFD
+ bool
+
config HAVE_KVM_IRQ_ROUTING
bool
@@ -22,8 +25,15 @@ config KVM_MMIO
config KVM_ASYNC_PF
bool
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+ bool
+
config HAVE_KVM_MSI
bool
config HAVE_KVM_CPU_RELAX_INTERCEPT
bool
+
+config KVM_VFIO
+ bool
diff --git a/arch/arm/kvm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index c55b6089e923..5081e809821f 100644
--- a/arch/arm/kvm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -25,14 +25,12 @@
#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
-#include <asm/kvm_vgic.h>
-#include <asm/kvm_arch_timer.h>
+#include <kvm/arm_vgic.h>
+#include <kvm/arm_arch_timer.h>
static struct timecounter *timecounter;
static struct workqueue_struct *wqueue;
-static struct kvm_irq_level timer_irq = {
- .level = 1,
-};
+static unsigned int host_vtimer_irq;
static cycle_t kvm_phys_timer_read(void)
{
@@ -67,8 +65,8 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
- vcpu->arch.timer_cpu.irq->irq,
- vcpu->arch.timer_cpu.irq->level);
+ timer->irq->irq,
+ timer->irq->level);
}
static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -156,6 +154,20 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
timer_arm(timer, ns);
}
+void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+ const struct kvm_irq_level *irq)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ /*
+ * The vcpu timer irq number cannot be determined in
+ * kvm_timer_vcpu_init() because it is called much before
+ * kvm_vcpu_set_target(). To handle this, we determine
+ * vcpu timer irq number when the vcpu is reset.
+ */
+ timer->irq = irq;
+}
+
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -163,14 +175,47 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
timer->timer.function = kvm_timer_expire;
- timer->irq = &timer_irq;
}
static void kvm_timer_init_interrupt(void *info)
{
- enable_percpu_irq(timer_irq.irq, 0);
+ enable_percpu_irq(host_vtimer_irq, 0);
}
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ switch (regid) {
+ case KVM_REG_ARM_TIMER_CTL:
+ timer->cntv_ctl = value;
+ break;
+ case KVM_REG_ARM_TIMER_CNT:
+ vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
+ break;
+ case KVM_REG_ARM_TIMER_CVAL:
+ timer->cntv_cval = value;
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ switch (regid) {
+ case KVM_REG_ARM_TIMER_CTL:
+ return timer->cntv_ctl;
+ case KVM_REG_ARM_TIMER_CNT:
+ return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+ case KVM_REG_ARM_TIMER_CVAL:
+ return timer->cntv_cval;
+ }
+ return (u64)-1;
+}
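
The CNT cases above keep a single per-VM offset rather than a per-vcpu counter:
writing a guest counter value v stores cntvoff = phys - v, so a later read
returns v plus the physical time elapsed since. A minimal sketch of that
invariant, with made-up numbers:

    u64 phys = 1000;            /* kvm_phys_timer_read() at restore time */
    u64 v = 100;                /* counter value userspace writes back */
    u64 cntvoff = phys - v;     /* 900, stored in kvm->arch.timer */

    /* 50 ticks of physical time later: */
    u64 guest_cnt = (phys + 50) - cntvoff;  /* 150 == v + 50 */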
static int kvm_timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *cpu)
@@ -182,7 +227,7 @@ static int kvm_timer_cpu_notify(struct notifier_block *self,
break;
case CPU_DYING:
case CPU_DYING_FROZEN:
- disable_percpu_irq(timer_irq.irq);
+ disable_percpu_irq(host_vtimer_irq);
break;
}
@@ -195,6 +240,7 @@ static struct notifier_block kvm_timer_cpu_nb = {
static const struct of_device_id arch_timer_of_match[] = {
{ .compatible = "arm,armv7-timer", },
+ { .compatible = "arm,armv8-timer", },
{},
};
@@ -229,7 +275,7 @@ int kvm_timer_hyp_init(void)
goto out;
}
- timer_irq.irq = ppi;
+ host_vtimer_irq = ppi;
err = register_cpu_notifier(&kvm_timer_cpu_nb);
if (err) {
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
new file mode 100644
index 000000000000..01124ef3690a
--- /dev/null
+++ b/virt/kvm/arm/vgic-v2.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+ struct vgic_lr lr_desc;
+ u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr];
+
+ lr_desc.irq = val & GICH_LR_VIRTUALID;
+ if (lr_desc.irq <= 15)
+ lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+ else
+ lr_desc.source = 0;
+ lr_desc.state = 0;
+
+ if (val & GICH_LR_PENDING_BIT)
+ lr_desc.state |= LR_STATE_PENDING;
+ if (val & GICH_LR_ACTIVE_BIT)
+ lr_desc.state |= LR_STATE_ACTIVE;
+ if (val & GICH_LR_EOI)
+ lr_desc.state |= LR_EOI_INT;
+
+ return lr_desc;
+}
+
+static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+
+ if (lr_desc.state & LR_STATE_PENDING)
+ lr_val |= GICH_LR_PENDING_BIT;
+ if (lr_desc.state & LR_STATE_ACTIVE)
+ lr_val |= GICH_LR_ACTIVE_BIT;
+ if (lr_desc.state & LR_EOI_INT)
+ lr_val |= GICH_LR_EOI;
+
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
+}
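
The get/set pair above is symmetric, so a descriptor survives a round trip.
For instance (illustrative values), a pending SGI1 from vcpu 2:

    struct vgic_lr vlr = {
        .irq    = 1,                 /* SGI1, so <= 15 */
        .source = 2,                 /* originating vcpu */
        .state  = LR_STATE_PENDING,
    };
    /* vgic_v2_set_lr() packs this as
     * (2 << GICH_LR_PHYSID_CPUID_SHIFT) | 1 with GICH_LR_PENDING_BIT
     * set; vgic_v2_get_lr() recovers the same three fields. */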
+
+static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ if (!(lr_desc.state & LR_STATE_MASK))
+ set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+}
+
+static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+#if BITS_PER_LONG == 64
+ val = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
+ val <<= 32;
+ val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
+#else
+ val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
+#endif
+ return val;
+}
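
The two 32-bit ELRSR words are combined by hand, high word first, because a
plain 64-bit load of the array would be wrong on big-endian. With hypothetical
register values:

    u32 lo = 0x0000000f, hi = 0x00000001;  /* vgic_elrsr[0], vgic_elrsr[1] */
    u64 elrsr = ((u64)hi << 32) | lo;      /* LRs 0-3 and 32 are empty */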
+
+static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+#if BITS_PER_LONG == 64
+ val = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
+ val <<= 32;
+ val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
+#else
+ val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
+#endif
+ return val;
+}
+
+static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+ u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
+ u32 ret = 0;
+
+ if (misr & GICH_MISR_EOI)
+ ret |= INT_STATUS_EOI;
+ if (misr & GICH_MISR_U)
+ ret |= INT_STATUS_UNDERFLOW;
+
+ return ret;
+}
+
+static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE;
+}
+
+static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE;
+}
+
+static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+
+ vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT;
+ vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+ vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT;
+ vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
+}
+
+static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr;
+
+ vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+ vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK;
+ vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK;
+ vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
+
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+}
+
+static void vgic_v2_enable(struct kvm_vcpu *vcpu)
+{
+ /*
+ * By forcing VMCR to zero, the GIC will restore the binary
+ * points to their reset values. Anything else resets to zero
+ * anyway.
+ */
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+
+ /* Get the show on the road... */
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
+}
+
+static const struct vgic_ops vgic_v2_ops = {
+ .get_lr = vgic_v2_get_lr,
+ .set_lr = vgic_v2_set_lr,
+ .sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
+ .get_elrsr = vgic_v2_get_elrsr,
+ .get_eisr = vgic_v2_get_eisr,
+ .get_interrupt_status = vgic_v2_get_interrupt_status,
+ .enable_underflow = vgic_v2_enable_underflow,
+ .disable_underflow = vgic_v2_disable_underflow,
+ .get_vmcr = vgic_v2_get_vmcr,
+ .set_vmcr = vgic_v2_set_vmcr,
+ .enable = vgic_v2_enable,
+};
+
+static struct vgic_params vgic_v2_params;
+
+/**
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
+ * @node: pointer to the DT node
+ * @ops: address of a pointer to the GICv2 operations
+ * @params: address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv2 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v2_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
+{
+ int ret;
+ struct resource vctrl_res;
+ struct resource vcpu_res;
+ struct vgic_params *vgic = &vgic_v2_params;
+
+ vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+ if (!vgic->maint_irq) {
+ kvm_err("error getting vgic maintenance irq from DT\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
+ if (ret) {
+ kvm_err("Cannot obtain GICH resource\n");
+ goto out;
+ }
+
+ vgic->vctrl_base = of_iomap(vgic_node, 2);
+ if (!vgic->vctrl_base) {
+ kvm_err("Cannot ioremap GICH\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR);
+ vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
+
+ ret = create_hyp_io_mappings(vgic->vctrl_base,
+ vgic->vctrl_base + resource_size(&vctrl_res),
+ vctrl_res.start);
+ if (ret) {
+ kvm_err("Cannot map VCTRL into hyp\n");
+ goto out_unmap;
+ }
+
+ if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
+ kvm_err("Cannot obtain GICV resource\n");
+ ret = -ENXIO;
+ goto out_unmap;
+ }
+
+ if (!PAGE_ALIGNED(vcpu_res.start)) {
+ kvm_err("GICV physical address 0x%llx not page aligned\n",
+ (unsigned long long)vcpu_res.start);
+ ret = -ENXIO;
+ goto out_unmap;
+ }
+
+ if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+ (unsigned long long)resource_size(&vcpu_res),
+ PAGE_SIZE);
+ ret = -ENXIO;
+ goto out_unmap;
+ }
+
+ vgic->vcpu_base = vcpu_res.start;
+
+ kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+ vctrl_res.start, vgic->maint_irq);
+
+ vgic->type = VGIC_V2;
+ *ops = &vgic_v2_ops;
+ *params = vgic;
+ goto out;
+
+out_unmap:
+ iounmap(vgic->vctrl_base);
+out:
+ of_node_put(vgic_node);
+ return ret;
+}
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
new file mode 100644
index 000000000000..1c2c8eef0599
--- /dev/null
+++ b/virt/kvm/arm/vgic-v3.c
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2013 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <linux/irqchip/arm-gic-v3.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID (0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT (10)
+#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+/*
+ * LRs are stored in reverse order in memory. Make sure we index them
+ * correctly.
+ */
+#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr)
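
Assuming VGIC_V3_MAX_LRS is 16 (defined elsewhere, not in this patch), the
macro simply mirrors the index:

    int first = LR_INDEX(0);   /* 15: logical LR0 sits at the end of the array */
    int last  = LR_INDEX(15);  /*  0: the highest LR is stored first */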
+
+static u32 ich_vtr_el2;
+
+static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+ struct vgic_lr lr_desc;
+ u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)];
+
+ lr_desc.irq = val & GICH_LR_VIRTUALID;
+ if (lr_desc.irq <= 15)
+ lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7;
+ else
+ lr_desc.source = 0;
+ lr_desc.state = 0;
+
+ if (val & ICH_LR_PENDING_BIT)
+ lr_desc.state |= LR_STATE_PENDING;
+ if (val & ICH_LR_ACTIVE_BIT)
+ lr_desc.state |= LR_STATE_ACTIVE;
+ if (val & ICH_LR_EOI)
+ lr_desc.state |= LR_EOI_INT;
+
+ return lr_desc;
+}
+
+static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ u64 lr_val = (((u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) |
+ lr_desc.irq);
+
+ if (lr_desc.state & LR_STATE_PENDING)
+ lr_val |= ICH_LR_PENDING_BIT;
+ if (lr_desc.state & LR_STATE_ACTIVE)
+ lr_val |= ICH_LR_ACTIVE_BIT;
+ if (lr_desc.state & LR_EOI_INT)
+ lr_val |= ICH_LR_EOI;
+
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
+}
+
+static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr lr_desc)
+{
+ if (!(lr_desc.state & LR_STATE_MASK))
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+}
+
+static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr;
+}
+
+static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
+}
+
+static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
+{
+ u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
+ u32 ret = 0;
+
+ if (misr & ICH_MISR_EOI)
+ ret |= INT_STATUS_EOI;
+ if (misr & ICH_MISR_U)
+ ret |= INT_STATUS_UNDERFLOW;
+
+ return ret;
+}
+
+static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
+
+ vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
+ vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
+ vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
+ vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+}
+
+static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE;
+}
+
+static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE;
+}
+
+static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr;
+
+ vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
+ vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
+ vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
+ vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
+
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
+}
+
+static void vgic_v3_enable(struct kvm_vcpu *vcpu)
+{
+ /*
+ * By forcing VMCR to zero, the GIC will restore the binary
+ * points to their reset values. Anything else resets to zero
+ * anyway.
+ */
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = 0;
+
+ /* Get the show on the road... */
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr = ICH_HCR_EN;
+}
+
+static const struct vgic_ops vgic_v3_ops = {
+ .get_lr = vgic_v3_get_lr,
+ .set_lr = vgic_v3_set_lr,
+ .sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
+ .get_elrsr = vgic_v3_get_elrsr,
+ .get_eisr = vgic_v3_get_eisr,
+ .get_interrupt_status = vgic_v3_get_interrupt_status,
+ .enable_underflow = vgic_v3_enable_underflow,
+ .disable_underflow = vgic_v3_disable_underflow,
+ .get_vmcr = vgic_v3_get_vmcr,
+ .set_vmcr = vgic_v3_set_vmcr,
+ .enable = vgic_v3_enable,
+};
+
+static struct vgic_params vgic_v3_params;
+
+/**
+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
+ * @node: pointer to the DT node
+ * @ops: address of a pointer to the GICv3 operations
+ * @params: address of a pointer to HW-specific parameters
+ *
+ * Returns 0 if a GICv3 has been found, with the low level operations
+ * in *ops and the HW parameters in *params. Returns an error code
+ * otherwise.
+ */
+int vgic_v3_probe(struct device_node *vgic_node,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
+{
+ int ret = 0;
+ u32 gicv_idx;
+ struct resource vcpu_res;
+ struct vgic_params *vgic = &vgic_v3_params;
+
+ vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
+ if (!vgic->maint_irq) {
+ kvm_err("error getting vgic maintenance irq from DT\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
+
+ /*
+ * The ListRegs field is 5 bits, but there is an architectural
+ * maximum of 16 list registers. Just ignore bit 4...
+ */
+ vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
+
+ if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
+ gicv_idx = 1;
+
+ gicv_idx += 3; /* Also skip GICD, GICC, GICH */
+ if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
+ kvm_err("Cannot obtain GICV region\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (!PAGE_ALIGNED(vcpu_res.start)) {
+ kvm_err("GICV physical address 0x%llx not page aligned\n",
+ (unsigned long long)vcpu_res.start);
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+ (unsigned long long)resource_size(&vcpu_res),
+ PAGE_SIZE);
+ ret = -ENXIO;
+ goto out;
+ }
+
+ vgic->vcpu_base = vcpu_res.start;
+ vgic->vctrl_base = NULL;
+ vgic->type = VGIC_V3;
+
+ kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
+ vcpu_res.start, vgic->maint_irq);
+
+ *ops = &vgic_v3_ops;
+ *params = vgic;
+
+out:
+ of_node_put(vgic_node);
+ return ret;
+}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
new file mode 100644
index 000000000000..8e1dc03342c3
--- /dev/null
+++ b/virt/kvm/arm/vgic.c
@@ -0,0 +1,2464 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/uaccess.h>
+
+#include <linux/irqchip/arm-gic.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * How the whole thing works (courtesy of Christoffer Dall):
+ *
+ * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
+ * something is pending on the CPU interface.
+ * - Interrupts that are pending on the distributor are stored in the
+ * vgic.irq_pending bitmap (this bitmap is updated by both userland
+ * ioctls and guest mmio ops, and by other in-kernel peripherals such
+ * as the arch. timers).
+ * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
+ * recalculated
+ * - To calculate the oracle, we need info for each cpu from
+ * compute_pending_for_cpu, which considers:
+ * - PPI: dist->irq_pending & dist->irq_enable
+ * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
+ * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
+ * registers, stored on each vcpu. We only keep one bit of
+ * information per interrupt, making sure that only one vcpu can
+ * accept the interrupt.
+ * - If any of the above state changes, we must recalculate the oracle.
+ * - The same is true when injecting an interrupt, except that we only
+ * consider a single interrupt at a time. The irq_spi_cpu array
+ * contains the target CPU for each SPI.
+ *
+ * The handling of level interrupts adds some extra complexity. We
+ * need to track when the interrupt has been EOIed, so we can sample
+ * the 'line' again. This is achieved as such:
+ *
+ * - When a level interrupt is moved onto a vcpu, the corresponding
+ * bit in irq_queued is set. As long as this bit is set, the line
+ * will be ignored for further interrupts. The interrupt is injected
+ * into the vcpu with the GICH_LR_EOI bit set (generate a
+ * maintenance interrupt on EOI).
+ * - When the interrupt is EOIed, the maintenance interrupt fires,
+ * and clears the corresponding bit in irq_queued. This allows the
+ * interrupt line to be sampled again.
+ * - Note that level-triggered interrupts can also be set to pending from
+ * writes to GICD_ISPENDRn and lowering the external input line does not
+ * cause the interrupt to become inactive in such a situation.
+ * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
+ * inactive as long as the external input line is held high.
+ */
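
A self-contained toy model of the oracle described above, using plain bitmasks
where the real code below uses vgic_bitmap structures:

    /* Toy model only; see compute_pending_for_cpu() further down. */
    unsigned long pending = 0x14, enabled = 0x04, spi_target = 0x04;
    int vcpu_has_pending = !!(pending & enabled & spi_target);  /* 1 */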
+
+#define VGIC_ADDR_UNDEF (-1)
+#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
+
+#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
+#define IMPLEMENTER_ARM 0x43b
+#define GICC_ARCH_VERSION_V2 0x2
+
+#define ACCESS_READ_VALUE (1 << 0)
+#define ACCESS_READ_RAZ (0 << 0)
+#define ACCESS_READ_MASK(x) ((x) & (1 << 0))
+#define ACCESS_WRITE_IGNORED (0 << 1)
+#define ACCESS_WRITE_SETBIT (1 << 1)
+#define ACCESS_WRITE_CLEARBIT (2 << 1)
+#define ACCESS_WRITE_VALUE (3 << 1)
+#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1))
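
The mode word packs the read behaviour into bit 0 and the write behaviour into
bits 2:1, so a handler combines one of each:

    int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT;
    /* ACCESS_READ_MASK(mode)  == ACCESS_READ_VALUE    */
    /* ACCESS_WRITE_MASK(mode) == ACCESS_WRITE_CLEARBIT */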
+
+static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
+static void vgic_update_state(struct kvm *kvm);
+static void vgic_kick_vcpus(struct kvm *kvm);
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi);
+static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg);
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+
+static const struct vgic_ops *vgic_ops;
+static const struct vgic_params *vgic;
+
+/*
+ * struct vgic_bitmap contains a bitmap made of unsigned longs, but
+ * extracts u32s out of them.
+ *
+ * This does not work on 64-bit BE systems, because the bitmap access
+ * will store two consecutive 32-bit words with the higher-addressed
+ * register's bits at the lower index and the lower-addressed register's
+ * bits at the higher index.
+ *
+ * Therefore, swizzle the register index when accessing the 32-bit word
+ * registers to access the right register's value.
+ */
+#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
+#define REG_OFFSET_SWIZZLE 1
+#else
+#define REG_OFFSET_SWIZZLE 0
+#endif
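
In practice the swizzle just swaps each pair of 32-bit word indices on
big-endian 64-bit and is a no-op everywhere else:

    int idx = 2 ^ REG_OFFSET_SWIZZLE;  /* 3 on BE64, 2 otherwise */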
+
+static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
+{
+ int nr_longs;
+
+ nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+
+ b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
+ if (!b->private)
+ return -ENOMEM;
+
+ b->shared = b->private + nr_cpus;
+
+ return 0;
+}
+
+static void vgic_free_bitmap(struct vgic_bitmap *b)
+{
+ kfree(b->private);
+ b->private = NULL;
+ b->shared = NULL;
+}
+
+static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
+ int cpuid, u32 offset)
+{
+ offset >>= 2;
+ if (!offset)
+ return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
+ else
+ return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
+}
+
+static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
+ int cpuid, int irq)
+{
+ if (irq < VGIC_NR_PRIVATE_IRQS)
+ return test_bit(irq, x->private + cpuid);
+
+ return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
+}
+
+static void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
+ int irq, int val)
+{
+ unsigned long *reg;
+
+ if (irq < VGIC_NR_PRIVATE_IRQS) {
+ reg = x->private + cpuid;
+ } else {
+ reg = x->shared;
+ irq -= VGIC_NR_PRIVATE_IRQS;
+ }
+
+ if (val)
+ set_bit(irq, reg);
+ else
+ clear_bit(irq, reg);
+}
+
+static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
+{
+ return x->private + cpuid;
+}
+
+static unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
+{
+ return x->shared;
+}
+
+static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
+{
+ int size;
+
+ size = nr_cpus * VGIC_NR_PRIVATE_IRQS;
+ size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
+
+ x->private = kzalloc(size, GFP_KERNEL);
+ if (!x->private)
+ return -ENOMEM;
+
+ x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
+ return 0;
+}
+
+static void vgic_free_bytemap(struct vgic_bytemap *b)
+{
+ kfree(b->private);
+ b->private = NULL;
+ b->shared = NULL;
+}
+
+static u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
+{
+ u32 *reg;
+
+ if (offset < VGIC_NR_PRIVATE_IRQS) {
+ reg = x->private;
+ offset += cpuid * VGIC_NR_PRIVATE_IRQS;
+ } else {
+ reg = x->shared;
+ offset -= VGIC_NR_PRIVATE_IRQS;
+ }
+
+ return reg + (offset / sizeof(u32));
+}
+
+#define VGIC_CFG_LEVEL 0
+#define VGIC_CFG_EDGE 1
+
+static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int irq_val;
+
+ irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
+ return irq_val == VGIC_CFG_EDGE;
+}
+
+static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
+}
+
+static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
+}
+
+static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
+}
+
+static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
+}
+
+static void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
+}
+
+static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
+{
+ if (irq < VGIC_NR_PRIVATE_IRQS)
+ set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
+ else
+ set_bit(irq - VGIC_NR_PRIVATE_IRQS,
+ vcpu->arch.vgic_cpu.pending_shared);
+}
+
+static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
+{
+ if (irq < VGIC_NR_PRIVATE_IRQS)
+ clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
+ else
+ clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
+ vcpu->arch.vgic_cpu.pending_shared);
+}
+
+static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
+{
+ return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+}
+
+static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
+{
+ return le32_to_cpu(*((u32 *)mmio->data)) & mask;
+}
+
+static void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
+{
+ *((u32 *)mmio->data) = cpu_to_le32(value) & mask;
+}
+
+/**
+ * vgic_reg_access - access vgic register
+ * @mmio: pointer to the data describing the mmio access
+ * @reg: pointer to the virtual backing of vgic distributor data
+ * @offset: least significant 2 bits used for word offset
+ * @mode: ACCESS_ mode (see defines above)
+ *
+ * Helper to make vgic register access easier using one of the access
+ * modes defined for vgic register access
+ * (read,raz,write-ignored,setbit,clearbit,write)
+ */
+static void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
+ phys_addr_t offset, int mode)
+{
+ int word_offset = (offset & 3) * 8;
+ u32 mask = (1UL << (mmio->len * 8)) - 1;
+ u32 regval;
+
+ /*
+ * Any alignment fault should have been delivered to the guest
+ * directly (ARM ARM B3.12.7 "Prioritization of aborts").
+ */
+
+ if (reg) {
+ regval = *reg;
+ } else {
+ BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
+ regval = 0;
+ }
+
+ if (mmio->is_write) {
+ u32 data = mmio_data_read(mmio, mask) << word_offset;
+ switch (ACCESS_WRITE_MASK(mode)) {
+ case ACCESS_WRITE_IGNORED:
+ return;
+
+ case ACCESS_WRITE_SETBIT:
+ regval |= data;
+ break;
+
+ case ACCESS_WRITE_CLEARBIT:
+ regval &= ~data;
+ break;
+
+ case ACCESS_WRITE_VALUE:
+ regval = (regval & ~(mask << word_offset)) | data;
+ break;
+ }
+ *reg = regval;
+ } else {
+ switch (ACCESS_READ_MASK(mode)) {
+ case ACCESS_READ_RAZ:
+ regval = 0;
+ /* fall through */
+
+ case ACCESS_READ_VALUE:
+ mmio_data_write(mmio, mask, regval >> word_offset);
+ }
+ }
+}
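
To see the masking at work, take a 1-byte guest write to byte 2 of a register
(illustrative numbers):

    /* mmio->len == 1, offset & 3 == 2 */
    int word_offset = 2 * 8;       /* 16 */
    u32 mask = (1UL << 8) - 1;     /* 0xff */
    /* ACCESS_WRITE_VALUE then merges the already-shifted byte:
     * regval = (regval & ~(mask << word_offset)) | data; */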
+
+static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ u32 reg;
+ u32 word_offset = offset & 3;
+
+ switch (offset & ~3) {
+ case 0: /* GICD_CTLR */
+ reg = vcpu->kvm->arch.vgic.enabled;
+ vgic_reg_access(mmio, &reg, word_offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+ if (mmio->is_write) {
+ vcpu->kvm->arch.vgic.enabled = reg & 1;
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+ break;
+
+ case 4: /* GICD_TYPER */
+ reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
+ reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
+ vgic_reg_access(mmio, &reg, word_offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+ break;
+
+ case 8: /* GICD_IIDR */
+ reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+ vgic_reg_access(mmio, &reg, word_offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+ break;
+ }
+
+ return false;
+}
+
+static bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ vgic_reg_access(mmio, NULL, offset,
+ ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+ return false;
+}
+
+static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+ if (mmio->is_write) {
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_enabled,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+ if (mmio->is_write) {
+ if (offset < 4) /* Force SGI enabled */
+ *reg |= 0xffff;
+ vgic_retire_disabled_irqs(vcpu);
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 *reg, orig;
+ u32 level_mask;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu->vcpu_id, offset);
+ level_mask = (~(*reg));
+
+ /* Mark both level and edge triggered irqs as pending */
+ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+ orig = *reg;
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
+ if (mmio->is_write) {
+ /* Set the soft-pending flag only for level-triggered irqs */
+ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+ *reg &= level_mask;
+
+ /* Ignore writes to SGIs */
+ if (offset < 2) {
+ *reg &= ~0xffff;
+ *reg |= orig & 0xffff;
+ }
+
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 *level_active;
+ u32 *reg, orig;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu->vcpu_id, offset);
+ orig = *reg;
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+ if (mmio->is_write) {
+ /* Re-set level triggered level-active interrupts */
+ level_active = vgic_bitmap_get_reg(&dist->irq_level,
+ vcpu->vcpu_id, offset);
+ reg = vgic_bitmap_get_reg(&dist->irq_pending,
+ vcpu->vcpu_id, offset);
+ *reg |= *level_active;
+
+ /* Ignore writes to SGIs */
+ if (offset < 2) {
+ *reg &= ~0xffff;
+ *reg |= orig & 0xffff;
+ }
+
+ /* Clear soft-pending flags */
+ reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority,
+ vcpu->vcpu_id, offset);
+ vgic_reg_access(mmio, reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+ return false;
+}
+
+#define GICD_ITARGETSR_SIZE 32
+#define GICD_CPUTARGETS_BITS 8
+#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS)
+static u32 vgic_get_target_reg(struct kvm *kvm, int irq)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ int i;
+ u32 val = 0;
+
+ irq -= VGIC_NR_PRIVATE_IRQS;
+
+ for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++)
+ val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8);
+
+ return val;
+}
+
+static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int i, c;
+ unsigned long *bmap;
+ u32 target;
+
+ irq -= VGIC_NR_PRIVATE_IRQS;
+
+ /*
+ * Pick the LSB in each byte. This ensures we target exactly
+ * one vcpu per IRQ. If the byte is null, assume we target
+ * CPU0.
+ */
+ for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) {
+ int shift = i * GICD_CPUTARGETS_BITS;
+ target = ffs((val >> shift) & 0xffU);
+ target = target ? (target - 1) : 0;
+ dist->irq_spi_cpu[irq + i] = target;
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]);
+ if (c == target)
+ set_bit(irq + i, bmap);
+ else
+ clear_bit(irq + i, bmap);
+ }
+ }
+}
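
Only the lowest set bit of each target byte is honoured, since the emulated
distributor routes each SPI to exactly one vcpu. For a hypothetical target
byte of 0x06:

    int target = ffs(0x06) - 1;  /* 1: CPU1; the CPU2 bit is dropped */
    /* A zero byte yields target 0, i.e. the interrupt falls back to CPU0. */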
+
+static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 reg;
+
+ /* We treat the banked interrupts targets as read-only */
+ if (offset < 32) {
+ u32 roreg = 1 << vcpu->vcpu_id;
+ roreg |= roreg << 8;
+ roreg |= roreg << 16;
+
+ vgic_reg_access(mmio, &roreg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+ return false;
+ }
+
+ reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U);
+ vgic_reg_access(mmio, &reg, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+ if (mmio->is_write) {
+ vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U);
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+static u32 vgic_cfg_expand(u16 val)
+{
+ u32 res = 0;
+ int i;
+
+ /*
+ * Turn a 16bit value like abcd...mnop into a 32bit word
+ * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
+ */
+ for (i = 0; i < 16; i++)
+ res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
+
+ return res;
+}
+
+static u16 vgic_cfg_compress(u32 val)
+{
+ u16 res = 0;
+ int i;
+
+ /*
+ * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like
+ * abcd...mnop which is what we really care about.
+ */
+ for (i = 0; i < 16; i++)
+ res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
+
+ return res;
+}
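
The two helpers are exact inverses on the bits the GIC defines. For an
illustrative value:

    u16 cfg = 0x8001;               /* IRQ15 and IRQ0 edge-triggered */
    u32 hw  = vgic_cfg_expand(cfg); /* 0x80000002 */
    /* vgic_cfg_compress(hw) == 0x8001 again */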
+
+/*
+ * The distributor uses 2 bits per IRQ for the CFG register, but the
+ * LSB is always 0. As such, we only keep the upper bit, and use the
+ * two above functions to compress/expand the bits
+ */
+static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ u32 val;
+ u32 *reg;
+
+ reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
+ vcpu->vcpu_id, offset >> 1);
+
+ if (offset & 4)
+ val = *reg >> 16;
+ else
+ val = *reg & 0xffff;
+
+ val = vgic_cfg_expand(val);
+ vgic_reg_access(mmio, &val, offset,
+ ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
+ if (mmio->is_write) {
+ if (offset < 8) {
+ *reg = ~0U; /* Force PPIs/SGIs to 1 */
+ return false;
+ }
+
+ val = vgic_cfg_compress(val);
+ if (offset & 4) {
+ *reg &= 0xffff;
+ *reg |= val << 16;
+ } else {
+ *reg &= 0xffff << 16;
+ *reg |= val;
+ }
+ }
+
+ return false;
+}
+
+static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ u32 reg;
+ vgic_reg_access(mmio, &reg, offset,
+ ACCESS_READ_RAZ | ACCESS_WRITE_VALUE);
+ if (mmio->is_write) {
+ vgic_dispatch_sgi(vcpu, reg);
+ vgic_update_state(vcpu->kvm);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
+ * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
+ *
+ * Move any pending IRQs that have already been assigned to LRs back to the
+ * emulated distributor state so that the complete emulated state can be read
+ * from the main emulation structures without investigating the LRs.
+ *
+ * Note that IRQs in the active state in the LRs get their pending state moved
+ * to the distributor but the active state stays in the LRs, because we don't
+ * track the active state on the distributor side.
+ */
+static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ int vcpu_id = vcpu->vcpu_id;
+ int i;
+
+ for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+ struct vgic_lr lr = vgic_get_lr(vcpu, i);
+
+ /*
+ * There are three options for the state bits:
+ *
+ * 01: pending
+ * 10: active
+ * 11: pending and active
+ *
+ * If the LR holds only an active interrupt (not pending) then
+ * just leave it alone.
+ */
+ if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
+ continue;
+
+ /*
+ * Reestablish the pending state on the distributor and the
+ * CPU interface. It may have already been pending, but that
+ * is fine; in that case we are only setting a few bits that
+ * were already set.
+ */
+ vgic_dist_irq_set_pending(vcpu, lr.irq);
+ if (lr.irq < VGIC_NR_SGIS)
+ *vgic_get_sgi_sources(dist, vcpu_id, lr.irq) |= 1 << lr.source;
+ lr.state &= ~LR_STATE_PENDING;
+ vgic_set_lr(vcpu, i, lr);
+
+ /*
+ * If there's no state left on the LR (it could still be
+ * active), then the LR does not hold any useful info and can
+ * be marked as free for other use.
+ */
+ if (!(lr.state & LR_STATE_MASK)) {
+ vgic_retire_lr(i, lr.irq, vcpu);
+ vgic_irq_clear_queued(vcpu, lr.irq);
+ }
+
+ /* Finally update the VGIC state. */
+ vgic_update_state(vcpu->kvm);
+ }
+}
+
+/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
+static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int sgi;
+ int min_sgi = (offset & ~0x3);
+ int max_sgi = min_sgi + 3;
+ int vcpu_id = vcpu->vcpu_id;
+ u32 reg = 0;
+
+ /* Copy source SGIs from distributor side */
+ for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+ int shift = 8 * (sgi - min_sgi);
+ reg |= ((u32)*vgic_get_sgi_sources(dist, vcpu_id, sgi)) << shift;
+ }
+
+ mmio_data_write(mmio, ~0, reg);
+ return false;
+}
+
+static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, bool set)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int sgi;
+ int min_sgi = (offset & ~0x3);
+ int max_sgi = min_sgi + 3;
+ int vcpu_id = vcpu->vcpu_id;
+ u32 reg;
+ bool updated = false;
+
+ reg = mmio_data_read(mmio, ~0);
+
+ /* Clear pending SGIs on the distributor */
+ for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+ u8 mask = reg >> (8 * (sgi - min_sgi));
+ u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi);
+ if (set) {
+ if ((*src & mask) != mask)
+ updated = true;
+ *src |= mask;
+ } else {
+ if (*src & mask)
+ updated = true;
+ *src &= ~mask;
+ }
+ }
+
+ if (updated)
+ vgic_update_state(vcpu->kvm);
+
+ return updated;
+}
+
+static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ if (!mmio->is_write)
+ return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+ else
+ return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
+}
+
+static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ if (!mmio->is_write)
+ return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+ else
+ return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
+}
+
+/*
+ * I would have liked to use the kvm_bus_io_*() API instead, but it
+ * cannot cope with banked registers (only the VM pointer is passed
+ * around, and we need the vcpu). One of these days, someone please
+ * fix it!
+ */
+struct mmio_range {
+ phys_addr_t base;
+ unsigned long len;
+ int bits_per_irq;
+ bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
+ phys_addr_t offset);
+};
+
+static const struct mmio_range vgic_dist_ranges[] = {
+ {
+ .base = GIC_DIST_CTRL,
+ .len = 12,
+ .bits_per_irq = 0,
+ .handle_mmio = handle_mmio_misc,
+ },
+ {
+ .base = GIC_DIST_IGROUP,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_raz_wi,
+ },
+ {
+ .base = GIC_DIST_ENABLE_SET,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_set_enable_reg,
+ },
+ {
+ .base = GIC_DIST_ENABLE_CLEAR,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_clear_enable_reg,
+ },
+ {
+ .base = GIC_DIST_PENDING_SET,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_set_pending_reg,
+ },
+ {
+ .base = GIC_DIST_PENDING_CLEAR,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_clear_pending_reg,
+ },
+ {
+ .base = GIC_DIST_ACTIVE_SET,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_raz_wi,
+ },
+ {
+ .base = GIC_DIST_ACTIVE_CLEAR,
+ .len = VGIC_MAX_IRQS / 8,
+ .bits_per_irq = 1,
+ .handle_mmio = handle_mmio_raz_wi,
+ },
+ {
+ .base = GIC_DIST_PRI,
+ .len = VGIC_MAX_IRQS,
+ .bits_per_irq = 8,
+ .handle_mmio = handle_mmio_priority_reg,
+ },
+ {
+ .base = GIC_DIST_TARGET,
+ .len = VGIC_MAX_IRQS,
+ .bits_per_irq = 8,
+ .handle_mmio = handle_mmio_target_reg,
+ },
+ {
+ .base = GIC_DIST_CONFIG,
+ .len = VGIC_MAX_IRQS / 4,
+ .bits_per_irq = 2,
+ .handle_mmio = handle_mmio_cfg_reg,
+ },
+ {
+ .base = GIC_DIST_SOFTINT,
+ .len = 4,
+ .handle_mmio = handle_mmio_sgi_reg,
+ },
+ {
+ .base = GIC_DIST_SGI_PENDING_CLEAR,
+ .len = VGIC_NR_SGIS,
+ .handle_mmio = handle_mmio_sgi_clear,
+ },
+ {
+ .base = GIC_DIST_SGI_PENDING_SET,
+ .len = VGIC_NR_SGIS,
+ .handle_mmio = handle_mmio_sgi_set,
+ },
+ {}
+};
+
+static const
+struct mmio_range *find_matching_range(const struct mmio_range *ranges,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ const struct mmio_range *r = ranges;
+
+ while (r->len) {
+ if (offset >= r->base &&
+ (offset + mmio->len) <= (r->base + r->len))
+ return r;
+ r++;
+ }
+
+ return NULL;
+}
+
+static bool vgic_validate_access(const struct vgic_dist *dist,
+ const struct mmio_range *range,
+ unsigned long offset)
+{
+ int irq;
+
+ if (!range->bits_per_irq)
+ return true; /* Not an irq-based access */
+
+ irq = offset * 8 / range->bits_per_irq;
+ if (irq >= dist->nr_irqs)
+ return false;
+
+ return true;
+}
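
The check turns a byte offset back into an IRQ number using the register's
density. For the priority registers (8 bits per IRQ) and a hypothetical
dist->nr_irqs of 64:

    int irq = 0x40 * 8 / 8;  /* offset 0x40 maps to irq 64 */
    /* 64 >= nr_irqs, so the caller treats the access as RAZ/WI */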
+
+/**
+ * vgic_handle_mmio - handle an in-kernel MMIO access
+ * @vcpu: pointer to the vcpu performing the access
+ * @run: pointer to the kvm_run structure
+ * @mmio: pointer to the data describing the access
+ *
+ * returns true if the MMIO access has been performed in kernel space,
+ * and false if it needs to be emulated in user space.
+ */
+bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_exit_mmio *mmio)
+{
+ const struct mmio_range *range;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long base = dist->vgic_dist_base;
+ bool updated_state;
+ unsigned long offset;
+
+ if (!irqchip_in_kernel(vcpu->kvm) ||
+ mmio->phys_addr < base ||
+ (mmio->phys_addr + mmio->len) > (base + KVM_VGIC_V2_DIST_SIZE))
+ return false;
+
+ /* We don't support ldrd / strd or ldm / stm to the emulated vgic */
+ if (mmio->len > 4) {
+ kvm_inject_dabt(vcpu, mmio->phys_addr);
+ return true;
+ }
+
+ offset = mmio->phys_addr - base;
+ range = find_matching_range(vgic_dist_ranges, mmio, offset);
+ if (unlikely(!range || !range->handle_mmio)) {
+ pr_warn("Unhandled access %d %08llx %d\n",
+ mmio->is_write, mmio->phys_addr, mmio->len);
+ return false;
+ }
+
+ spin_lock(&vcpu->kvm->arch.vgic.lock);
+ offset = mmio->phys_addr - range->base - base;
+ if (vgic_validate_access(dist, range, offset)) {
+ updated_state = range->handle_mmio(vcpu, mmio, offset);
+ } else {
+ vgic_reg_access(mmio, NULL, offset,
+ ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+ updated_state = false;
+ }
+ spin_unlock(&vcpu->kvm->arch.vgic.lock);
+ kvm_prepare_mmio(run, mmio);
+ kvm_handle_mmio_return(vcpu, run);
+
+ if (updated_state)
+ vgic_kick_vcpus(vcpu->kvm);
+
+ return true;
+}
+
+static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi)
+{
+ return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi;
+}
+
+static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ int nrcpus = atomic_read(&kvm->online_vcpus);
+ u8 target_cpus;
+ int sgi, mode, c, vcpu_id;
+
+ vcpu_id = vcpu->vcpu_id;
+
+ sgi = reg & 0xf;
+ target_cpus = (reg >> 16) & 0xff;
+ mode = (reg >> 24) & 3;
+
+ switch (mode) {
+ case 0:
+ if (!target_cpus)
+ return;
+ break;
+
+ case 1:
+ target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
+ break;
+
+ case 2:
+ target_cpus = 1 << vcpu_id;
+ break;
+ }
+
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ if (target_cpus & 1) {
+ /* Flag the SGI as pending */
+ vgic_dist_irq_set_pending(vcpu, sgi);
+ *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id;
+ kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c);
+ }
+
+ target_cpus >>= 1;
+ }
+}
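
The GICD_SGIR layout decoded above packs the SGI number in bits [3:0], the
target list in bits [23:16] and the filter in bits [25:24]. For a hypothetical
write of 0x00020003:

    u32 reg = 0x00020003;
    int sgi     = reg & 0xf;           /* 3 */
    u8 targets  = (reg >> 16) & 0xff;  /* 0x02: vcpu1 only */
    int mode    = (reg >> 24) & 3;     /* 0: use the target list */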
+
+static int vgic_nr_shared_irqs(struct vgic_dist *dist)
+{
+ return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
+}
+
+static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
+ unsigned long pending_private, pending_shared;
+ int nr_shared = vgic_nr_shared_irqs(dist);
+ int vcpu_id;
+
+ vcpu_id = vcpu->vcpu_id;
+ pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
+ pend_shared = vcpu->arch.vgic_cpu.pending_shared;
+
+ pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
+ enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
+ bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
+
+ pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
+ enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
+ bitmap_and(pend_shared, pending, enabled, nr_shared);
+ bitmap_and(pend_shared, pend_shared,
+ vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
+ nr_shared);
+
+ pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
+ pending_shared = find_first_bit(pend_shared, nr_shared);
+ return (pending_private < VGIC_NR_PRIVATE_IRQS ||
+ pending_shared < vgic_nr_shared_irqs(dist));
+}
+
+/*
+ * Update the interrupt state and determine which CPUs have pending
+ * interrupts. Must be called with distributor lock held.
+ */
+static void vgic_update_state(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int c;
+
+ if (!dist->enabled) {
+ set_bit(0, dist->irq_pending_on_cpu);
+ return;
+ }
+
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ if (compute_pending_for_cpu(vcpu)) {
+ pr_debug("CPU%d has pending interrupts\n", c);
+ set_bit(c, dist->irq_pending_on_cpu);
+ }
+ }
+}
+
+static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
+{
+ return vgic_ops->get_lr(vcpu, lr);
+}
+
+static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr vlr)
+{
+ vgic_ops->set_lr(vcpu, lr, vlr);
+}
+
+static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
+ struct vgic_lr vlr)
+{
+ vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
+}
+
+static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
+{
+ return vgic_ops->get_elrsr(vcpu);
+}
+
+static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
+{
+ return vgic_ops->get_eisr(vcpu);
+}
+
+static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
+{
+ return vgic_ops->get_interrupt_status(vcpu);
+}
+
+static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->enable_underflow(vcpu);
+}
+
+static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->disable_underflow(vcpu);
+}
+
+static inline void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ vgic_ops->get_vmcr(vcpu, vmcr);
+}
+
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ vgic_ops->set_vmcr(vcpu, vmcr);
+}
+
+static inline void vgic_enable(struct kvm_vcpu *vcpu)
+{
+ vgic_ops->enable(vcpu);
+}
+
+static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
+
+ vlr.state = 0;
+ vgic_set_lr(vcpu, lr_nr, vlr);
+ clear_bit(lr_nr, vgic_cpu->lr_used);
+ vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+}
+
+/*
+ * An interrupt may have been disabled after being made pending on the
+ * CPU interface (the classic case is a timer running while we're
+ * rebooting the guest - the interrupt would kick as soon as the CPU
+ * interface gets enabled, with deadly consequences).
+ *
+ * The solution is to examine the already active LRs and check
+ * that the interrupt is still enabled. If not, just retire it.
+ */
+static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ int lr;
+
+ for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
+ struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+
+ if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
+ vgic_retire_lr(lr, vlr.irq, vcpu);
+ if (vgic_irq_is_queued(vcpu, vlr.irq))
+ vgic_irq_clear_queued(vcpu, vlr.irq);
+ }
+ }
+}
+
+/*
+ * Queue an interrupt to a CPU virtual interface. Return true on success,
+ * or false if it wasn't possible to queue it.
+ */
+static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct vgic_lr vlr;
+ int lr;
+
+ /* Sanitize the input... */
+ BUG_ON(sgi_source_id & ~7);
+ BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
+ BUG_ON(irq >= dist->nr_irqs);
+
+ kvm_debug("Queue IRQ%d\n", irq);
+
+ lr = vgic_cpu->vgic_irq_lr_map[irq];
+
+ /* Do we have an active interrupt for the same CPUID? */
+ if (lr != LR_EMPTY) {
+ vlr = vgic_get_lr(vcpu, lr);
+ if (vlr.source == sgi_source_id) {
+ kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
+ BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
+ vlr.state |= LR_STATE_PENDING;
+ vgic_set_lr(vcpu, lr, vlr);
+ return true;
+ }
+ }
+
+ /* Try to use another LR for this interrupt */
+ lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
+ vgic->nr_lr);
+ if (lr >= vgic->nr_lr)
+ return false;
+
+ kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
+ vgic_cpu->vgic_irq_lr_map[irq] = lr;
+ set_bit(lr, vgic_cpu->lr_used);
+
+ vlr.irq = irq;
+ vlr.source = sgi_source_id;
+ vlr.state = LR_STATE_PENDING;
+ if (!vgic_irq_is_edge(vcpu, irq))
+ vlr.state |= LR_EOI_INT;
+
+ vgic_set_lr(vcpu, lr, vlr);
+
+ return true;
+}
+
+static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long sources;
+ int vcpu_id = vcpu->vcpu_id;
+ int c;
+
+ sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
+
+ for_each_set_bit(c, &sources, dist->nr_cpus) {
+ if (vgic_queue_irq(vcpu, c, irq))
+ clear_bit(c, &sources);
+ }
+
+ *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources;
+
+ /*
+ * If the sources bitmap has been cleared it means that we
+ * could queue all the SGIs onto link registers (see the
+ * clear_bit above), and therefore we are done with them in
+ * our emulated gic and can get rid of them.
+ */
+ if (!sources) {
+ vgic_dist_irq_clear_pending(vcpu, irq);
+ vgic_cpu_irq_clear(vcpu, irq);
+ return true;
+ }
+
+ return false;
+}
+
+static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
+{
+ if (!vgic_can_sample_irq(vcpu, irq))
+ return true; /* level interrupt, already queued */
+
+ if (vgic_queue_irq(vcpu, 0, irq)) {
+ if (vgic_irq_is_edge(vcpu, irq)) {
+ vgic_dist_irq_clear_pending(vcpu, irq);
+ vgic_cpu_irq_clear(vcpu, irq);
+ } else {
+ vgic_irq_set_queued(vcpu, irq);
+ }
+
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Fill the list registers with pending interrupts before running the
+ * guest.
+ */
+static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int i, vcpu_id;
+ int overflow = 0;
+
+ vcpu_id = vcpu->vcpu_id;
+
+ /*
+ * We may not have any pending interrupt, or the interrupts
+ * may have been serviced from another vcpu. In all cases,
+ * move along.
+ */
+ if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
+ pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
+ goto epilog;
+ }
+
+ /* SGIs */
+ for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
+ if (!vgic_queue_sgi(vcpu, i))
+ overflow = 1;
+ }
+
+ /* PPIs */
+ for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
+ if (!vgic_queue_hwirq(vcpu, i))
+ overflow = 1;
+ }
+
+ /* SPIs */
+ for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
+ if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
+ overflow = 1;
+ }
+
+epilog:
+ if (overflow) {
+ vgic_enable_underflow(vcpu);
+ } else {
+ vgic_disable_underflow(vcpu);
+ /*
+ * We're about to run this VCPU, and we've consumed
+ * everything the distributor had in store for
+ * us. Claim we don't have anything pending. We'll
+ * adjust that if needed while exiting.
+ */
+ clear_bit(vcpu_id, dist->irq_pending_on_cpu);
+ }
+}
+
+static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
+{
+ u32 status = vgic_get_interrupt_status(vcpu);
+ bool level_pending = false;
+
+ kvm_debug("STATUS = %08x\n", status);
+
+ if (status & INT_STATUS_EOI) {
+ /*
+ * Some level interrupts have been EOIed. Clear their
+ * active bit.
+ */
+ u64 eisr = vgic_get_eisr(vcpu);
+ unsigned long *eisr_ptr = (unsigned long *)&eisr;
+ int lr;
+
+ for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
+ struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
+ WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
+
+ vgic_irq_clear_queued(vcpu, vlr.irq);
+ WARN_ON(vlr.state & LR_STATE_MASK);
+ vlr.state = 0;
+ vgic_set_lr(vcpu, lr, vlr);
+
+ /*
+ * If the IRQ was EOIed it was also ACKed and we
+ * therefore assume we can clear the soft pending
+ * state (should it have been set) for this interrupt.
+ *
+ * Note: if the IRQ soft pending state was set after
+ * the IRQ was acked, it actually shouldn't be
+ * cleared, but we have no way of knowing that unless
+ * we start trapping ACKs when the soft-pending state
+ * is set.
+ */
+ vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
+ /* Any additional pending interrupt? */
+ if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
+ vgic_cpu_irq_set(vcpu, vlr.irq);
+ level_pending = true;
+ } else {
+ vgic_dist_irq_clear_pending(vcpu, vlr.irq);
+ vgic_cpu_irq_clear(vcpu, vlr.irq);
+ }
+
+ /*
+ * Despite being EOIed, the LR may not have
+ * been marked as empty.
+ */
+ vgic_sync_lr_elrsr(vcpu, lr, vlr);
+ }
+ }
+
+ if (status & INT_STATUS_UNDERFLOW)
+ vgic_disable_underflow(vcpu);
+
+ return level_pending;
+}
+
+/*
+ * Sync back the VGIC state after a guest run. The distributor lock is
+ * needed so we don't get preempted in the middle of the state processing.
+ */
+static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ u64 elrsr;
+ unsigned long *elrsr_ptr;
+ int lr, pending;
+ bool level_pending;
+
+ level_pending = vgic_process_maintenance(vcpu);
+ elrsr = vgic_get_elrsr(vcpu);
+ elrsr_ptr = (unsigned long *)&elrsr;
+
+ /* Clear mappings for empty LRs */
+ for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
+ struct vgic_lr vlr;
+
+ if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
+ continue;
+
+ vlr = vgic_get_lr(vcpu, lr);
+
+ BUG_ON(vlr.irq >= dist->nr_irqs);
+ vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
+ }
+
+ /* Check if we still have something up our sleeve... */
+ pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
+ if (level_pending || pending < vgic->nr_lr)
+ set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
+}
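As a worked example: with vgic->nr_lr = 4 and elrsr = 0b1011 (LR2 still occupied), find_first_zero_bit() returns 2, which is below nr_lr, so the vcpu keeps its bit in irq_pending_on_cpu and is reconsidered on the next guest entry.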
+
+void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return;
+
+ spin_lock(&dist->lock);
+ __kvm_vgic_flush_hwstate(vcpu);
+ spin_unlock(&dist->lock);
+}
+
+void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return;
+
+ spin_lock(&dist->lock);
+ __kvm_vgic_sync_hwstate(vcpu);
+ spin_unlock(&dist->lock);
+}
+
+int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return 0;
+
+ return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
+}
+
+static void vgic_kick_vcpus(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int c;
+
+ /*
+ * We've injected an interrupt, time to find out who deserves
+ * a good kick...
+ */
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ if (kvm_vgic_vcpu_pending_irq(vcpu))
+ kvm_vcpu_kick(vcpu);
+ }
+}
+
+static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
+{
+ int edge_triggered = vgic_irq_is_edge(vcpu, irq);
+
+ /*
+ * Only inject an interrupt if:
+ * - edge triggered and we have a rising edge
+ * - level triggered and we change level
+ */
+ if (edge_triggered) {
+ int state = vgic_dist_irq_is_pending(vcpu, irq);
+ return level > state;
+ } else {
+ int state = vgic_dist_irq_get_level(vcpu, irq);
+ return level != state;
+ }
+}
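Concretely, the validation reduces to a small truth table, where state is the value currently recorded in the distributor:

	edge-triggered:   state=0, level=1  ->  inject (rising edge)
	                  state=1, level=1  ->  reject (no new edge)
	level-triggered:  state=0, level=1  ->  inject (level changed)
	                  state=1, level=1  ->  reject (level unchanged)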
+
+static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
+ unsigned int irq_num, bool level)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int edge_triggered, level_triggered;
+ int enabled;
+ bool ret = true;
+
+ spin_lock(&dist->lock);
+
+ vcpu = kvm_get_vcpu(kvm, cpuid);
+ edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
+ level_triggered = !edge_triggered;
+
+ if (!vgic_validate_injection(vcpu, irq_num, level)) {
+ ret = false;
+ goto out;
+ }
+
+ if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
+ cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
+ vcpu = kvm_get_vcpu(kvm, cpuid);
+ }
+
+ kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
+
+ if (level) {
+ if (level_triggered)
+ vgic_dist_irq_set_level(vcpu, irq_num);
+ vgic_dist_irq_set_pending(vcpu, irq_num);
+ } else {
+ if (level_triggered) {
+ vgic_dist_irq_clear_level(vcpu, irq_num);
+ if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+ vgic_dist_irq_clear_pending(vcpu, irq_num);
+ } else {
+ vgic_dist_irq_clear_pending(vcpu, irq_num);
+ }
+ }
+
+ enabled = vgic_irq_is_enabled(vcpu, irq_num);
+
+ if (!enabled) {
+ ret = false;
+ goto out;
+ }
+
+ if (!vgic_can_sample_irq(vcpu, irq_num)) {
+ /*
+ * Level interrupt in progress, will be picked up
+ * when EOId.
+ */
+ ret = false;
+ goto out;
+ }
+
+ if (level) {
+ vgic_cpu_irq_set(vcpu, irq_num);
+ set_bit(cpuid, dist->irq_pending_on_cpu);
+ }
+
+out:
+ spin_unlock(&dist->lock);
+
+ return ret;
+}
+
+/**
+ * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
+ * @kvm: The VM structure pointer
+ * @cpuid: The CPU for PPIs
+ * @irq_num: The IRQ number that is assigned to the device
+ * @level: Edge-triggered: true: to trigger the interrupt
+ * false: to ignore the call
+ * Level-sensitive: true: activates an interrupt
+ * false: deactivates an interrupt
+ *
+ * The GIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts. You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
+ bool level)
+{
+ if (likely(vgic_initialized(kvm)) &&
+ vgic_update_irq_pending(kvm, cpuid, irq_num, level))
+ vgic_kick_vcpus(kvm);
+
+ return 0;
+}
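As a usage illustration, a minimal sketch of a caller; the device model, the SPI number and the helper name are assumptions, not part of this patch:

/*
 * Hypothetical device model asserting or deasserting a level-sensitive
 * SPI (SPI 27 chosen purely for illustration).
 */
static void example_dev_set_irq(struct kvm *kvm, bool asserted)
{
	/* SPIs are numbered above the 32 private interrupts. */
	unsigned int irq_num = VGIC_NR_PRIVATE_IRQS + 27;

	/* cpuid is ignored for SPIs; the target comes from irq_spi_cpu[]. */
	kvm_vgic_inject_irq(kvm, 0, irq_num, asserted);
}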
+
+static irqreturn_t vgic_maintenance_handler(int irq, void *data)
+{
+ /*
+ * We cannot rely on the vgic maintenance interrupt to be
+ * delivered synchronously. This means we can only use it to
+ * exit the VM, and we perform the handling of EOIed
+ * interrupts on the exit path (see vgic_process_maintenance).
+ */
+ return IRQ_HANDLED;
+}
+
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ kfree(vgic_cpu->pending_shared);
+ kfree(vgic_cpu->vgic_irq_lr_map);
+ vgic_cpu->pending_shared = NULL;
+ vgic_cpu->vgic_irq_lr_map = NULL;
+}
+
+static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+ vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+ vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+
+ if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+ kvm_vgic_vcpu_destroy(vcpu);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
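As a worked example: with nr_irqs = 256, pending_shared needs (256 - 32) / 8 = 28 bytes (one bit per shared interrupt) and vgic_irq_lr_map needs 256 bytes (one byte per interrupt).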
+
+/**
+ * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
+ * @vcpu: pointer to the vcpu struct
+ *
+ * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
+ * this vcpu and enable the VGIC for this VCPU
+ */
+static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int i;
+
+ for (i = 0; i < dist->nr_irqs; i++) {
+ if (i < VGIC_NR_PPIS)
+ vgic_bitmap_set_irq_val(&dist->irq_enabled,
+ vcpu->vcpu_id, i, 1);
+ if (i < VGIC_NR_PRIVATE_IRQS)
+ vgic_bitmap_set_irq_val(&dist->irq_cfg,
+ vcpu->vcpu_id, i, VGIC_CFG_EDGE);
+
+ vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
+ }
+
+ /*
+ * Store the number of LRs per vcpu, so we don't have to go
+ * all the way to the distributor structure to find out. Only
+ * assembly code should use this one.
+ */
+ vgic_cpu->nr_lr = vgic->nr_lr;
+
+ vgic_enable(vcpu);
+}
+
+void kvm_vgic_destroy(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vgic_vcpu_destroy(vcpu);
+
+ vgic_free_bitmap(&dist->irq_enabled);
+ vgic_free_bitmap(&dist->irq_level);
+ vgic_free_bitmap(&dist->irq_pending);
+ vgic_free_bitmap(&dist->irq_soft_pend);
+ vgic_free_bitmap(&dist->irq_queued);
+ vgic_free_bitmap(&dist->irq_cfg);
+ vgic_free_bytemap(&dist->irq_priority);
+ if (dist->irq_spi_target) {
+ for (i = 0; i < dist->nr_cpus; i++)
+ vgic_free_bitmap(&dist->irq_spi_target[i]);
+ }
+ kfree(dist->irq_sgi_sources);
+ kfree(dist->irq_spi_cpu);
+ kfree(dist->irq_spi_target);
+ kfree(dist->irq_pending_on_cpu);
+ dist->irq_sgi_sources = NULL;
+ dist->irq_spi_cpu = NULL;
+ dist->irq_spi_target = NULL;
+ dist->irq_pending_on_cpu = NULL;
+}
+
+/*
+ * Allocate and initialize the various data structures. Must be called
+ * with kvm->lock held!
+ */
+static int vgic_init_maps(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int nr_cpus, nr_irqs;
+ int ret, i;
+
+ if (dist->nr_cpus) /* Already allocated */
+ return 0;
+
+ nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
+ if (!nr_cpus) /* No vcpus? Can't be good... */
+ return -EINVAL;
+
+ /*
+ * If nobody configured the number of interrupts, use the
+ * legacy one.
+ */
+ if (!dist->nr_irqs)
+ dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
+
+ nr_irqs = dist->nr_irqs;
+
+ ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+ ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
+ ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
+
+ if (ret)
+ goto out;
+
+ dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
+ dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
+ dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
+ GFP_KERNEL);
+ dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+ GFP_KERNEL);
+ if (!dist->irq_sgi_sources ||
+ !dist->irq_spi_cpu ||
+ !dist->irq_spi_target ||
+ !dist->irq_pending_on_cpu) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
+ nr_cpus, nr_irqs);
+
+ if (ret)
+ goto out;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
+ if (ret) {
+ kvm_err("VGIC: Failed to allocate vcpu memory\n");
+ break;
+ }
+ }
+
+ for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+ vgic_set_target_reg(kvm, 0, i);
+
+out:
+ if (ret)
+ kvm_vgic_destroy(kvm);
+
+ return ret;
+}
+
+/**
+ * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * @kvm: pointer to the kvm struct
+ *
+ * Map the virtual CPU interface into the VM before running any VCPUs. We
+ * can't do this at creation time, because user space must first set the
+ * virtual CPU interface address in the guest physical address space. Also
+ * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ */
+int kvm_vgic_init(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int ret = 0, i;
+
+ if (!irqchip_in_kernel(kvm))
+ return 0;
+
+ mutex_lock(&kvm->lock);
+
+ if (vgic_initialized(kvm))
+ goto out;
+
+ if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
+ IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
+ kvm_err("Need to set vgic cpu and dist addresses first\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ ret = vgic_init_maps(kvm);
+ if (ret) {
+ kvm_err("Unable to allocate maps\n");
+ goto out;
+ }
+
+ ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
+ vgic->vcpu_base, KVM_VGIC_V2_CPU_SIZE);
+ if (ret) {
+ kvm_err("Unable to remap VGIC CPU to VCPU\n");
+ goto out;
+ }
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vgic_vcpu_init(vcpu);
+
+ kvm->arch.vgic.ready = true;
+out:
+ if (ret)
+ kvm_vgic_destroy(kvm);
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+int kvm_vgic_create(struct kvm *kvm)
+{
+ int i, vcpu_lock_idx = -1, ret = 0;
+ struct kvm_vcpu *vcpu;
+
+ mutex_lock(&kvm->lock);
+
+ if (kvm->arch.vgic.vctrl_base) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ /*
+ * Any time a vcpu is run, vcpu_load is called which tries to grab the
+ * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
+ * that no other VCPUs are run while we create the vgic.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!mutex_trylock(&vcpu->mutex))
+ goto out_unlock;
+ vcpu_lock_idx = i;
+ }
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.has_run_once) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ }
+
+ spin_lock_init(&kvm->arch.vgic.lock);
+ kvm->arch.vgic.in_kernel = true;
+ kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
+ kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
+ kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+
+out_unlock:
+ for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+ vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+ mutex_unlock(&vcpu->mutex);
+ }
+
+out:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+static int vgic_ioaddr_overlap(struct kvm *kvm)
+{
+ phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
+ phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
+
+ if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
+ return 0;
+ if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
+ (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
+ return -EBUSY;
+ return 0;
+}
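As a worked example, assuming for illustration that KVM_VGIC_V2_DIST_SIZE is 4 KiB: with dist = 0x08000000 and cpu = 0x08000800, dist <= cpu and dist + 0x1000 = 0x08001000 > 0x08000800, so the two ranges overlap and -EBUSY is returned.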
+
+static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
+ phys_addr_t addr, phys_addr_t size)
+{
+ int ret;
+
+ if (addr & ~KVM_PHYS_MASK)
+ return -E2BIG;
+
+ if (addr & (SZ_4K - 1))
+ return -EINVAL;
+
+ if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
+ return -EEXIST;
+ if (addr + size < addr)
+ return -EINVAL;
+
+ *ioaddr = addr;
+ ret = vgic_ioaddr_overlap(kvm);
+ if (ret)
+ *ioaddr = VGIC_ADDR_UNDEF;
+
+ return ret;
+}
+
+/**
+ * kvm_vgic_addr - set or get vgic VM base addresses
+ * @kvm: pointer to the vm struct
+ * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX
+ * @addr: pointer to address value
+ * @write: if true set the address in the VM address space, if false read the
+ * address
+ *
+ * Set or get the vgic base addresses for the distributor and the virtual CPU
+ * interface in the VM physical address space. These addresses are properties
+ * of the emulated core/SoC and therefore user space initially knows this
+ * information.
+ */
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
+{
+ int r = 0;
+ struct vgic_dist *vgic = &kvm->arch.vgic;
+
+ mutex_lock(&kvm->lock);
+ switch (type) {
+ case KVM_VGIC_V2_ADDR_TYPE_DIST:
+ if (write) {
+ r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
+ *addr, KVM_VGIC_V2_DIST_SIZE);
+ } else {
+ *addr = vgic->vgic_dist_base;
+ }
+ break;
+ case KVM_VGIC_V2_ADDR_TYPE_CPU:
+ if (write) {
+ r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
+ *addr, KVM_VGIC_V2_CPU_SIZE);
+ } else {
+ *addr = vgic->vgic_cpu_base;
+ }
+ break;
+ default:
+ r = -ENODEV;
+ }
+
+ mutex_unlock(&kvm->lock);
+ return r;
+}
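From user space this setter is reached through the KVM device API; a minimal sketch follows, where vgic_fd (assumed to be the fd returned by KVM_CREATE_DEVICE for KVM_DEV_TYPE_ARM_VGIC_V2) and the example address are assumptions, not part of this patch:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hedged sketch: set the distributor base in the guest physical space. */
static int set_dist_base(int vgic_fd)
{
	uint64_t dist_base = 0x08000000;	/* example guest-physical address */
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr  = KVM_VGIC_V2_ADDR_TYPE_DIST,
		.addr  = (uint64_t)(unsigned long)&dist_base,
	};

	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}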
+
+static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ bool updated = false;
+ struct vgic_vmcr vmcr;
+ u32 *vmcr_field;
+ u32 reg;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+
+ switch (offset & ~0x3) {
+ case GIC_CPU_CTRL:
+ vmcr_field = &vmcr.ctlr;
+ break;
+ case GIC_CPU_PRIMASK:
+ vmcr_field = &vmcr.pmr;
+ break;
+ case GIC_CPU_BINPOINT:
+ vmcr_field = &vmcr.bpr;
+ break;
+ case GIC_CPU_ALIAS_BINPOINT:
+ vmcr_field = &vmcr.abpr;
+ break;
+ default:
+ BUG();
+ }
+
+ if (!mmio->is_write) {
+ reg = *vmcr_field;
+ mmio_data_write(mmio, ~0, reg);
+ } else {
+ reg = mmio_data_read(mmio, ~0);
+ if (reg != *vmcr_field) {
+ *vmcr_field = reg;
+ vgic_set_vmcr(vcpu, &vmcr);
+ updated = true;
+ }
+ }
+ return updated;
+}
+
+static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
+}
+
+static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 reg;
+
+ if (mmio->is_write)
+ return false;
+
+ /* GICC_IIDR */
+ reg = (PRODUCT_ID_KVM << 20) |
+ (GICC_ARCH_VERSION_V2 << 16) |
+ (IMPLEMENTER_ARM << 0);
+ mmio_data_write(mmio, ~0, reg);
+ return false;
+}
+
+/*
+ * CPU Interface Registers - these are not accessed by the VM, but by user
+ * space for saving and restoring the VGIC state.
+ */
+static const struct mmio_range vgic_cpu_ranges[] = {
+ {
+ .base = GIC_CPU_CTRL,
+ .len = 12,
+ .handle_mmio = handle_cpu_mmio_misc,
+ },
+ {
+ .base = GIC_CPU_ALIAS_BINPOINT,
+ .len = 4,
+ .handle_mmio = handle_mmio_abpr,
+ },
+ {
+ .base = GIC_CPU_ACTIVEPRIO,
+ .len = 16,
+ .handle_mmio = handle_mmio_raz_wi,
+ },
+ {
+ .base = GIC_CPU_IDENT,
+ .len = 4,
+ .handle_mmio = handle_cpu_mmio_ident,
+ },
+};
+
+static int vgic_attr_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ u32 *reg, bool is_write)
+{
+ const struct mmio_range *r = NULL, *ranges;
+ phys_addr_t offset;
+ int ret, cpuid, c;
+ struct kvm_vcpu *vcpu, *tmp_vcpu;
+ struct vgic_dist *vgic;
+ struct kvm_exit_mmio mmio;
+
+ offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
+ KVM_DEV_ARM_VGIC_CPUID_SHIFT;
+
+ mutex_lock(&dev->kvm->lock);
+
+ ret = vgic_init_maps(dev->kvm);
+ if (ret)
+ goto out;
+
+ if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ vcpu = kvm_get_vcpu(dev->kvm, cpuid);
+ vgic = &dev->kvm->arch.vgic;
+
+ mmio.len = 4;
+ mmio.is_write = is_write;
+ if (is_write)
+ mmio_data_write(&mmio, ~0, *reg);
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ mmio.phys_addr = vgic->vgic_dist_base + offset;
+ ranges = vgic_dist_ranges;
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ mmio.phys_addr = vgic->vgic_cpu_base + offset;
+ ranges = vgic_cpu_ranges;
+ break;
+ default:
+ BUG();
+ }
+ r = find_matching_range(ranges, &mmio, offset);
+
+ if (unlikely(!r || !r->handle_mmio)) {
+ ret = -ENXIO;
+ goto out;
+ }
+
+ spin_lock(&vgic->lock);
+
+ /*
+ * Ensure that no other VCPU is running by checking the vcpu->cpu
+ * field. If no other VCPUs are running we can safely access the VGIC
+ * state, because even if another VCPU is run after this point, that
+ * VCPU will not touch the vgic state, because it will block on
+ * getting the vgic->lock in kvm_vgic_sync_hwstate().
+ */
+ kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
+ if (unlikely(tmp_vcpu->cpu != -1)) {
+ ret = -EBUSY;
+ goto out_vgic_unlock;
+ }
+ }
+
+ /*
+ * Move all pending IRQs from the LRs on all VCPUs so the pending
+ * state can be properly represented in the register state accessible
+ * through this API.
+ */
+ kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
+ vgic_unqueue_irqs(tmp_vcpu);
+
+ offset -= r->base;
+ r->handle_mmio(vcpu, &mmio, offset);
+
+ if (!is_write)
+ *reg = mmio_data_read(&mmio, ~0);
+
+ ret = 0;
+out_vgic_unlock:
+ spin_unlock(&vgic->lock);
+out:
+ mutex_unlock(&dev->kvm->lock);
+ return ret;
+}
+
+static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 addr;
+ unsigned long type = (unsigned long)attr->attr;
+
+ if (copy_from_user(&addr, uaddr, sizeof(addr)))
+ return -EFAULT;
+
+ r = kvm_vgic_addr(dev->kvm, type, &addr, true);
+ return (r == -ENODEV) ? -ENXIO : r;
+ }
+
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 reg;
+
+ if (get_user(reg, uaddr))
+ return -EFAULT;
+
+ return vgic_attr_regs_access(dev, attr, &reg, true);
+ }
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 val;
+ int ret = 0;
+
+ if (get_user(val, uaddr))
+ return -EFAULT;
+
+ /*
+ * We require:
+ * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
+ * - at most 1024 interrupts
+ * - a multiple of 32 interrupts
+ */
+ if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
+ val > VGIC_MAX_IRQS ||
+ (val & 31))
+ return -EINVAL;
+
+ mutex_lock(&dev->kvm->lock);
+
+ if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+ ret = -EBUSY;
+ else
+ dev->kvm->arch.vgic.nr_irqs = val;
+
+ mutex_unlock(&dev->kvm->lock);
+
+ return ret;
+ }
+
+ }
+
+ return -ENXIO;
+}
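The KVM_DEV_ARM_VGIC_GRP_NR_IRQS path above can be exercised the same way; a minimal sketch, with the fd name and the chosen value being assumptions:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Hedged sketch: 128 interrupts = 32 private + 96 SPIs; the value must be
 * a multiple of 32 in [64, 1024] and be set before the vgic is initialized.
 */
static int set_nr_irqs(int vgic_fd)
{
	uint32_t nr_irqs = 128;
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
		.addr  = (uint64_t)(unsigned long)&nr_irqs,
	};

	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}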
+
+static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 addr;
+ unsigned long type = (unsigned long)attr->attr;
+
+ r = kvm_vgic_addr(dev->kvm, type, &addr, false);
+ if (r)
+ return (r == -ENODEV) ? -ENXIO : r;
+
+ if (copy_to_user(uaddr, &addr, sizeof(addr)))
+ return -EFAULT;
+ break;
+ }
+
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 reg = 0;
+
+ r = vgic_attr_regs_access(dev, attr, &reg, false);
+ if (r)
+ return r;
+ r = put_user(reg, uaddr);
+ break;
+ }
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
+ break;
+ }
+
+ }
+
+ return r;
+}
+
+static int vgic_has_attr_regs(const struct mmio_range *ranges,
+ phys_addr_t offset)
+{
+ struct kvm_exit_mmio dev_attr_mmio;
+
+ dev_attr_mmio.len = 4;
+ if (find_matching_range(ranges, &dev_attr_mmio, offset))
+ return 0;
+ else
+ return -ENXIO;
+}
+
+static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ phys_addr_t offset;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR:
+ switch (attr->attr) {
+ case KVM_VGIC_V2_ADDR_TYPE_DIST:
+ case KVM_VGIC_V2_ADDR_TYPE_CPU:
+ return 0;
+ }
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ return vgic_has_attr_regs(vgic_dist_ranges, offset);
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ return vgic_has_attr_regs(vgic_cpu_ranges, offset);
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+ return 0;
+ }
+ return -ENXIO;
+}
+
+static void vgic_destroy(struct kvm_device *dev)
+{
+ kfree(dev);
+}
+
+static int vgic_create(struct kvm_device *dev, u32 type)
+{
+ return kvm_vgic_create(dev->kvm);
+}
+
+static struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+ .name = "kvm-arm-vgic",
+ .create = vgic_create,
+ .destroy = vgic_destroy,
+ .set_attr = vgic_set_attr,
+ .get_attr = vgic_get_attr,
+ .has_attr = vgic_has_attr,
+};
+
+static void vgic_init_maintenance_interrupt(void *info)
+{
+ enable_percpu_irq(vgic->maint_irq, 0);
+}
+
+static int vgic_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *cpu)
+{
+ switch (action) {
+ case CPU_STARTING:
+ case CPU_STARTING_FROZEN:
+ vgic_init_maintenance_interrupt(NULL);
+ break;
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
+ disable_percpu_irq(vgic->maint_irq);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block vgic_cpu_nb = {
+ .notifier_call = vgic_cpu_notify,
+};
+
+static const struct of_device_id vgic_ids[] = {
+ { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
+ { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+ {},
+};
+
+int kvm_vgic_hyp_init(void)
+{
+ const struct of_device_id *matched_id;
+ int (*vgic_probe)(struct device_node *, const struct vgic_ops **,
+ const struct vgic_params **);
+ struct device_node *vgic_node;
+ int ret;
+
+ vgic_node = of_find_matching_node_and_match(NULL,
+ vgic_ids, &matched_id);
+ if (!vgic_node) {
+ kvm_err("error: no compatible GIC node found\n");
+ return -ENODEV;
+ }
+
+ vgic_probe = matched_id->data;
+ ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
+ if (ret)
+ return ret;
+
+ ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
+ "vgic", kvm_get_running_vcpus());
+ if (ret) {
+ kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
+ return ret;
+ }
+
+ ret = register_cpu_notifier(&vgic_cpu_nb);
+ if (ret) {
+ kvm_err("Cannot register vgic CPU notifier\n");
+ goto out_free_irq;
+ }
+
+ /* Callback into arch code for setup */
+ vgic_arch_setup(vgic);
+
+ on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
+ return kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+ KVM_DEV_TYPE_ARM_VGIC_V2);
+
+out_free_irq:
+ free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
+ return ret;
+}
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index ea475cd03511..d6a3d0993d88 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
#include "async_pf.h"
#include <trace/events/kvm.h>
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+ kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+ kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
static struct kmem_cache *async_pf_cache;
int kvm_async_pf_init(void)
@@ -56,7 +71,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu)
static void async_pf_execute(struct work_struct *work)
{
- struct page *page = NULL;
struct kvm_async_pf *apf =
container_of(work, struct kvm_async_pf, work);
struct mm_struct *mm = apf->mm;
@@ -66,16 +80,13 @@ static void async_pf_execute(struct work_struct *work)
might_sleep();
- use_mm(mm);
down_read(&mm->mmap_sem);
- get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL);
+ get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem);
- unuse_mm(mm);
+ kvm_async_page_present_sync(vcpu, apf);
spin_lock(&vcpu->async_pf.lock);
list_add_tail(&apf->link, &vcpu->async_pf.done);
- apf->page = page;
- apf->done = true;
spin_unlock(&vcpu->async_pf.lock);
/*
@@ -83,12 +94,12 @@ static void async_pf_execute(struct work_struct *work)
* this point
*/
- trace_kvm_async_pf_completed(addr, page, gva);
+ trace_kvm_async_pf_completed(addr, gva);
if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
- mmdrop(mm);
+ mmput(mm);
kvm_put_kvm(vcpu->kvm);
}
@@ -99,10 +110,17 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
struct kvm_async_pf *work =
list_entry(vcpu->async_pf.queue.next,
typeof(*work), queue);
- cancel_work_sync(&work->work);
list_del(&work->queue);
- if (!work->done) /* work was canceled */
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+ flush_work(&work->work);
+#else
+ if (cancel_work_sync(&work->work)) {
+ mmput(work->mm);
+ kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
kmem_cache_free(async_pf_cache, work);
+ }
+#endif
}
spin_lock(&vcpu->async_pf.lock);
@@ -111,8 +129,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
list_entry(vcpu->async_pf.done.next,
typeof(*work), link);
list_del(&work->link);
- if (!is_error_page(work->page))
- kvm_release_page_clean(work->page);
kmem_cache_free(async_pf_cache, work);
}
spin_unlock(&vcpu->async_pf.lock);
@@ -132,19 +148,16 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
list_del(&work->link);
spin_unlock(&vcpu->async_pf.lock);
- if (work->page)
- kvm_arch_async_page_ready(vcpu, work);
- kvm_arch_async_page_present(vcpu, work);
+ kvm_arch_async_page_ready(vcpu, work);
+ kvm_async_page_present_async(vcpu, work);
list_del(&work->queue);
vcpu->async_pf.queued--;
- if (!is_error_page(work->page))
- kvm_release_page_clean(work->page);
kmem_cache_free(async_pf_cache, work);
}
}
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch)
{
struct kvm_async_pf *work;
@@ -162,14 +175,13 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
if (!work)
return 0;
- work->page = NULL;
- work->done = false;
+ work->wakeup_all = false;
work->vcpu = vcpu;
work->gva = gva;
- work->addr = gfn_to_hva(vcpu->kvm, gfn);
+ work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
- atomic_inc(&work->mm->mm_count);
+ atomic_inc(&work->mm->mm_users);
kvm_get_kvm(work->vcpu->kvm);
/* this can't really happen otherwise gfn_to_pfn_async
@@ -187,7 +199,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
return 1;
retry_sync:
kvm_put_kvm(work->vcpu->kvm);
- mmdrop(work->mm);
+ mmput(work->mm);
kmem_cache_free(async_pf_cache, work);
return 0;
}
@@ -203,7 +215,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu)
if (!work)
return -ENOMEM;
- work->page = KVM_ERR_PTR_BAD_PAGE;
+ work->wakeup_all = true;
INIT_LIST_HEAD(&work->queue); /* for list_del to work */
spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 64ee720b75c7..71ed39941b9c 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -31,11 +31,14 @@
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
+#include <linux/srcu.h>
#include <linux/slab.h>
+#include <linux/seqlock.h>
+#include <trace/events/kvm.h>
#include "iodev.h"
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
/*
* --------------------------------------------------------------------
* irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -74,7 +77,8 @@ struct _irqfd {
struct kvm *kvm;
wait_queue_t wait;
/* Update side is protected by irqfds.lock */
- struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
+ struct kvm_kernel_irq_routing_entry irq_entry;
+ seqcount_t irq_entry_sc;
/* Used for level IRQ fast-path */
int gsi;
struct work_struct inject;
@@ -118,19 +122,22 @@ static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
struct _irqfd_resampler *resampler;
+ struct kvm *kvm;
struct _irqfd *irqfd;
+ int idx;
resampler = container_of(kian, struct _irqfd_resampler, notifier);
+ kvm = resampler->kvm;
- kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
resampler->notifier.gsi, 0, false);
- rcu_read_lock();
+ idx = srcu_read_lock(&kvm->irq_srcu);
list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
eventfd_signal(irqfd->resamplefd, 1);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
static void
@@ -142,7 +149,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
mutex_lock(&kvm->irqfds.resampler_lock);
list_del_rcu(&irqfd->resampler_link);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
if (list_empty(&resampler->list)) {
list_del(&resampler->link);
@@ -219,19 +226,24 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
unsigned long flags = (unsigned long)key;
- struct kvm_kernel_irq_routing_entry *irq;
+ struct kvm_kernel_irq_routing_entry irq;
struct kvm *kvm = irqfd->kvm;
+ unsigned seq;
+ int idx;
if (flags & POLLIN) {
- rcu_read_lock();
- irq = rcu_dereference(irqfd->irq_entry);
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ do {
+ seq = read_seqcount_begin(&irqfd->irq_entry_sc);
+ irq = irqfd->irq_entry;
+ } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
/* An event has been signaled, inject an interrupt */
- if (irq)
- kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+ if (irq.type == KVM_IRQ_ROUTING_MSI)
+ kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
false);
else
schedule_work(&irqfd->inject);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
if (flags & POLLHUP) {
@@ -267,34 +279,37 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
}
/* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
- struct kvm_irq_routing_table *irq_rt)
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
{
struct kvm_kernel_irq_routing_entry *e;
+ struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
+ int i, n_entries;
- if (irqfd->gsi >= irq_rt->nr_rt_entries) {
- rcu_assign_pointer(irqfd->irq_entry, NULL);
- return;
- }
+ n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
+
+ write_seqcount_begin(&irqfd->irq_entry_sc);
+
+ irqfd->irq_entry.type = 0;
- hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) {
+ e = entries;
+ for (i = 0; i < n_entries; ++i, ++e) {
/* Only fast-path MSI. */
if (e->type == KVM_IRQ_ROUTING_MSI)
- rcu_assign_pointer(irqfd->irq_entry, e);
- else
- rcu_assign_pointer(irqfd->irq_entry, NULL);
+ irqfd->irq_entry = *e;
}
+
+ write_seqcount_end(&irqfd->irq_entry_sc);
}
static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
- struct kvm_irq_routing_table *irq_rt;
struct _irqfd *irqfd, *tmp;
struct file *file = NULL;
struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
int ret;
unsigned int events;
+ int idx;
irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
if (!irqfd)
@@ -305,6 +320,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
INIT_LIST_HEAD(&irqfd->list);
INIT_WORK(&irqfd->inject, irqfd_inject);
INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
+ seqcount_init(&irqfd->irq_entry_sc);
file = eventfd_fget(args->fd);
if (IS_ERR(file)) {
@@ -363,7 +379,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
}
list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
mutex_unlock(&kvm->irqfds.resampler_lock);
}
@@ -387,9 +403,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
goto fail;
}
- irq_rt = rcu_dereference_protected(kvm->irq_routing,
- lockdep_is_held(&kvm->irqfds.lock));
- irqfd_update(kvm, irqfd, irq_rt);
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irqfd_update(kvm, irqfd);
+ srcu_read_unlock(&kvm->irq_srcu, idx);
events = file->f_op->poll(file, &irqfd->pt);
@@ -428,12 +444,73 @@ fail:
kfree(irqfd);
return ret;
}
+
+bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+ struct kvm_irq_ack_notifier *kian;
+ int gsi, idx;
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+ if (gsi != -1)
+ hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+ link)
+ if (kian->gsi == gsi) {
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+ return true;
+ }
+
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+ struct kvm_irq_ack_notifier *kian;
+ int gsi, idx;
+
+ trace_kvm_ack_irq(irqchip, pin);
+
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
+ if (gsi != -1)
+ hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+ link)
+ if (kian->gsi == gsi)
+ kian->irq_acked(kian);
+ srcu_read_unlock(&kvm->irq_srcu, idx);
+}
+
+void kvm_register_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ mutex_lock(&kvm->irq_lock);
+ hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
+ mutex_unlock(&kvm->irq_lock);
+#ifdef __KVM_HAVE_IOAPIC
+ kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
+
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
+{
+ mutex_lock(&kvm->irq_lock);
+ hlist_del_init_rcu(&kian->link);
+ mutex_unlock(&kvm->irq_lock);
+ synchronize_srcu(&kvm->irq_srcu);
+#ifdef __KVM_HAVE_IOAPIC
+ kvm_vcpu_request_scan_ioapic(kvm);
+#endif
+}
#endif
void
kvm_eventfd_init(struct kvm *kvm)
{
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
spin_lock_init(&kvm->irqfds.lock);
INIT_LIST_HEAD(&kvm->irqfds.items);
INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
@@ -442,7 +519,7 @@ kvm_eventfd_init(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->ioeventfds);
}
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+#ifdef CONFIG_HAVE_KVM_IRQFD
/*
* shutdown any irqfd's that match fd+gsi
*/
@@ -461,14 +538,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
/*
- * This rcu_assign_pointer is needed for when
+ * This clearing of irq_entry.type is needed for when
* another thread calls kvm_irq_routing_update before
* we flush workqueue below (we synchronize with
* kvm_irq_routing_update using irqfds.lock).
- * It is paired with synchronize_rcu done by caller
- * of that function.
*/
- rcu_assign_pointer(irqfd->irq_entry, NULL);
+ write_seqcount_begin(&irqfd->irq_entry_sc);
+ irqfd->irq_entry.type = 0;
+ write_seqcount_end(&irqfd->irq_entry_sc);
irqfd_deactivate(irqfd);
}
}
@@ -523,20 +600,17 @@ kvm_irqfd_release(struct kvm *kvm)
}
/*
- * Change irq_routing and irqfd.
- * Caller must invoke synchronize_rcu afterwards.
+ * Take note of a change in irq routing.
+ * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
*/
-void kvm_irq_routing_update(struct kvm *kvm,
- struct kvm_irq_routing_table *irq_rt)
+void kvm_irq_routing_update(struct kvm *kvm)
{
struct _irqfd *irqfd;
spin_lock_irq(&kvm->irqfds.lock);
- rcu_assign_pointer(kvm->irq_routing, irq_rt);
-
list_for_each_entry(irqfd, &kvm->irqfds.items, list)
- irqfd_update(kvm, irqfd, irq_rt);
+ irqfd_update(kvm, irqfd);
spin_unlock_irq(&kvm->irqfds.lock);
}
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 5eaf18f90e83..b47541dd798f 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -519,7 +519,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
return 0;
}
-void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
+static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
{
int i;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 615d8c995c3c..90d43e95dcf8 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -91,7 +91,6 @@ void kvm_ioapic_destroy(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
int level, bool line_status);
void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
-void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, unsigned long *dest_map);
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index dec997188dfb..7dd680a1f3bd 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -203,11 +203,7 @@ int kvm_assign_device(struct kvm *kvm,
pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
- printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
- assigned_dev->host_segnr,
- assigned_dev->host_busnr,
- PCI_SLOT(assigned_dev->host_devfn),
- PCI_FUNC(assigned_dev->host_devfn));
+ dev_info(&pdev->dev, "kvm assign device\n");
return 0;
out_unmap:
@@ -233,11 +229,7 @@ int kvm_deassign_device(struct kvm *kvm,
pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
- printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
- assigned_dev->host_segnr,
- assigned_dev->host_busnr,
- PCI_SLOT(assigned_dev->host_devfn),
- PCI_FUNC(assigned_dev->host_devfn));
+ dev_info(&pdev->dev, "kvm deassign device\n");
return 0;
}
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e2e6b4473a96..963b8995a9e8 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -160,9 +160,10 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e,
*/
int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
{
+ struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
struct kvm_kernel_irq_routing_entry *e;
int ret = -EINVAL;
- struct kvm_irq_routing_table *irq_rt;
+ int idx;
trace_kvm_set_irq(irq, level, irq_source_id);
@@ -174,17 +175,15 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
* Since there's no easy way to do this, we only support injecting MSI
* which is limited to 1:1 GSI mapping.
*/
- rcu_read_lock();
- irq_rt = rcu_dereference(kvm->irq_routing);
- if (irq < irq_rt->nr_rt_entries)
- hlist_for_each_entry(e, &irq_rt->map[irq], link) {
- if (likely(e->type == KVM_IRQ_ROUTING_MSI))
- ret = kvm_set_msi_inatomic(e, kvm);
- else
- ret = -EWOULDBLOCK;
- break;
- }
- rcu_read_unlock();
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
+ e = &entries[0];
+ if (likely(e->type == KVM_IRQ_ROUTING_MSI))
+ ret = kvm_set_msi_inatomic(e, kvm);
+ else
+ ret = -EWOULDBLOCK;
+ }
+ srcu_read_unlock(&kvm->irq_srcu, idx);
return ret;
}
@@ -253,26 +252,25 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
mutex_lock(&kvm->irq_lock);
hlist_del_rcu(&kimn->link);
mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
}
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask)
{
struct kvm_irq_mask_notifier *kimn;
- int gsi;
+ int idx, gsi;
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
if (gsi != -1)
hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
if (kimn->irq == gsi)
kimn->func(kimn, mask);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
- struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
@@ -303,7 +301,6 @@ int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
e->irqchip.pin = ue->u.irqchip.pin + delta;
if (e->irqchip.pin >= max_pin)
goto out;
- rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
@@ -322,13 +319,13 @@ out:
#define IOAPIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
- .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) }
+ .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
#ifdef CONFIG_X86
# define PIC_ROUTING_ENTRY(irq) \
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
- .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 }
+ .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
# define ROUTING_ENTRY2(irq) \
IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
#else
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 20dc9e4a8f6c..7f256f31df10 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -26,69 +26,47 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
+#include <linux/srcu.h>
#include <linux/export.h>
#include <trace/events/kvm.h>
#include "irq.h"
-bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
-{
- struct kvm_irq_ack_notifier *kian;
- int gsi;
-
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
- if (gsi != -1)
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
- if (kian->gsi == gsi) {
- rcu_read_unlock();
- return true;
- }
-
- rcu_read_unlock();
-
- return false;
-}
-EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
+struct kvm_irq_routing_table {
+ int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
+ struct kvm_kernel_irq_routing_entry *rt_entries;
+ u32 nr_rt_entries;
+ /*
+ * Array indexed by gsi. Each entry contains list of irq chips
+ * the gsi is connected to.
+ */
+ struct hlist_head map[0];
+};
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+int kvm_irq_map_gsi(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *entries, int gsi)
{
- struct kvm_irq_ack_notifier *kian;
- int gsi;
-
- trace_kvm_ack_irq(irqchip, pin);
-
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
- if (gsi != -1)
- hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
- link)
- if (kian->gsi == gsi)
- kian->irq_acked(kian);
- rcu_read_unlock();
-}
+ struct kvm_irq_routing_table *irq_rt;
+ struct kvm_kernel_irq_routing_entry *e;
+ int n = 0;
+
+ irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
+ lockdep_is_held(&kvm->irq_lock));
+ if (gsi < irq_rt->nr_rt_entries) {
+ hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
+ entries[n] = *e;
+ ++n;
+ }
+ }
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
- struct kvm_irq_ack_notifier *kian)
-{
- mutex_lock(&kvm->irq_lock);
- hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
- mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
- kvm_vcpu_request_scan_ioapic(kvm);
-#endif
+ return n;
}
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
- struct kvm_irq_ack_notifier *kian)
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
- mutex_lock(&kvm->irq_lock);
- hlist_del_init_rcu(&kian->link);
- mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
-#ifdef __KVM_HAVE_IOAPIC
- kvm_vcpu_request_scan_ioapic(kvm);
-#endif
+ struct kvm_irq_routing_table *irq_rt;
+
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
+ return irq_rt->chip[irqchip][pin];
}
int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
@@ -114,9 +92,8 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
- struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
- int ret = -1, i = 0;
- struct kvm_irq_routing_table *irq_rt;
+ struct kvm_kernel_irq_routing_entry irq_set[KVM_NR_IRQCHIPS];
+ int ret = -1, i, idx;
trace_kvm_set_irq(irq, level, irq_source_id);
@@ -124,12 +101,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
- rcu_read_lock();
- irq_rt = rcu_dereference(kvm->irq_routing);
- if (irq < irq_rt->nr_rt_entries)
- hlist_for_each_entry(e, &irq_rt->map[irq], link)
- irq_set[i++] = *e;
- rcu_read_unlock();
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ i = kvm_irq_map_gsi(kvm, irq_set, irq);
+ srcu_read_unlock(&kvm->irq_srcu, idx);
while(i--) {
int r;
@@ -170,9 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
e->gsi = ue->gsi;
e->type = ue->type;
- r = kvm_set_routing_entry(rt, e, ue);
+ r = kvm_set_routing_entry(e, ue);
if (r)
goto out;
+ if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
+ rt->chip[e->irqchip.irqchip][e->irqchip.pin] = e->gsi;
hlist_add_head(&e->link, &rt->map[e->gsi]);
r = 0;
@@ -223,10 +199,11 @@ int kvm_set_irq_routing(struct kvm *kvm,
mutex_lock(&kvm->irq_lock);
old = kvm->irq_routing;
- kvm_irq_routing_update(kvm, new);
+ rcu_assign_pointer(kvm->irq_routing, new);
+ kvm_irq_routing_update(kvm);
mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
+ synchronize_srcu_expedited(&kvm->irq_srcu);
new = old;
r = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8cf1cd2fadaa..9cae94206f41 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -70,7 +70,8 @@ MODULE_LICENSE("GPL");
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
-DEFINE_RAW_SPINLOCK(kvm_lock);
+DEFINE_SPINLOCK(kvm_lock);
+static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
static cpumask_var_t cpus_hardware_enabled;
@@ -94,36 +95,22 @@ static int hardware_enable_all(void);
static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
+static void update_memslots(struct kvm_memslots *slots,
+ struct kvm_memory_slot *new, u64 last_generation);
-bool kvm_rebooting;
+static void kvm_release_pfn_dirty(pfn_t pfn);
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot, gfn_t gfn);
+
+__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
static bool largepages_enabled = true;
bool kvm_is_mmio_pfn(pfn_t pfn)
{
- if (pfn_valid(pfn)) {
- int reserved;
- struct page *tail = pfn_to_page(pfn);
- struct page *head = compound_head(tail);
- reserved = PageReserved(head);
- if (head != tail) {
- /*
- * "head" is not a dangling pointer
- * (compound_head takes care of that)
- * but the hugepage may have been splitted
- * from under us (and we may not hold a
- * reference count on the head page so it can
- * be reused before we run PageReferenced), so
- * we've to check PageTail before returning
- * what we just read.
- */
- smp_rmb();
- if (PageTail(tail))
- return reserved;
- }
- return PageReserved(tail);
- }
+ if (pfn_valid(pfn))
+ return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
return true;
}
@@ -142,7 +129,8 @@ int vcpu_load(struct kvm_vcpu *vcpu)
struct pid *oldpid = vcpu->pid;
struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
rcu_assign_pointer(vcpu->pid, newpid);
- synchronize_rcu();
+ if (oldpid)
+ synchronize_rcu();
put_pid(oldpid);
}
cpu = get_cpu();
@@ -469,14 +457,16 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = kvm_arch_init_vm(kvm, type);
if (r)
- goto out_err_nodisable;
+ goto out_err_no_disable;
r = hardware_enable_all();
if (r)
- goto out_err_nodisable;
+ goto out_err_no_disable;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif
@@ -485,10 +475,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = -ENOMEM;
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!kvm->memslots)
- goto out_err_nosrcu;
+ goto out_err_no_srcu;
kvm_init_memslots_id(kvm);
if (init_srcu_struct(&kvm->srcu))
- goto out_err_nosrcu;
+ goto out_err_no_srcu;
+ if (init_srcu_struct(&kvm->irq_srcu))
+ goto out_err_no_irq_srcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
GFP_KERNEL);
@@ -510,17 +502,19 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (r)
goto out_err;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return kvm;
out_err:
+ cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
+out_err_no_srcu:
hardware_disable_all();
-out_err_nodisable:
+out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
kfree(kvm->memslots);
@@ -560,24 +554,24 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
/*
* Free any memory in @free but not in @dont.
*/
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
kvm_destroy_dirty_bitmap(free);
- kvm_arch_free_memslot(free, dont);
+ kvm_arch_free_memslot(kvm, free, dont);
free->npages = 0;
}
-void kvm_free_physmem(struct kvm *kvm)
+static void kvm_free_physmem(struct kvm *kvm)
{
struct kvm_memslots *slots = kvm->memslots;
struct kvm_memory_slot *memslot;
kvm_for_each_memslot(memslot, slots)
- kvm_free_physmem_slot(memslot, NULL);
+ kvm_free_physmem_slot(kvm, memslot, NULL);
kfree(kvm->memslots);
}
@@ -601,9 +595,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
struct mm_struct *mm = kvm->mm;
kvm_arch_sync_events(kvm);
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++)
kvm_io_bus_destroy(kvm->buses[i]);
@@ -616,6 +610,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
kvm_free_physmem(kvm);
+ cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
hardware_disable_all();
@@ -693,8 +688,9 @@ static void sort_memslots(struct kvm_memslots *slots)
slots->id_to_index[slots->memslots[i].id] = i;
}
-void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new,
- u64 last_generation)
+static void update_memslots(struct kvm_memslots *slots,
+ struct kvm_memory_slot *new,
+ u64 last_generation)
{
if (new) {
int id = new->id;
@@ -713,7 +709,7 @@ static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
{
u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
-#ifdef KVM_CAP_READONLY_MEM
+#ifdef __KVM_HAVE_READONLY_MEM
valid_flags |= KVM_MEM_READONLY;
#endif
@@ -731,7 +727,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
update_memslots(slots, new, kvm->memslots->generation);
rcu_assign_pointer(kvm->memslots, slots);
synchronize_srcu_expedited(&kvm->srcu);
- return old_memslots;
+
+ kvm_arch_memslots_updated(kvm);
+
+ return old_memslots;
}
/*
@@ -779,7 +778,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
- r = -EINVAL;
if (npages > KVM_MEM_MAX_NR_PAGES)
goto out;
@@ -793,7 +791,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
new.npages = npages;
new.flags = mem->flags;
- r = -EINVAL;
if (npages) {
if (!old.npages)
change = KVM_MR_CREATE;
@@ -838,7 +835,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (change == KVM_MR_CREATE) {
new.userspace_addr = mem->userspace_addr;
- if (kvm_arch_create_memslot(&new, npages))
+ if (kvm_arch_create_memslot(kvm, &new, npages))
goto out_free;
}
@@ -849,7 +846,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
}
if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
- r = -ENOMEM;
slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
GFP_KERNEL);
if (!slots)
@@ -889,6 +885,19 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
+ /* actual memory is freed via old in kvm_free_physmem_slot below */
+ if (change == KVM_MR_DELETE) {
+ new.dirty_bitmap = NULL;
+ memset(&new.arch, 0, sizeof(new.arch));
+ }
+
+ old_memslots = install_new_memslots(kvm, slots, &new);
+
+ kvm_arch_commit_memory_region(kvm, mem, &old, change);
+
+ kvm_free_physmem_slot(kvm, &old, &new);
+ kfree(old_memslots);
+
/*
* IOMMU mapping: New slots need to be mapped. Old slots need to be
* un-mapped and re-mapped if their base changes. Since base change
@@ -900,29 +909,15 @@ int __kvm_set_memory_region(struct kvm *kvm,
*/
if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
r = kvm_iommu_map_pages(kvm, &new);
- if (r)
- goto out_slots;
- }
-
- /* actual memory is freed via old in kvm_free_physmem_slot below */
- if (change == KVM_MR_DELETE) {
- new.dirty_bitmap = NULL;
- memset(&new.arch, 0, sizeof(new.arch));
+ return r;
}
- old_memslots = install_new_memslots(kvm, slots, &new);
-
- kvm_arch_commit_memory_region(kvm, mem, &old, change);
-
- kvm_free_physmem_slot(&old, &new);
- kfree(old_memslots);
-
return 0;
out_slots:
kfree(slots);
out_free:
- kvm_free_physmem_slot(&new, &old);
+ kvm_free_physmem_slot(kvm, &new, &old);
out:
return r;
}
@@ -940,8 +935,8 @@ int kvm_set_memory_region(struct kvm *kvm,
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);
-int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem)
+static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem)
{
if (mem->slot >= KVM_USER_MEM_SLOTS)
return -EINVAL;
@@ -1062,7 +1057,7 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
}
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
- gfn_t gfn)
+ gfn_t gfn)
{
return gfn_to_hva_many(slot, gfn, NULL);
}
@@ -1075,12 +1070,25 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
EXPORT_SYMBOL_GPL(gfn_to_hva);
/*
- * The hva returned by this function is only allowed to be read.
- * It should pair with kvm_read_hva() or kvm_read_hva_atomic().
+ * If writable is set to false, the hva returned by this function is only
+ * allowed to be read.
*/
-static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot,
+ gfn_t gfn, bool *writable)
{
- return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
+ unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false);
+
+ if (!kvm_is_error_hva(hva) && writable)
+ *writable = !memslot_is_readonly(slot);
+
+ return hva;
+}
+
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
+{
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+
+ return gfn_to_hva_memslot_prot(slot, gfn, writable);
}
static int kvm_read_hva(void *data, void __user *hva, int len)
@@ -1396,18 +1404,11 @@ void kvm_release_page_dirty(struct page *page)
}
EXPORT_SYMBOL_GPL(kvm_release_page_dirty);
-void kvm_release_pfn_dirty(pfn_t pfn)
+static void kvm_release_pfn_dirty(pfn_t pfn)
{
kvm_set_pfn_dirty(pfn);
kvm_release_pfn_clean(pfn);
}
-EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
-
-void kvm_set_page_dirty(struct page *page)
-{
- kvm_set_pfn_dirty(page_to_pfn(page));
-}
-EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
void kvm_set_pfn_dirty(pfn_t pfn)
{
@@ -1447,7 +1448,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int r;
unsigned long addr;
- addr = gfn_to_hva_read(kvm, gfn);
+ addr = gfn_to_hva_prot(kvm, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
r = kvm_read_hva(data, (void __user *)addr + offset, len);
@@ -1485,7 +1486,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
gfn_t gfn = gpa >> PAGE_SHIFT;
int offset = offset_in_page(gpa);
- addr = gfn_to_hva_read(kvm, gfn);
+ addr = gfn_to_hva_prot(kvm, gfn, NULL);
if (kvm_is_error_hva(addr))
return -EFAULT;
pagefault_disable();
@@ -1624,8 +1625,9 @@ EXPORT_SYMBOL_GPL(kvm_read_guest_cached);
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
{
- return kvm_write_guest_page(kvm, gfn, (const void *) empty_zero_page,
- offset, len);
+ const void *zero_page = (const void *) __va(page_to_phys(ZERO_PAGE(0)));
+
+ return kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
}
EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
@@ -1648,8 +1650,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
}
EXPORT_SYMBOL_GPL(kvm_clear_guest);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
- gfn_t gfn)
+static void mark_page_dirty_in_slot(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ gfn_t gfn)
{
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
@@ -1716,14 +1719,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
#endif /* !CONFIG_S390 */
-void kvm_resched(struct kvm_vcpu *vcpu)
-{
- if (!need_resched())
- return;
- cond_resched();
-}
-EXPORT_SYMBOL_GPL(kvm_resched);
-
bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
{
struct pid *pid;
@@ -1733,7 +1728,7 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
rcu_read_lock();
pid = rcu_dereference(target->pid);
if (pid)
- task = get_pid_task(target->pid, PIDTYPE_PID);
+ task = get_pid_task(pid, PIDTYPE_PID);
rcu_read_unlock();
if (!task)
return ret;
@@ -1748,7 +1743,6 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
-#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
/*
* Helper that checks whether a VCPU is eligible for directed yield.
* The most eligible candidate to yield to is decided by the following heuristics:
@@ -1771,20 +1765,22 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to);
* locking does no harm. It may result in trying to yield to the same VCPU,
* failing, and simply moving on to the next VCPU, and so on.
*/
-bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
+static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
{
+#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
bool eligible;
eligible = !vcpu->spin_loop.in_spin_loop ||
- (vcpu->spin_loop.in_spin_loop &&
- vcpu->spin_loop.dy_eligible);
+ vcpu->spin_loop.dy_eligible;
if (vcpu->spin_loop.in_spin_loop)
kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible);
return eligible;
-}
+#else
+ return true;
#endif
+}
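As a reading aid (not in the patch), with CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT set the logic above reduces to:

	/*
	 * in_spin_loop  dy_eligible | eligible?  dy_eligible afterwards
	 * false         (ignored)   | yes        unchanged
	 * true          true        | yes        false
	 * true          false       | no         true
	 */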
void kvm_vcpu_on_spin(struct kvm_vcpu *me)
{
@@ -1815,7 +1811,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
continue;
if (vcpu == me)
continue;
- if (waitqueue_active(&vcpu->wq))
+ if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
@@ -1893,7 +1889,7 @@ static struct file_operations kvm_vcpu_fops = {
*/
static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
- return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
+ return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
}
/*
@@ -2265,6 +2261,29 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
return filp->private_data;
}
+static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
+#ifdef CONFIG_KVM_MPIC
+ [KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
+ [KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
+#endif
+
+#ifdef CONFIG_KVM_XICS
+ [KVM_DEV_TYPE_XICS] = &kvm_xics_ops,
+#endif
+};
+
+int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+{
+ if (type >= ARRAY_SIZE(kvm_device_ops_table))
+ return -ENOSPC;
+
+ if (kvm_device_ops_table[type] != NULL)
+ return -EEXIST;
+
+ kvm_device_ops_table[type] = ops;
+ return 0;
+}
+
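An illustrative registration; my_dev_ops and KVM_DEV_TYPE_MY_DEV are hypothetical, and kvm_vfio_ops_init() in the new vfio.c below is the real in-tree user:

	static int __init my_dev_ops_init(void)
	{
		/* -ENOSPC if the type is out of range, -EEXIST if already claimed */
		return kvm_register_device_ops(&my_dev_ops, KVM_DEV_TYPE_MY_DEV);
	}
	module_init(my_dev_ops_init);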
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
@@ -2273,21 +2292,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
int ret;
- switch (cd->type) {
-#ifdef CONFIG_KVM_MPIC
- case KVM_DEV_TYPE_FSL_MPIC_20:
- case KVM_DEV_TYPE_FSL_MPIC_42:
- ops = &kvm_mpic_ops;
- break;
-#endif
-#ifdef CONFIG_KVM_XICS
- case KVM_DEV_TYPE_XICS:
- ops = &kvm_xics_ops;
- break;
-#endif
- default:
+ if (cd->type >= ARRAY_SIZE(kvm_device_ops_table))
+ return -ENODEV;
+
+ ops = kvm_device_ops_table[cd->type];
+ if (ops == NULL)
return -ENODEV;
- }
if (test)
return 0;
@@ -2305,7 +2315,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
return ret;
}
- ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
+ ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
if (ret < 0) {
ops->destroy(dev);
return ret;
@@ -2317,6 +2327,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
return 0;
}
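From userspace nothing changes; the usual probe-then-create sequence still works (a sketch, vm_fd assumed and error handling trimmed):

	int device_fd = -1;
	struct kvm_create_device cd = {
		.type  = KVM_DEV_TYPE_VFIO,
		.flags = KVM_CREATE_DEVICE_TEST,	/* existence check, no fd created */
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) == 0) {
		cd.flags = 0;				/* really create it */
		if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) == 0)
			device_fd = cd.fd;		/* anon fd, now O_CLOEXEC */
	}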
+static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
+{
+ switch (arg) {
+ case KVM_CAP_USER_MEMORY:
+ case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+ case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
+#ifdef CONFIG_KVM_APIC_ARCHITECTURE
+ case KVM_CAP_SET_BOOT_CPU_ID:
+#endif
+ case KVM_CAP_INTERNAL_ERROR_DATA:
+#ifdef CONFIG_HAVE_KVM_MSI
+ case KVM_CAP_SIGNAL_MSI:
+#endif
+#ifdef CONFIG_HAVE_KVM_IRQFD
+ case KVM_CAP_IRQFD_RESAMPLE:
+#endif
+ case KVM_CAP_CHECK_EXTENSION_VM:
+ return 1;
+#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
+ case KVM_CAP_IRQ_ROUTING:
+ return KVM_MAX_IRQ_ROUTES;
+#endif
+ default:
+ break;
+ }
+ return kvm_vm_ioctl_check_extension(kvm, arg);
+}
+
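A userspace sketch of the new per-VM query (vm_fd and the global kvm_fd are assumed); the VM-level ioctl advertises itself via KVM_CAP_CHECK_EXTENSION_VM, so falling back to the system ioctl stays cheap:

	int has_msi;

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_CHECK_EXTENSION_VM) > 0)
		has_msi = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_SIGNAL_MSI) > 0;
	else
		has_msi = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_SIGNAL_MSI) > 0;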
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2480,6 +2518,9 @@ static long kvm_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_CHECK_EXTENSION:
+ r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
+ break;
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
if (r == -ENOTTY)
@@ -2533,44 +2574,12 @@ out:
}
#endif
-static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *page[1];
- unsigned long addr;
- int npages;
- gfn_t gfn = vmf->pgoff;
- struct kvm *kvm = vma->vm_file->private_data;
-
- addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr))
- return VM_FAULT_SIGBUS;
-
- npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
- NULL);
- if (unlikely(npages != 1))
- return VM_FAULT_SIGBUS;
-
- vmf->page = page[0];
- return 0;
-}
-
-static const struct vm_operations_struct kvm_vm_vm_ops = {
- .fault = kvm_vm_fault,
-};
-
-static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
-{
- vma->vm_ops = &kvm_vm_vm_ops;
- return 0;
-}
-
static struct file_operations kvm_vm_fops = {
.release = kvm_vm_release,
.unlocked_ioctl = kvm_vm_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = kvm_vm_compat_ioctl,
#endif
- .mmap = kvm_vm_mmap,
.llseek = noop_llseek,
};
@@ -2589,40 +2598,13 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
return r;
}
#endif
- r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
+ r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
if (r < 0)
kvm_put_kvm(kvm);
return r;
}
-static long kvm_dev_ioctl_check_extension_generic(long arg)
-{
- switch (arg) {
- case KVM_CAP_USER_MEMORY:
- case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
- case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS:
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
- case KVM_CAP_SET_BOOT_CPU_ID:
-#endif
- case KVM_CAP_INTERNAL_ERROR_DATA:
-#ifdef CONFIG_HAVE_KVM_MSI
- case KVM_CAP_SIGNAL_MSI:
-#endif
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
- case KVM_CAP_IRQFD_RESAMPLE:
-#endif
- return 1;
-#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
- case KVM_CAP_IRQ_ROUTING:
- return KVM_MAX_IRQ_ROUTES;
-#endif
- default:
- break;
- }
- return kvm_dev_ioctl_check_extension(arg);
-}
-
static long kvm_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2630,7 +2612,6 @@ static long kvm_dev_ioctl(struct file *filp,
switch (ioctl) {
case KVM_GET_API_VERSION:
- r = -EINVAL;
if (arg)
goto out;
r = KVM_API_VERSION;
@@ -2639,10 +2620,9 @@ static long kvm_dev_ioctl(struct file *filp,
r = kvm_dev_ioctl_create_vm(arg);
break;
case KVM_CHECK_EXTENSION:
- r = kvm_dev_ioctl_check_extension_generic(arg);
+ r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
break;
case KVM_GET_VCPU_MMAP_SIZE:
- r = -EINVAL;
if (arg)
goto out;
r = PAGE_SIZE; /* struct kvm_run */
@@ -2687,7 +2667,7 @@ static void hardware_enable_nolock(void *junk)
cpumask_set_cpu(cpu, cpus_hardware_enabled);
- r = kvm_arch_hardware_enable(NULL);
+ r = kvm_arch_hardware_enable();
if (r) {
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
@@ -2697,11 +2677,12 @@ static void hardware_enable_nolock(void *junk)
}
}
-static void hardware_enable(void *junk)
+static void hardware_enable(void)
{
- raw_spin_lock(&kvm_lock);
- hardware_enable_nolock(junk);
- raw_spin_unlock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
+ if (kvm_usage_count)
+ hardware_enable_nolock(NULL);
+ raw_spin_unlock(&kvm_count_lock);
}
static void hardware_disable_nolock(void *junk)
@@ -2711,14 +2692,15 @@ static void hardware_disable_nolock(void *junk)
if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
return;
cpumask_clear_cpu(cpu, cpus_hardware_enabled);
- kvm_arch_hardware_disable(NULL);
+ kvm_arch_hardware_disable();
}
-static void hardware_disable(void *junk)
+static void hardware_disable(void)
{
- raw_spin_lock(&kvm_lock);
- hardware_disable_nolock(junk);
- raw_spin_unlock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
+ if (kvm_usage_count)
+ hardware_disable_nolock(NULL);
+ raw_spin_unlock(&kvm_count_lock);
}
static void hardware_disable_all_nolock(void)
@@ -2732,16 +2714,16 @@ static void hardware_disable_all_nolock(void)
static void hardware_disable_all(void)
{
- raw_spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
hardware_disable_all_nolock();
- raw_spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_count_lock);
}
static int hardware_enable_all(void)
{
int r = 0;
- raw_spin_lock(&kvm_lock);
+ raw_spin_lock(&kvm_count_lock);
kvm_usage_count++;
if (kvm_usage_count == 1) {
@@ -2754,7 +2736,7 @@ static int hardware_enable_all(void)
}
}
- raw_spin_unlock(&kvm_lock);
+ raw_spin_unlock(&kvm_count_lock);
return r;
}
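/*
 * Locking summary (annotation, not part of the patch): kvm_count_lock stays
 * a raw spinlock because hardware_enable()/hardware_disable() run from
 * CPU_STARTING/CPU_DYING notifiers with interrupts off, where sleeping locks
 * are forbidden under PREEMPT_RT. It now guards only kvm_usage_count and the
 * per-CPU enable state; vm_list remains under the ordinary kvm_lock used by
 * the stat readers further down.
 */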
@@ -2764,20 +2746,17 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
{
int cpu = (long)v;
- if (!kvm_usage_count)
- return NOTIFY_OK;
-
val &= ~CPU_TASKS_FROZEN;
switch (val) {
case CPU_DYING:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
cpu);
- hardware_disable(NULL);
+ hardware_disable();
break;
case CPU_STARTING:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
- hardware_enable(NULL);
+ hardware_enable();
break;
}
return NOTIFY_OK;
@@ -2990,10 +2969,10 @@ static int vm_stat_get(void *_offset, u64 *val)
struct kvm *kvm;
*val = 0;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
*val += *(u32 *)((void *)kvm + offset);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return 0;
}
@@ -3007,12 +2986,12 @@ static int vcpu_stat_get(void *_offset, u64 *val)
int i;
*val = 0;
- raw_spin_lock(&kvm_lock);
+ spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
*val += *(u32 *)((void *)vcpu + offset);
- raw_spin_unlock(&kvm_lock);
+ spin_unlock(&kvm_lock);
return 0;
}
@@ -3025,7 +3004,7 @@ static const struct file_operations *stat_fops[] = {
static int kvm_init_debug(void)
{
- int r = -EFAULT;
+ int r = -EEXIST;
struct kvm_stats_debugfs_item *p;
kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
@@ -3067,7 +3046,7 @@ static int kvm_suspend(void)
static void kvm_resume(void)
{
if (kvm_usage_count) {
- WARN_ON(raw_spin_is_locked(&kvm_lock));
+ WARN_ON(raw_spin_is_locked(&kvm_count_lock));
hardware_enable_nolock(NULL);
}
}
@@ -3089,6 +3068,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
if (vcpu->preempted)
vcpu->preempted = false;
+ kvm_arch_sched_in(vcpu, cpu);
+
kvm_arch_vcpu_load(vcpu, cpu);
}
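kvm_arch_sched_in() is a new per-architecture hook invoked from the preempt notifier before the vCPU state is reloaded; an architecture with nothing to do can supply an empty stub (sketch):

	void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
	{
	}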
@@ -3184,6 +3165,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
out_undebugfs:
unregister_syscore_ops(&kvm_syscore_ops);
+ misc_deregister(&kvm_dev);
out_unreg:
kvm_async_pf_deinit();
out_free:
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
new file mode 100644
index 000000000000..475487e238e1
--- /dev/null
+++ b/virt/kvm/vfio.c
@@ -0,0 +1,228 @@
+/*
+ * VFIO-KVM bridge pseudo device
+ *
+ * Copyright (C) 2013 Red Hat, Inc. All rights reserved.
+ * Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+struct kvm_vfio_group {
+ struct list_head node;
+ struct vfio_group *vfio_group;
+};
+
+struct kvm_vfio {
+ struct list_head group_list;
+ struct mutex lock;
+};
+
+static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep)
+{
+ struct vfio_group *vfio_group;
+ struct vfio_group *(*fn)(struct file *);
+
+ fn = symbol_get(vfio_group_get_external_user);
+ if (!fn)
+ return ERR_PTR(-EINVAL);
+
+ vfio_group = fn(filep);
+
+ symbol_put(vfio_group_get_external_user);
+
+ return vfio_group;
+}
+
+static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
+{
+ void (*fn)(struct vfio_group *);
+
+ fn = symbol_get(vfio_group_put_external_user);
+ if (!fn)
+ return;
+
+ fn(vfio_group);
+
+ symbol_put(vfio_group_put_external_user);
+}
+
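/*
 * Annotation (not part of the patch): the symbol_get()/symbol_put() pairs
 * above avoid a hard link-time dependency on vfio.ko. If vfio is not loaded,
 * symbol_get() fails and group attachment is simply refused, rather than
 * kvm.ko dragging vfio in whenever it loads.
 */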
+static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg)
+{
+ struct kvm_vfio *kv = dev->private;
+ struct vfio_group *vfio_group;
+ struct kvm_vfio_group *kvg;
+ void __user *argp = (void __user *)arg;
+ struct fd f;
+ int32_t fd;
+ int ret;
+
+ switch (attr) {
+ case KVM_DEV_VFIO_GROUP_ADD:
+ if (get_user(fd, (int32_t __user *)argp))
+ return -EFAULT;
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+
+ vfio_group = kvm_vfio_group_get_external_user(f.file);
+ fdput(f);
+
+ if (IS_ERR(vfio_group))
+ return PTR_ERR(vfio_group);
+
+ mutex_lock(&kv->lock);
+
+ list_for_each_entry(kvg, &kv->group_list, node) {
+ if (kvg->vfio_group == vfio_group) {
+ mutex_unlock(&kv->lock);
+ kvm_vfio_group_put_external_user(vfio_group);
+ return -EEXIST;
+ }
+ }
+
+ kvg = kzalloc(sizeof(*kvg), GFP_KERNEL);
+ if (!kvg) {
+ mutex_unlock(&kv->lock);
+ kvm_vfio_group_put_external_user(vfio_group);
+ return -ENOMEM;
+ }
+
+ list_add_tail(&kvg->node, &kv->group_list);
+ kvg->vfio_group = vfio_group;
+
+ mutex_unlock(&kv->lock);
+
+ return 0;
+
+ case KVM_DEV_VFIO_GROUP_DEL:
+ if (get_user(fd, (int32_t __user *)argp))
+ return -EFAULT;
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+
+ vfio_group = kvm_vfio_group_get_external_user(f.file);
+ fdput(f);
+
+ if (IS_ERR(vfio_group))
+ return PTR_ERR(vfio_group);
+
+ ret = -ENOENT;
+
+ mutex_lock(&kv->lock);
+
+ list_for_each_entry(kvg, &kv->group_list, node) {
+ if (kvg->vfio_group != vfio_group)
+ continue;
+
+ list_del(&kvg->node);
+ kvm_vfio_group_put_external_user(kvg->vfio_group);
+ kfree(kvg);
+ ret = 0;
+ break;
+ }
+
+ mutex_unlock(&kv->lock);
+
+ kvm_vfio_group_put_external_user(vfio_group);
+
+ return ret;
+ }
+
+ return -ENXIO;
+}
+
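Driving this handler from userspace looks like the sketch below; device_fd comes from KVM_CREATE_DEVICE above, and group_fd is the fd of an already opened /dev/vfio group (both assumed):

	struct kvm_device_attr attr = {
		.group = KVM_DEV_VFIO_GROUP,
		.attr  = KVM_DEV_VFIO_GROUP_ADD,
		.addr  = (__u64)(unsigned long)&group_fd,	/* points to an int32_t */
	};

	if (ioctl(device_fd, KVM_SET_DEVICE_ATTR, &attr))
		perror("KVM_DEV_VFIO_GROUP_ADD");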
+static int kvm_vfio_set_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_VFIO_GROUP:
+ return kvm_vfio_set_group(dev, attr->attr, attr->addr);
+ }
+
+ return -ENXIO;
+}
+
+static int kvm_vfio_has_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_VFIO_GROUP:
+ switch (attr->attr) {
+ case KVM_DEV_VFIO_GROUP_ADD:
+ case KVM_DEV_VFIO_GROUP_DEL:
+ return 0;
+ }
+
+ break;
+ }
+
+ return -ENXIO;
+}
+
+static void kvm_vfio_destroy(struct kvm_device *dev)
+{
+ struct kvm_vfio *kv = dev->private;
+ struct kvm_vfio_group *kvg, *tmp;
+
+ list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) {
+ kvm_vfio_group_put_external_user(kvg->vfio_group);
+ list_del(&kvg->node);
+ kfree(kvg);
+ }
+
+ kfree(kv);
+ kfree(dev); /* allocated by kvm_ioctl_create_device(), freed here via .destroy */
+}
+
+static int kvm_vfio_create(struct kvm_device *dev, u32 type);
+
+static struct kvm_device_ops kvm_vfio_ops = {
+ .name = "kvm-vfio",
+ .create = kvm_vfio_create,
+ .destroy = kvm_vfio_destroy,
+ .set_attr = kvm_vfio_set_attr,
+ .has_attr = kvm_vfio_has_attr,
+};
+
+static int kvm_vfio_create(struct kvm_device *dev, u32 type)
+{
+ struct kvm_device *tmp;
+ struct kvm_vfio *kv;
+
+ /* Only one VFIO "device" per VM */
+ list_for_each_entry(tmp, &dev->kvm->devices, vm_node)
+ if (tmp->ops == &kvm_vfio_ops)
+ return -EBUSY;
+
+ kv = kzalloc(sizeof(*kv), GFP_KERNEL);
+ if (!kv)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&kv->group_list);
+ mutex_init(&kv->lock);
+
+ dev->private = kv;
+
+ return 0;
+}
+
+static int __init kvm_vfio_ops_init(void)
+{
+ return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
+}
+module_init(kvm_vfio_ops_init);