From dc9b2d933a1d5782b70977024f862759c8ebb2f7 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 13 Aug 2014 12:06:14 -0400 Subject: KVM: SVM: add rdmsr support for AMD event registers Current KVM only supports RDMSR for K7_EVNTSEL0 and K7_PERFCTR0 MSRs. Reading the rest MSRs will trigger KVM to inject #GP into guest VM. This causes a warning message "Failed to access perfctr msr (MSR c0010001 is ffffffffffffffff)" on AMD host. This patch adds RDMSR support for all K7_EVNTSELn and K7_PERFCTRn registers and thus supresses the warning message. Signed-off-by: Wei Huang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f1e22d3b286..5f5edb6ddc83 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2419,7 +2419,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_K7_HWCR: case MSR_VM_HSAVE_PA: case MSR_K7_EVNTSEL0: + case MSR_K7_EVNTSEL1: + case MSR_K7_EVNTSEL2: + case MSR_K7_EVNTSEL3: case MSR_K7_PERFCTR0: + case MSR_K7_PERFCTR1: + case MSR_K7_PERFCTR2: + case MSR_K7_PERFCTR3: case MSR_K8_INT_PENDING_MSG: case MSR_AMD64_NB_CFG: case MSR_FAM10H_MMIO_CONF_BASE: -- cgit v1.2.3 From 4473b570a7ebb502f63f292ccfba7df622e5fdd3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 18 Aug 2014 17:50:28 +0800 Subject: KVM: x86: drop fpu_activate hook The only user of the fpu_activate hook was dropped in commit 2d04a05bd7e9 (KVM: x86 emulator: emulate CLTS internally, 2011-04-20). vmx_fpu_activate and svm_fpu_activate are still called on #NM (and for Intel CLTS), but never from common code; hence, there's no need for a hook. Reviewed-by: Yang Zhang Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/svm.c | 1 - arch/x86/kvm/vmx.c | 1 - 3 files changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7c492ed9087b..4bda61b582e6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -710,7 +710,6 @@ struct kvm_x86_ops { void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*fpu_activate)(struct kvm_vcpu *vcpu); void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ddf742768ecf..1f49c867e72e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4349,7 +4349,6 @@ static struct kvm_x86_ops svm_x86_ops = { .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .fpu_activate = svm_fpu_activate, .fpu_deactivate = svm_fpu_deactivate, .tlb_flush = svm_flush_tlb, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bfe11cf124a1..6a216e4762c7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8890,7 +8890,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, - .fpu_activate = vmx_fpu_activate, .fpu_deactivate = vmx_fpu_deactivate, .tlb_flush = vmx_flush_tlb, -- cgit v1.2.3 From 15fc075269e42230605343554c5c8001eb819228 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Aug 2014 13:17:00 +0200 Subject: KVM: x86: raise invalid TSS exceptions during a task switch Conditions that would usually trigger a general protection fault should instead 
raise #TS. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 03954f7900f5..ef297919a691 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1468,7 +1468,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; err_code = selector & 0xfffc; - err_vec = GP_VECTOR; + err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR; /* can't load system descriptor into segment selector */ if (seg <= VCPU_SREG_GS && !seg_desc.s) -- cgit v1.2.3 From 3b63a43f1e04b935e1ce0383f78ac0f5c65433d8 Mon Sep 17 00:00:00 2001 From: Monam Agarwal Date: Sat, 22 Mar 2014 12:28:10 +0530 Subject: arch/x86: Use RCU_INIT_POINTER(x, NULL) in kvm/vmx.c Here rcu_assign_pointer() is ensuring that the initialization of a structure is carried out before storing a pointer to that structure. So, rcu_assign_pointer(p, NULL) can always safely be converted to RCU_INIT_POINTER(p, NULL). Signed-off-by: Monam Agarwal Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6a216e4762c7..cad37d57cc47 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9097,7 +9097,7 @@ static void __exit vmx_exit(void) free_page((unsigned long)vmx_vmread_bitmap); #ifdef CONFIG_KEXEC - rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL); + RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL); synchronize_rcu(); #endif -- cgit v1.2.3 From adfb5d2746bfbe692324bd26a6de05a3a036b38e Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 19 Aug 2014 17:04:39 +0800 Subject: KVM: x86: fix check legal type of Variable Range MTRRs The first entry in each pair(IA32_MTRR_PHYSBASEn) defines the base address and memory type for the range; the second entry(IA32_MTRR_PHYSMASKn) contains a mask used to determine the address range. The legal values for the type field of IA32_MTRR_PHYSBASEn are 0,1,4,5, and 6. However, IA32_MTRR_PHYSMASKn don't have type field. This patch avoid check if the type field is legal for IA32_MTRR_PHYSMASKn. Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5f5edb6ddc83..fb3ea7aad0c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1747,7 +1747,13 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) } /* variable MTRRs */ - return valid_mtrr_type(data & 0xff); + WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR)); + + if ((msr & 1) == 0) + /* MTRR base */ + return valid_mtrr_type(data & 0xff); + /* MTRR mask */ + return true; } static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) -- cgit v1.2.3 From d7a2a246a1b5a0b0c803e800019600051e1e6f1a Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 19 Aug 2014 17:04:40 +0800 Subject: KVM: x86: #GP when attempts to write reserved bits of Variable Range MTRRs Section 11.11.2.3 of the SDM mentions "All other bits in the IA32_MTRR_PHYSBASEn and IA32_MTRR_PHYSMASKn registers are reserved; the processor generates a general-protection exception(#GP) if software attempts to write to them". This patch do it in kvm. 
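[Editorial illustration, not part of the patch: a minimal stand-alone sketch of the reserved-bit masks that mtrr_valid() builds after this change, assuming a hypothetical 40-bit physical address width. The loop mirrors the mask construction in the hunk below; any guest write with one of these bits set now takes a #GP instead of being silently accepted.]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int phys_bits = 40;	/* assumed width, for illustration only */
	uint64_t mask = 0;
	int i;

	/* bits above the supported physical address width, as in the hunk below */
	for (i = 63; i > phys_bits; i--)
		mask |= 1ULL << i;

	/* IA32_MTRR_PHYSBASEn: bits 11:8 are reserved on top of that */
	printf("PHYSBASEn reserved mask: %#llx\n",
	       (unsigned long long)(mask | 0xf00));
	/* IA32_MTRR_PHYSMASKn: bits 10:0 are reserved */
	printf("PHYSMASKn reserved mask: %#llx\n",
	       (unsigned long long)(mask | 0x7ff));
	return 0;
}
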
Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fb3ea7aad0c1..737b4bdac41c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1726,6 +1726,7 @@ static bool valid_mtrr_type(unsigned t) static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) { int i; + u64 mask = 0; if (!msr_mtrr_valid(msr)) return false; @@ -1749,10 +1750,21 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) /* variable MTRRs */ WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR)); - if ((msr & 1) == 0) + for (i = 63; i > boot_cpu_data.x86_phys_bits; i--) + mask |= (1ULL << i); + if ((msr & 1) == 0) { /* MTRR base */ - return valid_mtrr_type(data & 0xff); - /* MTRR mask */ + if (!valid_mtrr_type(data & 0xff)) + return false; + mask |= 0xf00; + } else + /* MTRR mask */ + mask |= 0x7ff; + if (data & mask) { + kvm_inject_gp(vcpu, 0); + return false; + } + return true; } -- cgit v1.2.3 From fae0ba2157340635fd99912c0c3b7a28c355c588 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 18 Aug 2014 22:42:13 +0300 Subject: KVM: x86: Clear apic tsc-deadline after deadline Intel SDM 10.5.4.1 says "When the timer generates an interrupt, it disarms itself and clears the IA32_TSC_DEADLINE MSR". This patch clears the MSR upon timer interrupt delivery which delivered on deadline mode. Since the MSR may be reconfigured while an interrupt is pending, causing the new value to be overriden, pending timer interrupts are checked before setting a new deadline. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 08e8a899e005..666c086c82d4 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1352,6 +1352,9 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) return; hrtimer_cancel(&apic->lapic_timer.timer); + /* Inject here so clearing tscdeadline won't override new value */ + if (apic_has_pending_timer(vcpu)) + kvm_inject_apic_timer_irqs(vcpu); apic->lapic_timer.tscdeadline = data; start_apic_timer(apic); } @@ -1639,6 +1642,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) if (atomic_read(&apic->lapic_timer.pending) > 0) { kvm_apic_local_deliver(apic, APIC_LVTT); + if (apic_lvtt_tscdeadline(apic)) + apic->lapic_timer.tscdeadline = 0; atomic_set(&apic->lapic_timer.pending, 0); } } -- cgit v1.2.3 From 1e1b6c26443547b05925ae4a4494884c92eb7d95 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 19 Aug 2014 00:03:00 +0300 Subject: KVM: x86: recalculate_apic_map after enabling apic Currently, recalculate_apic_map ignores vcpus whose lapic is software disabled through the spurious interrupt vector. However, once it is re-enabled, the map is not recalculated. Therefore, if the guest OS configured DFR while lapic is software-disabled, the map may be incorrect. This patch recalculates apic map after software enabling the lapic. 
Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 666c086c82d4..fb919c574e23 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -112,17 +112,6 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap) struct static_key_deferred apic_hw_disabled __read_mostly; struct static_key_deferred apic_sw_disabled __read_mostly; -static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) -{ - if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) { - if (val & APIC_SPIV_APIC_ENABLED) - static_key_slow_dec_deferred(&apic_sw_disabled); - else - static_key_slow_inc(&apic_sw_disabled.key); - } - apic_set_reg(apic, APIC_SPIV, val); -} - static inline int apic_enabled(struct kvm_lapic *apic) { return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic); @@ -210,6 +199,20 @@ out: kvm_vcpu_request_scan_ioapic(kvm); } +static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) +{ + u32 prev = kvm_apic_get_reg(apic, APIC_SPIV); + + apic_set_reg(apic, APIC_SPIV, val); + if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) { + if (val & APIC_SPIV_APIC_ENABLED) { + static_key_slow_dec_deferred(&apic_sw_disabled); + recalculate_apic_map(apic->vcpu->kvm); + } else + static_key_slow_inc(&apic_sw_disabled.key); + } +} + static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) { apic_set_reg(apic, APIC_ID, id << 24); -- cgit v1.2.3 From a32e84594ddf018cc618a8781298804c3e6131ce Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 20 Aug 2014 15:31:53 +0800 Subject: KVM: vmx: fix ept reserved bits for 1-GByte page EPT misconfig handler in kvm will check which reason lead to EPT misconfiguration after vmexit. One of the reasons is that an EPT paging-structure entry is configured with settings reserved for future functionality. However, the handler can't identify if paging-structure entry of reserved bits for 1-GByte page are configured, since PDPTE which point to 1-GByte page will reserve bits 29:12 instead of bits 7:3 which are reserved for PDPTE that references an EPT Page Directory. This patch fix it by reserve bits 29:12 for 1-GByte page. Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cad37d57cc47..286c2835e25c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5521,17 +5521,18 @@ static u64 ept_rsvd_mask(u64 spte, int level) for (i = 51; i > boot_cpu_data.x86_phys_bits; i--) mask |= (1ULL << i); - if (level > 2) + if (level == 4) /* bits 7:3 reserved */ mask |= 0xf8; - else if (level == 2) { - if (spte & (1ULL << 7)) - /* 2MB ref, bits 20:12 reserved */ - mask |= 0x1ff000; - else - /* bits 6:3 reserved */ - mask |= 0x78; - } + else if (spte & (1ULL << 7)) + /* + * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively, + * level == 1 if the hypervisor is using the ignored bit 7. 
+ */ + mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE; + else if (level > 1) + /* bits 6:3 reserved */ + mask |= 0x78; return mask; } @@ -5561,7 +5562,8 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, WARN_ON(1); } - if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) { + /* bits 5:3 are _not_ reserved for large page or leaf page */ + if ((rsvd_bits & 0x38) == 0) { u64 ept_mem_type = (spte & 0x38) >> 3; if (ept_mem_type == 2 || ept_mem_type == 3 || -- cgit v1.2.3 From d27aa7f15c3b1105c8cd8c2d190ab354f877cac5 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 20 Aug 2014 13:25:52 +0300 Subject: KVM: x86: Clarify PMU related features bit manipulation kvm_pmu_cpuid_update makes a lot of bit manuiplation operations, when in fact there are already unions that can be used instead. Changing the bit manipulation to the union for clarity. This patch does not change the functionality. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/pmu.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 3dd6accb64ec..8e6b7d869d2f 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "x86.h" #include "cpuid.h" #include "lapic.h" @@ -463,7 +464,8 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_cpuid_entry2 *entry; - unsigned bitmap_len; + union cpuid10_eax eax; + union cpuid10_edx edx; pmu->nr_arch_gp_counters = 0; pmu->nr_arch_fixed_counters = 0; @@ -475,25 +477,27 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); if (!entry) return; + eax.full = entry->eax; + edx.full = entry->edx; - pmu->version = entry->eax & 0xff; + pmu->version = eax.split.version_id; if (!pmu->version) return; - pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff, - INTEL_PMC_MAX_GENERIC); - pmu->counter_bitmask[KVM_PMC_GP] = - ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1; - bitmap_len = (entry->eax >> 24) & 0xff; - pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); + pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, + INTEL_PMC_MAX_GENERIC); + pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1; + pmu->available_event_types = ~entry->ebx & + ((1ull << eax.split.mask_length) - 1); if (pmu->version == 1) { pmu->nr_arch_fixed_counters = 0; } else { - pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), + pmu->nr_arch_fixed_counters = + min_t(int, edx.split.num_counters_fixed, INTEL_PMC_MAX_FIXED); pmu->counter_bitmask[KVM_PMC_FIXED] = - ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; + ((u64)1 << edx.split.bit_width_fixed) - 1; } pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | -- cgit v1.2.3 From 592f085847f4ea753586dfe6ce75ba37d5992a45 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 Aug 2014 10:05:08 +0200 Subject: KVM: emulate: do not return X86EMUL_PROPAGATE_FAULT explicitly Always get it through emulate_exception or emulate_ts. This ensures that the ctxt->exception fields have been populated. 
Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ef297919a691..4fbf4b598f92 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1549,8 +1549,7 @@ load: ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); return X86EMUL_CONTINUE; exception: - emulate_exception(ctxt, err_vec, err_code, true); - return X86EMUL_PROPAGATE_FAULT; + return emulate_exception(ctxt, err_vec, err_code, true); } static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, @@ -2723,8 +2722,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if (!next_tss_desc.p || ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || desc_limit < 0x2b)) { - emulate_ts(ctxt, tss_selector & 0xfffc); - return X86EMUL_PROPAGATE_FAULT; + return emulate_ts(ctxt, tss_selector & 0xfffc); } if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { @@ -3016,7 +3014,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) ctxt->dst.val = swab64(ctxt->src.val); break; default: - return X86EMUL_PROPAGATE_FAULT; + BUG(); } return X86EMUL_CONTINUE; } -- cgit v1.2.3 From e0ad0b477c36fde6b0923670647495d07bf42f94 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 Aug 2014 10:08:23 +0200 Subject: KVM: emulate: warn on invalid or uninitialized exception numbers These were reported when running Jailhouse on AMD processors. Initialize ctxt->exception.vector with an invalid exception number, and warn if it remained invalid even though the emulator got an X86EMUL_PROPAGATE_FAULT return code. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 5 ++++- arch/x86/kvm/x86.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4fbf4b598f92..e5bf13003cd2 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -527,6 +527,7 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec, u32 error, bool valid) { + WARN_ON(vec > 0x1f); ctxt->exception.vector = vec; ctxt->exception.error_code = error; ctxt->exception.error_code_valid = valid; @@ -4827,8 +4828,10 @@ writeback: ctxt->eip = ctxt->_eip; done: - if (rc == X86EMUL_PROPAGATE_FAULT) + if (rc == X86EMUL_PROPAGATE_FAULT) { + WARN_ON(ctxt->exception.vector > 0x1f); ctxt->have_exception = true; + } if (rc == X86EMUL_INTERCEPTED) return EMULATION_INTERCEPTED; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 737b4bdac41c..cd718c01cdf1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5248,6 +5248,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, ctxt->interruptibility = 0; ctxt->have_exception = false; + ctxt->exception.vector = -1; ctxt->perm_ok = false; ctxt->ud = emulation_type & EMULTYPE_TRAP_UD; -- cgit v1.2.3 From 6689fbe3cf65b8c0dbbc87c40c085452997ffd8b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 20 Aug 2014 16:38:19 +0300 Subject: KVM: x86: Replace X86_FEATURE_NX offset with the definition Replace reference to X86_FEATURE_NX using bit shift with the defined X86_FEATURE_NX. 
Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 38a0afe83c6b..f4bad87ef256 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -112,8 +112,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) break; } } - if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) { - entry->edx &= ~(1 << 20); + if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) { + entry->edx &= ~bit(X86_FEATURE_NX); printk(KERN_INFO "kvm: guest NX capability removed\n"); } } -- cgit v1.2.3 From e790d9ef6405633b007339d746b709aed43a928d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 21 Aug 2014 18:08:05 +0200 Subject: KVM: add kvm_arch_sched_in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce preempt notifiers for architecture specific code. Advantage over creating a new notifier in every arch is slightly simpler code and guaranteed call order with respect to kvm_sched_in. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/arm/kvm/arm.c | 4 ++++ arch/mips/kvm/mips.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ 5 files changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a99e0cdf8ba2..9f788ebac55b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index cd7114147ae7..2362df2a79f9 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1002,6 +1002,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4c79284b58be..cbc432f4f0a6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -720,6 +720,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_subarch_vcpu_uninit(vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ce81eb2ab76a..a3c324ec4370 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -555,6 +555,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) /* Nothing todo */ } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_ctl(&vcpu->arch.host_fpregs.fpc); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cd718c01cdf1..7d43dc7bb906 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7171,6 +7171,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) static_key_slow_dec(&kvm_no_apic_vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { if (type) -- cgit v1.2.3 From ae97a3b818324b92b5b9cc885c63c3f4bd46ee9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 21 Aug 2014 18:08:06 +0200 
Subject: KVM: x86: introduce sched_in to kvm_x86_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sched_in preempt notifier is available for x86, allow its use in specific virtualization technlogies as well. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 6 ++++++ arch/x86/kvm/vmx.c | 6 ++++++ arch/x86/kvm/x86.c | 1 + 4 files changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4bda61b582e6..ac0f90e26a0b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -771,6 +771,8 @@ struct kvm_x86_ops { bool (*mpx_supported)(void); int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); + + void (*sched_in)(struct kvm_vcpu *kvm, int cpu); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1f49c867e72e..1703aab84a6d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4305,6 +4305,10 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu) local_irq_enable(); } +static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -4405,6 +4409,8 @@ static struct kvm_x86_ops svm_x86_ops = { .check_intercept = svm_check_intercept, .handle_external_intr = svm_handle_external_intr, + + .sched_in = svm_sched_in, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 286c2835e25c..7c26533e149c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8848,6 +8848,10 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, return X86EMUL_CONTINUE; } +void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, @@ -8952,6 +8956,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .mpx_supported = vmx_mpx_supported, .check_nested_events = vmx_check_nested_events, + + .sched_in = vmx_sched_in, }; static int __init vmx_init(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7d43dc7bb906..575d3fc67e7e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7173,6 +7173,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) { + kvm_x86_ops->sched_in(vcpu, cpu); } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) -- cgit v1.2.3 From a7653ecdf34c68a1af4fc085511afcf7ff011903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 21 Aug 2014 18:08:07 +0200 Subject: KVM: VMX: make PLE window per-VCPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change PLE window into per-VCPU variable, seeded from module parameter, to allow greater flexibility. Brings in a small overhead on every vmentry. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7c26533e149c..70175990536b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -484,6 +484,10 @@ struct vcpu_vmx { /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; + + /* Dynamic PLE window. 
*/ + int ple_window; + bool ple_window_dirty; }; enum segment_cache_field { @@ -4402,7 +4406,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) if (ple_gap) { vmcs_write32(PLE_GAP, ple_gap); - vmcs_write32(PLE_WINDOW, ple_window); + vmx->ple_window = ple_window; + vmx->ple_window_dirty = true; } vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); @@ -7389,6 +7394,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vmx->emulation_required) return; + if (vmx->ple_window_dirty) { + vmx->ple_window_dirty = false; + vmcs_write32(PLE_WINDOW, vmx->ple_window); + } + if (vmx->nested.sync_shadow_vmcs) { copy_vmcs12_to_shadow(vmx); vmx->nested.sync_shadow_vmcs = false; -- cgit v1.2.3 From b4a2d31da812ce03efaf5d30c6b9d39c1cbd18d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 21 Aug 2014 18:08:08 +0200 Subject: KVM: VMX: dynamise PLE window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Window is increased on every PLE exit and decreased on every sched_in. The idea is that we don't want to PLE exit if there is no preemption going on. We do this with sched_in() because it does not hold rq lock. There are two new kernel parameters for changing the window: ple_window_grow and ple_window_shrink ple_window_grow affects the window on PLE exit and ple_window_shrink does it on sched_in; depending on their value, the window is modifier like this: (ple_window is kvm_intel's global) ple_window_shrink/ | ple_window_grow | PLE exit | sched_in -------------------+--------------------+--------------------- < 1 | = ple_window | = ple_window < ple_window | *= ple_window_grow | /= ple_window_shrink otherwise | += ple_window_grow | -= ple_window_shrink A third new parameter, ple_window_max, controls the maximal ple_window; it is internally rounded down to a closest multiple of ple_window_grow. VCPU's PLE window is never allowed below ple_window. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 70175990536b..baeac7f580a7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO); * Time is measured based on a counter that runs at the same rate as the TSC, * refer SDM volume 3b section 21.6.13 & 22.1.3. */ -#define KVM_VMX_DEFAULT_PLE_GAP 128 -#define KVM_VMX_DEFAULT_PLE_WINDOW 4096 +#define KVM_VMX_DEFAULT_PLE_GAP 128 +#define KVM_VMX_DEFAULT_PLE_WINDOW 4096 +#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2 +#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0 +#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \ + INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW + static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; module_param(ple_gap, int, S_IRUGO); static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; module_param(ple_window, int, S_IRUGO); +/* Default doubles per-vcpu window every exit. */ +static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW; +module_param(ple_window_grow, int, S_IRUGO); + +/* Default resets per-vcpu window every exit to ple_window. */ +static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK; +module_param(ple_window_shrink, int, S_IRUGO); + +/* Default is to compute the maximum so we can never overflow. 
*/ +static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; +static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; +module_param(ple_window_max, int, S_IRUGO); + extern const ulong vmx_return; #define NR_AUTOLOAD_MSRS 8 @@ -5683,12 +5701,81 @@ out: return ret; } +static int __grow_ple_window(int val) +{ + if (ple_window_grow < 1) + return ple_window; + + val = min(val, ple_window_actual_max); + + if (ple_window_grow < ple_window) + val *= ple_window_grow; + else + val += ple_window_grow; + + return val; +} + +static int __shrink_ple_window(int val, int modifier, int minimum) +{ + if (modifier < 1) + return ple_window; + + if (modifier < ple_window) + val /= modifier; + else + val -= modifier; + + return max(val, minimum); +} + +static void grow_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int old = vmx->ple_window; + + vmx->ple_window = __grow_ple_window(old); + + if (vmx->ple_window != old) + vmx->ple_window_dirty = true; +} + +static void shrink_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int old = vmx->ple_window; + + vmx->ple_window = __shrink_ple_window(old, + ple_window_shrink, ple_window); + + if (vmx->ple_window != old) + vmx->ple_window_dirty = true; +} + +/* + * ple_window_actual_max is computed to be one grow_ple_window() below + * ple_window_max. (See __grow_ple_window for the reason.) + * This prevents overflows, because ple_window_max is int. + * ple_window_max effectively rounded down to a multiple of ple_window_grow in + * this process. + * ple_window_max is also prevented from setting vmx->ple_window < ple_window. + */ +static void update_ple_window_actual_max(void) +{ + ple_window_actual_max = + __shrink_ple_window(max(ple_window_max, ple_window), + ple_window_grow, INT_MIN); +} + /* * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE * exiting, so only get here on cpu with PAUSE-Loop-Exiting. */ static int handle_pause(struct kvm_vcpu *vcpu) { + if (ple_gap) + grow_ple_window(vcpu); + skip_emulated_instruction(vcpu); kvm_vcpu_on_spin(vcpu); @@ -8860,6 +8947,8 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) { + if (ple_gap) + shrink_ple_window(vcpu); } static struct kvm_x86_ops vmx_x86_ops = { @@ -9082,6 +9171,8 @@ static int __init vmx_init(void) } else kvm_disable_tdp(); + update_ple_window_actual_max(); + return 0; out7: -- cgit v1.2.3 From 7b46268d29543e313e731606d845e65c17f232e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 21 Aug 2014 18:08:09 +0200 Subject: KVM: trace kvm_ple_window grow/shrink MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tracepoint for dynamic PLE window, fired on every potential change. 
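[Editorial illustration, not part of any patch: a stand-alone user-space sketch of the window arithmetic introduced by the "make PLE window per-VCPU" and "dynamise PLE window" patches above, using the module defaults (ple_window = 4096, ple_window_grow = 2, ple_window_shrink = 0). The helpers mirror __grow_ple_window() and __shrink_ple_window(); with these defaults the window doubles on every PLE exit and snaps back to ple_window on the next sched_in, which is the transition the new tracepoint records.]

#include <stdio.h>
#include <limits.h>

static int ple_window = 4096;		/* KVM_VMX_DEFAULT_PLE_WINDOW */
static int ple_window_grow = 2;		/* default: double on PLE exit */
static int ple_window_shrink = 0;	/* default: reset on sched_in */
static int ple_window_actual_max = INT_MAX / 2;

static int grow_window(int val)		/* mirrors __grow_ple_window() */
{
	if (ple_window_grow < 1)
		return ple_window;
	if (val > ple_window_actual_max)
		val = ple_window_actual_max;
	return ple_window_grow < ple_window ? val * ple_window_grow
					    : val + ple_window_grow;
}

static int shrink_window(int val)	/* mirrors __shrink_ple_window() */
{
	if (ple_window_shrink < 1)
		return ple_window;
	val = ple_window_shrink < ple_window ? val / ple_window_shrink
					     : val - ple_window_shrink;
	return val > ple_window ? val : ple_window;
}

int main(void)
{
	int w = ple_window;

	w = grow_window(w);	/* 8192 after the first PLE exit */
	w = grow_window(w);	/* 16384 after the second */
	printf("grown to %d\n", w);
	w = shrink_window(w);	/* back to 4096 on the next sched_in */
	printf("shrunk to %d\n", w);
	return 0;
}
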
Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/trace.h | 30 ++++++++++++++++++++++++++++++ arch/x86/kvm/vmx.c | 4 ++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 35 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index e850a7d332be..1742dfbd26b3 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -848,6 +848,36 @@ TRACE_EVENT(kvm_track_tsc, __print_symbolic(__entry->host_clock, host_clocks)) ); +TRACE_EVENT(kvm_ple_window, + TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), + TP_ARGS(grow, vcpu_id, new, old), + + TP_STRUCT__entry( + __field( bool, grow ) + __field( unsigned int, vcpu_id ) + __field( int, new ) + __field( int, old ) + ), + + TP_fast_assign( + __entry->grow = grow; + __entry->vcpu_id = vcpu_id; + __entry->new = new; + __entry->old = old; + ), + + TP_printk("vcpu %u: ple_window %d (%s %d)", + __entry->vcpu_id, + __entry->new, + __entry->grow ? "grow" : "shrink", + __entry->old) +); + +#define trace_kvm_ple_window_grow(vcpu_id, new, old) \ + trace_kvm_ple_window(true, vcpu_id, new, old) +#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \ + trace_kvm_ple_window(false, vcpu_id, new, old) + #endif /* CONFIG_X86_64 */ #endif /* _TRACE_KVM_H */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index baeac7f580a7..661abc2f7049 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5738,6 +5738,8 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) if (vmx->ple_window != old) vmx->ple_window_dirty = true; + + trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old); } static void shrink_ple_window(struct kvm_vcpu *vcpu) @@ -5750,6 +5752,8 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) if (vmx->ple_window != old) vmx->ple_window_dirty = true; + + trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old); } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 575d3fc67e7e..c10408ef9ab1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7673,3 +7673,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); -- cgit v1.2.3 From 44c6ca3d1b9c16cb715c21ec15670d27a8950822 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Wed, 16 Apr 2014 13:57:18 +0200 Subject: KVM: s390: add defines for pfault init delivery code Get rid of open coded values for pfault init. 
Signed-off-by: Jens Freimann Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f4c819bfc193..6c9428e71fdd 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -26,6 +26,7 @@ #define IOINT_SSID_MASK 0x00030000 #define IOINT_CSSID_MASK 0x03fc0000 #define IOINT_AI_MASK 0x04000000 +#define PFAULT_INIT 0x0600 static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu); @@ -376,8 +377,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, case KVM_S390_INT_PFAULT_INIT: trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0, inti->ext.ext_params2); - rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR); + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, + (u16 *) __LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR); rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, -- cgit v1.2.3 From 8a2ef71b0bd0060c7095fd2043992b78e23735c4 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Wed, 23 Jul 2014 16:36:06 +0200 Subject: KVM: s390: factor out get_ilc() function Let's make this a reusable function. Signed-off-by: Jens Freimann Acked-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 6c9428e71fdd..71bf7e749cf7 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -206,11 +206,30 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, } } +static u16 get_ilc(struct kvm_vcpu *vcpu) +{ + const unsigned short table[] = { 2, 4, 4, 6 }; + + switch (vcpu->arch.sie_block->icptcode) { + case ICPT_INST: + case ICPT_INSTPROGI: + case ICPT_OPEREXC: + case ICPT_PARTEXEC: + case ICPT_IOINST: + /* last instruction only stored for these icptcodes */ + return table[vcpu->arch.sie_block->ipa >> 14]; + case ICPT_PROGI: + return vcpu->arch.sie_block->pgmilc; + default: + return 0; + } +} + static int __deliver_prog_irq(struct kvm_vcpu *vcpu, struct kvm_s390_pgm_info *pgm_info) { - const unsigned short table[] = { 2, 4, 4, 6 }; int rc = 0; + u16 ilc = get_ilc(vcpu); switch (pgm_info->code & ~PGM_PER) { case PGM_AFX_TRANSLATION: @@ -277,25 +296,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu, (u8 *) __LC_PER_ACCESS_ID); } - switch (vcpu->arch.sie_block->icptcode) { - case ICPT_INST: - case ICPT_INSTPROGI: - case ICPT_OPEREXC: - case ICPT_PARTEXEC: - case ICPT_IOINST: - /* last instruction only stored for these icptcodes */ - rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14], - (u16 *) __LC_PGM_ILC); - break; - case ICPT_PROGI: - rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc, - (u16 *) __LC_PGM_ILC); - break; - default: - rc |= put_guest_lc(vcpu, 0, - (u16 *) __LC_PGM_ILC); - } - + rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, pgm_info->code, (u16 *)__LC_PGM_INT_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, -- cgit v1.2.3 From fbfa304963fa8bf990dac1d05a77800d1e123b66 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 29 Jul 2014 08:22:33 +0200 Subject: KVM: s390: clear 
kvm_dirty_regs when dropping to user space We should make sure that all kvm_dirty_regs bits are cleared before dropping to user space. Until now, some would remain pending. Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 81b0e11521e4..f00d0b02f868 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1319,15 +1319,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; - if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) { - kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX; + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); - } if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { - kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS; memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); } + kvm_run->kvm_dirty_regs = 0; might_fault(); rc = __vcpu_run(vcpu); -- cgit v1.2.3 From c3950b66b9ceff1614db870d2d5a9bd47531a712 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 4 Aug 2014 16:54:22 +0200 Subject: KVM: s390: no special machine check delivery The load PSW handler does not have to inject pending machine checks. This can wait until the CPU runs the generic interrupt injection code. Signed-off-by: Christian Borntraeger Reviewed-by: Cornelia Huck --- arch/s390/kvm/interrupt.c | 56 ----------------------------------------------- arch/s390/kvm/kvm-s390.h | 1 - arch/s390/kvm/priv.c | 9 -------- 3 files changed, 66 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 71bf7e749cf7..34d741edb50a 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -721,62 +721,6 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } } -void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu) -{ - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; - struct kvm_s390_interrupt_info *n, *inti = NULL; - int deliver; - - __reset_intercept_indicators(vcpu); - if (atomic_read(&li->active)) { - do { - deliver = 0; - spin_lock(&li->lock); - list_for_each_entry_safe(inti, n, &li->list, list) { - if ((inti->type == KVM_S390_MCHK) && - __interrupt_is_deliverable(vcpu, inti)) { - list_del(&inti->list); - deliver = 1; - break; - } - __set_intercept_indicator(vcpu, inti); - } - if (list_empty(&li->list)) - atomic_set(&li->active, 0); - spin_unlock(&li->lock); - if (deliver) { - __do_deliver_interrupt(vcpu, inti); - kfree(inti); - } - } while (deliver); - } - - if (atomic_read(&fi->active)) { - do { - deliver = 0; - spin_lock(&fi->lock); - list_for_each_entry_safe(inti, n, &fi->list, list) { - if ((inti->type == KVM_S390_MCHK) && - __interrupt_is_deliverable(vcpu, inti)) { - list_del(&inti->list); - fi->irq_count--; - deliver = 1; - break; - } - __set_intercept_indicator(vcpu, inti); - } - if (list_empty(&fi->list)) - atomic_set(&fi->active, 0); - spin_unlock(&fi->lock); - if (deliver) { - __do_deliver_interrupt(vcpu, inti); - kfree(inti); - } - } while (deliver); - } -} - int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) { struct kvm_s390_local_interrupt *li = 
&vcpu->arch.local_int; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 3862fa2cefe0..894fa4653dfb 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -139,7 +139,6 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); -void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); void kvm_s390_clear_float_irqs(struct kvm *kvm); int __must_check kvm_s390_inject_vm(struct kvm *kvm, diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index f89c1cd67751..d806f2cfde16 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -352,13 +352,6 @@ static int handle_stfl(struct kvm_vcpu *vcpu) return 0; } -static void handle_new_psw(struct kvm_vcpu *vcpu) -{ - /* Check whether the new psw is enabled for machine checks. */ - if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK) - kvm_s390_deliver_pending_machine_checks(vcpu); -} - #define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA) #define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL #define PSW_ADDR_24 0x0000000000ffffffUL @@ -405,7 +398,6 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE; if (!is_valid_psw(gpsw)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - handle_new_psw(vcpu); return 0; } @@ -427,7 +419,6 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gpsw = new_psw; if (!is_valid_psw(&vcpu->arch.sie_block->gpsw)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - handle_new_psw(vcpu); return 0; } -- cgit v1.2.3 From b028ee3edd54d338dd811aeafd670a2c682be558 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 17 Jul 2014 10:47:43 +0200 Subject: KVM: s390: synchronize more registers with kvm_run In order to reduce the number of syscalls when dropping to user space, this patch enables the synchronization of the following "registers" with kvm_run: - ARCH0: CPU timer, clock comparator, TOD programmable register, guest breaking-event register, program parameter - PFAULT: pfault parameters (token, select, compare) The registers are grouped to reduce the overhead when syncing. As this grows the number of sync registers quite a bit, let's move the code synchronizing registers with kvm_run from kvm_arch_vcpu_ioctl_run() into separate helper routines. 
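[Editorial illustration, not part of the patch: a hypothetical userspace helper showing how the new ARCH0 group could be pushed back through the kvm_run synced-register area before re-entering the guest. It assumes s390 uapi headers that already contain this patch; the helper name and calling convention are made up for the example.]

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical helper: mark the ARCH0 register group dirty and re-enter
 * the guest, so the kernel picks the values up in sync_regs(). */
static int run_with_arch0(int vcpu_fd, struct kvm_run *run,
			  __u64 cputm, __u64 ckc, __u64 todpr,
			  __u64 pp, __u64 gbea)
{
	run->s.regs.cputm = cputm;
	run->s.regs.ckc = ckc;
	run->s.regs.todpr = todpr;
	run->s.regs.pp = pp;
	run->s.regs.gbea = gbea;
	run->kvm_dirty_regs |= KVM_SYNC_ARCH0;

	return ioctl(vcpu_fd, KVM_RUN, 0);
}
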
Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/include/uapi/asm/kvm.h | 10 +++++++ arch/s390/kvm/kvm-s390.c | 60 ++++++++++++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 0fc26430a1e5..48eda3ab4944 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -111,12 +111,22 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_GPRS (1UL << 1) #define KVM_SYNC_ACRS (1UL << 2) #define KVM_SYNC_CRS (1UL << 3) +#define KVM_SYNC_ARCH0 (1UL << 4) +#define KVM_SYNC_PFAULT (1UL << 5) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ __u64 gprs[16]; /* general purpose registers */ __u32 acrs[16]; /* access registers */ __u64 crs[16]; /* control registers */ + __u64 todpr; /* tod programmable register [ARCH0] */ + __u64 cputm; /* cpu timer [ARCH0] */ + __u64 ckc; /* clock comparator [ARCH0] */ + __u64 pp; /* program parameter [ARCH0] */ + __u64 gbea; /* guest breaking-event address [ARCH0] */ + __u64 pft; /* pfault token [PFAULT] */ + __u64 pfs; /* pfault select [PFAULT] */ + __u64 pfc; /* pfault compare [PFAULT] */ }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f00d0b02f868..ab7cd645c127 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -546,7 +546,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | - KVM_SYNC_CRS; + KVM_SYNC_CRS | + KVM_SYNC_ARCH0 | + KVM_SYNC_PFAULT; return 0; } @@ -1296,6 +1298,47 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return rc; } +static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { + memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); + kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { + vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm; + vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc; + vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr; + vcpu->arch.sie_block->pp = kvm_run->s.regs.pp; + vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea; + } + if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) { + vcpu->arch.pfault_token = kvm_run->s.regs.pft; + vcpu->arch.pfault_select = kvm_run->s.regs.pfs; + vcpu->arch.pfault_compare = kvm_run->s.regs.pfc; + } + kvm_run->kvm_dirty_regs = 0; +} + +static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; + kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; + kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); + memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); + kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm; + kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc; + kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr; + kvm_run->s.regs.pp = vcpu->arch.sie_block->pp; + kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea; + kvm_run->s.regs.pft = vcpu->arch.pfault_token; + kvm_run->s.regs.pfs = 
vcpu->arch.pfault_select; + kvm_run->s.regs.pfc = vcpu->arch.pfault_compare; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int rc; @@ -1317,15 +1360,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return -EINVAL; } - vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; - vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; - if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) - kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); - if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { - memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); - kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); - } - kvm_run->kvm_dirty_regs = 0; + sync_regs(vcpu, kvm_run); might_fault(); rc = __vcpu_run(vcpu); @@ -1355,10 +1390,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } - kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; - kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; - kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu); - memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128); + store_regs(vcpu, kvm_run); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); -- cgit v1.2.3 From d3d692c82e4ed79ae7c85f8825ccfdb7d11819da Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 29 Jul 2014 08:53:36 +0200 Subject: KVM: s390: implement KVM_REQ_TLB_FLUSH and make use of it Use the KVM_REQ_TLB_FLUSH request in order to trigger tlb flushes instead of manipulating the SIE control block whenever we need it. Also trigger it for a control register sync directly instead of (ab)using kvm_s390_set_prefix(). Signed-off-by: David Hildenbrand Reviewed-by: Cornelia Huck Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 10 ++++++++-- arch/s390/kvm/kvm-s390.h | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ab7cd645c127..56193becdca5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1051,6 +1051,11 @@ retry: goto retry; } + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + vcpu->arch.sie_block->ihcpu = 0xffff; + goto retry; + } + if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { if (!ibs_enabled(vcpu)) { trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1); @@ -1306,7 +1311,8 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) { memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); - kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); + /* some control register changes require a tlb flush */ + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) { vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm; @@ -1519,7 +1525,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu) * Another VCPU might have used IBS while we were offline. * Let's play safe and flush the VCPU at startup. 
*/ - vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); spin_unlock(&vcpu->kvm->arch.start_stop_lock); return; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 894fa4653dfb..54c25fd5bed6 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -70,7 +70,7 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu) static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) { vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT; - vcpu->arch.sie_block->ihcpu = 0xffff; + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); } -- cgit v1.2.3 From 7939503147f89f0799ddc89afec2aeae57dd7e2c Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Thu, 17 Apr 2014 10:10:30 +0200 Subject: KVM: s390: return -EFAULT if lowcore is not mapped during irq delivery Currently we just kill the userspace process and exit the thread immediatly without making sure that we don't hold any locks etc. Improve this by making KVM_RUN return -EFAULT if the lowcore is not mapped during interrupt delivery. To achieve this we need to pass the return code of guest memory access routines used in interrupt delivery all the way back to the KVM_RUN ioctl. Signed-off-by: Jens Freimann Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 40 ++++++++++++++++++---------------------- arch/s390/kvm/kvm-s390.c | 7 +++++-- arch/s390/kvm/kvm-s390.h | 2 +- 3 files changed, 24 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 34d741edb50a..e2f624058457 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -28,7 +28,7 @@ #define IOINT_AI_MASK 0x04000000 #define PFAULT_INIT 0x0600 -static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu); +static int deliver_ckc_interrupt(struct kvm_vcpu *vcpu); static int is_ioint(u64 type) { @@ -307,7 +307,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu, return rc; } -static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, +static int __do_deliver_interrupt(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { const unsigned short table[] = { 2, 4, 4, 6 }; @@ -345,7 +345,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, case KVM_S390_INT_CLOCK_COMP: trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, inti->ext.ext_params, 0); - deliver_ckc_interrupt(vcpu); + rc = deliver_ckc_interrupt(vcpu); break; case KVM_S390_INT_CPU_TIMER: trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, @@ -504,14 +504,11 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, default: BUG(); } - if (rc) { - printk("kvm: The guest lowcore is not mapped during interrupt " - "delivery, killing userspace\n"); - do_exit(SIGKILL); - } + + return rc; } -static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) +static int deliver_ckc_interrupt(struct kvm_vcpu *vcpu) { int rc; @@ -521,11 +518,7 @@ static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu) rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - if (rc) { - printk("kvm: The guest lowcore is not mapped during interrupt " - "delivery, killing userspace\n"); - do_exit(SIGKILL); - } + return rc; } /* Check whether SIGP interpretation facility has an external call pending */ @@ -664,12 +657,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); } -void 
kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) +int kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; struct kvm_s390_interrupt_info *n, *inti = NULL; int deliver; + int rc = 0; __reset_intercept_indicators(vcpu); if (atomic_read(&li->active)) { @@ -688,16 +682,16 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) atomic_set(&li->active, 0); spin_unlock(&li->lock); if (deliver) { - __do_deliver_interrupt(vcpu, inti); + rc = __do_deliver_interrupt(vcpu, inti); kfree(inti); } - } while (deliver); + } while (!rc && deliver); } - if (kvm_cpu_has_pending_timer(vcpu)) - deliver_ckc_interrupt(vcpu); + if (!rc && kvm_cpu_has_pending_timer(vcpu)) + rc = deliver_ckc_interrupt(vcpu); - if (atomic_read(&fi->active)) { + if (!rc && atomic_read(&fi->active)) { do { deliver = 0; spin_lock(&fi->lock); @@ -714,11 +708,13 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) atomic_set(&fi->active, 0); spin_unlock(&fi->lock); if (deliver) { - __do_deliver_interrupt(vcpu, inti); + rc = __do_deliver_interrupt(vcpu, inti); kfree(inti); } - } while (deliver); + } while (!rc && deliver); } + + return rc; } int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 56193becdca5..c2caa175320c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1198,8 +1198,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) if (test_cpu_flag(CIF_MCCK_PENDING)) s390_handle_mcck(); - if (!kvm_is_ucontrol(vcpu->kvm)) - kvm_s390_deliver_pending_interrupts(vcpu); + if (!kvm_is_ucontrol(vcpu->kvm)) { + rc = kvm_s390_deliver_pending_interrupts(vcpu); + if (rc) + return rc; + } rc = kvm_s390_handle_requests(vcpu); if (rc) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 54c25fd5bed6..99abcb56e478 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -138,7 +138,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); -void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); +int kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); void kvm_s390_clear_float_irqs(struct kvm *kvm); int __must_check kvm_s390_inject_vm(struct kvm *kvm, -- cgit v1.2.3 From 331cbc277ec4ae5827b9ca538d9b5469fdca0947 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Mon, 11 Aug 2014 15:39:43 +0200 Subject: KVM: s390: don't use kvm lock in interrupt injection code The kvm lock protects us against vcpus going away, but they only go away when the virtual machine is shut down. We don't need this mutex here, so let's get rid of it. 
Signed-off-by: Jens Freimann Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/interrupt.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index e2f624058457..ba89bbbd2ed5 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -991,7 +991,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm, s390int->parm64, 2); - mutex_lock(&vcpu->kvm->lock); li = &vcpu->arch.local_int; spin_lock(&li->lock); if (inti->type == KVM_S390_PROGRAM_INT) @@ -1003,7 +1002,6 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, li->action_bits |= ACTION_STOP_ON_STOP; atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); spin_unlock(&li->lock); - mutex_unlock(&vcpu->kvm->lock); kvm_s390_vcpu_wakeup(vcpu); return 0; } -- cgit v1.2.3 From 55dbbdd9a832b3c0546a65df155d9e6eee8c312e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 30 Apr 2014 14:44:44 +0200 Subject: KVM: s390/mm: readd address parameter to pgste_ipte_notify Revert git commit 1b7fd6952063 ("remove unecessary parameter from pgste_ipte_notify") Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgtable.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b76317c1f3eb..4a649888cb29 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -859,6 +859,7 @@ int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); void gmap_do_ipte_notify(struct mm_struct *, pte_t *); static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE @@ -1110,7 +1111,7 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, pgste_val(pgste) &= ~PGSTE_UC_BIT; pte = *ptep; if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { - pgste = pgste_ipte_notify(mm, ptep, pgste); + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); __ptep_ipte(addr, ptep); if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) pte_val(pte) |= _PAGE_PROTECT; @@ -1132,7 +1133,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); } pte = *ptep; @@ -1178,7 +1179,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, ptep, pgste); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; @@ -1202,7 +1203,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste_ipte_notify(mm, ptep, pgste); + pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; @@ -1239,7 +1240,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); } pte = *ptep; @@ -1273,7 +1274,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, if (!full && mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, ptep, 
pgste); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; @@ -1298,7 +1299,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, if (pte_write(pte)) { if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, ptep, pgste); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); } ptep_flush_lazy(mm, address, ptep); @@ -1324,7 +1325,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, return 0; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); + pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); } ptep_flush_direct(vma->vm_mm, address, ptep); -- cgit v1.2.3 From 9da4e3807657f3bcd12cfbb5671d80794303dde2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 30 Apr 2014 14:46:26 +0200 Subject: KVM: s390/mm: readd address parameter to gmap_do_ipte_notify Revert git commit c3a23b9874c1 ("remove unnecessary parameter from gmap_do_ipte_notify"). Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgtable.h | 4 ++-- arch/s390/mm/pgtable.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4a649888cb29..4cd91ac42f46 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -856,7 +856,7 @@ bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); void gmap_register_ipte_notifier(struct gmap_notifier *); void gmap_unregister_ipte_notifier(struct gmap_notifier *); int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); -void gmap_do_ipte_notify(struct mm_struct *, pte_t *); +void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, unsigned long addr, @@ -865,7 +865,7 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, #ifdef CONFIG_PGSTE if (pgste_val(pgste) & PGSTE_IN_BIT) { pgste_val(pgste) &= ~PGSTE_IN_BIT; - gmap_do_ipte_notify(mm, ptep); + gmap_do_ipte_notify(mm, addr, ptep); } #endif return pgste; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5404a6261db9..c09820dce81c 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -809,12 +809,13 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify); /** * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space * @pte: pointer to the page table entry * * This function is assumed to be called with the page table lock held * for the pte to notify. */ -void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte) +void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) { unsigned long segment_offset; struct gmap_notifier *nb; -- cgit v1.2.3 From 6e0a0431bf7d90ed0b8a0a974ad219617a70cc22 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 29 Apr 2014 09:34:41 +0200 Subject: KVM: s390/mm: cleanup gmap function arguments, variable names Make the order of arguments for the gmap calls more consistent, if the gmap pointer is passed it is always the first argument. In addition distinguish between guest address and user address by naming the variables gaddr for a guest address and vmaddr for a user address. 
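A toy, stand-alone illustration of the convention (nothing below is kernel code; the struct and numbers are invented): the gmap-like handle always comes first, guest addresses are named gaddr, and host/user addresses are named vmaddr.

#include <stdio.h>

/* Invented example of the naming and argument-order convention only. */
struct toy_gmap {
	unsigned long guest_base;	/* base of the mapped guest range */
	unsigned long host_base;	/* matching host (user) virtual address */
};

/* handle first, then the guest address; the result is a host address */
static unsigned long toy_translate(struct toy_gmap *gmap, unsigned long gaddr)
{
	unsigned long vmaddr = gmap->host_base + (gaddr - gmap->guest_base);
	return vmaddr;
}

int main(void)
{
	struct toy_gmap gmap = { 0x0UL, 0x70000000UL };

	printf("gaddr 0x1000 -> vmaddr 0x%lx\n", toy_translate(&gmap, 0x1000UL));
	return 0;
}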
Signed-off-by: Martin Schwidefsky Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgtable.h | 14 ++--- arch/s390/kvm/diag.c | 8 +-- arch/s390/kvm/interrupt.c | 2 +- arch/s390/kvm/kvm-s390.c | 4 +- arch/s390/kvm/priv.c | 2 +- arch/s390/mm/fault.c | 2 +- arch/s390/mm/pgtable.c | 110 ++++++++++++++++++++-------------------- 7 files changed, 72 insertions(+), 70 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4cd91ac42f46..d95012f9e77f 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -834,7 +834,7 @@ struct gmap_pgtable { */ struct gmap_notifier { struct list_head list; - void (*notifier_call)(struct gmap *gmap, unsigned long address); + void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); }; struct gmap *gmap_alloc(struct mm_struct *mm); @@ -844,12 +844,12 @@ void gmap_disable(struct gmap *gmap); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); -unsigned long __gmap_translate(unsigned long address, struct gmap *); -unsigned long gmap_translate(unsigned long address, struct gmap *); -unsigned long __gmap_fault(unsigned long address, struct gmap *); -unsigned long gmap_fault(unsigned long address, struct gmap *); -void gmap_discard(unsigned long from, unsigned long to, struct gmap *); -void __gmap_zap(unsigned long address, struct gmap *); +unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); +unsigned long gmap_translate(struct gmap *, unsigned long gaddr); +unsigned long __gmap_fault(struct gmap *, unsigned long gaddr); +unsigned long gmap_fault(struct gmap *, unsigned long gaddr); +void gmap_discard(struct gmap *, unsigned long from, unsigned long to); +void __gmap_zap(struct gmap *, unsigned long gaddr); bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 59bd8f991b98..b374b6cb7785 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -37,13 +37,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) /* we checked for start > end above */ if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { - gmap_discard(start, end, vcpu->arch.gmap); + gmap_discard(vcpu->arch.gmap, start, end); } else { if (start < prefix) - gmap_discard(start, prefix, vcpu->arch.gmap); + gmap_discard(vcpu->arch.gmap, start, prefix); if (end >= prefix) - gmap_discard(prefix + 2 * PAGE_SIZE, - end, vcpu->arch.gmap); + gmap_discard(vcpu->arch.gmap, + prefix + 2 * PAGE_SIZE, end); } return 0; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index ba89bbbd2ed5..60a5cf40d49a 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1241,7 +1241,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) } INIT_LIST_HEAD(&map->list); map->guest_addr = addr; - map->addr = gmap_translate(addr, kvm->arch.gmap); + map->addr = gmap_translate(kvm->arch.gmap, addr); if (map->addr == -EFAULT) { ret = -EFAULT; goto out; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c2caa175320c..5c877c8e4494 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1096,7 +1096,7 @@ long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) hva_t hva; long rc; - hva = gmap_fault(gpa, vcpu->arch.gmap); + hva = gmap_fault(vcpu->arch.gmap, gpa); if 
(IS_ERR_VALUE(hva)) return (long)hva; down_read(&mm->mmap_sem); @@ -1683,7 +1683,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - r = gmap_fault(arg, vcpu->arch.gmap); + r = gmap_fault(vcpu->arch.gmap, arg); if (!IS_ERR_VALUE(r)) r = 0; break; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d806f2cfde16..72bb2dd8b9cd 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -729,7 +729,7 @@ static int handle_essa(struct kvm_vcpu *vcpu) /* invalid entry */ break; /* try to free backing */ - __gmap_zap(cbrle, gmap); + __gmap_zap(gmap, cbrle); } up_read(&gmap->mm->mmap_sem); if (i < entries) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 3f3b35403d0a..4880399d040e 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -445,7 +445,7 @@ static inline int do_exception(struct pt_regs *regs, int access) gmap = (struct gmap *) ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); if (gmap) { - address = __gmap_fault(address, gmap); + address = __gmap_fault(gmap, address); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index c09820dce81c..16ca8617f2e1 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -295,7 +295,7 @@ static int gmap_alloc_table(struct gmap *gmap, /** * gmap_unmap_segment - unmap segment from the guest address space * @gmap: pointer to the guest address space structure - * @addr: address in the guest address space + * @to: address in the guest address space * @len: length of the memory area to unmap * * Returns 0 if the unmap succeeded, -EINVAL if not. @@ -348,6 +348,7 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment); * @gmap: pointer to the guest address space structure * @from: source address in the parent address space * @to: target address in the guest address space + * @len: length of the memory area to map * * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. */ @@ -405,30 +406,30 @@ out_unmap: } EXPORT_SYMBOL_GPL(gmap_map_segment); -static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) +static unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr) { unsigned long *table; - table = gmap->table + ((address >> 53) & 0x7ff); + table = gmap->table + ((gaddr >> 53) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 42) & 0x7ff); + table = table + ((gaddr >> 42) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 31) & 0x7ff); + table = table + ((gaddr >> 31) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 20) & 0x7ff); + table = table + ((gaddr >> 20) & 0x7ff); return table; } /** * __gmap_translate - translate a guest address to a user space address - * @address: guest address * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address * * Returns user space address which corresponds to the guest address or * -EFAULT if no such mapping exists. @@ -436,14 +437,14 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) * The mmap_sem of the mm that belongs to the address space must be held * when this function gets called. 
*/ -unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) +unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) { unsigned long *segment_ptr, vmaddr, segment; struct gmap_pgtable *mp; struct page *page; - current->thread.gmap_addr = address; - segment_ptr = gmap_table_walk(address, gmap); + current->thread.gmap_addr = gaddr; + segment_ptr = gmap_table_walk(gmap, gaddr); if (IS_ERR(segment_ptr)) return PTR_ERR(segment_ptr); /* Convert the gmap address to an mm address. */ @@ -451,10 +452,10 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) if (!(segment & _SEGMENT_ENTRY_INVALID)) { page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; - return mp->vmaddr | (address & ~PMD_MASK); + return mp->vmaddr | (gaddr & ~PMD_MASK); } else if (segment & _SEGMENT_ENTRY_PROTECT) { vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; - return vmaddr | (address & ~PMD_MASK); + return vmaddr | (gaddr & ~PMD_MASK); } return -EFAULT; } @@ -462,26 +463,27 @@ EXPORT_SYMBOL_GPL(__gmap_translate); /** * gmap_translate - translate a guest address to a user space address - * @address: guest address * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address * * Returns user space address which corresponds to the guest address or * -EFAULT if no such mapping exists. * This function does not establish potentially missing page table entries. */ -unsigned long gmap_translate(unsigned long address, struct gmap *gmap) +unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) { unsigned long rc; down_read(&gmap->mm->mmap_sem); - rc = __gmap_translate(address, gmap); + rc = __gmap_translate(gmap, gaddr); up_read(&gmap->mm->mmap_sem); return rc; } EXPORT_SYMBOL_GPL(gmap_translate); -static int gmap_connect_pgtable(unsigned long address, unsigned long segment, - unsigned long *segment_ptr, struct gmap *gmap) +static int gmap_connect_pgtable(struct gmap *gmap, unsigned long gaddr, + unsigned long segment, + unsigned long *segment_ptr) { unsigned long vmaddr; struct vm_area_struct *vma; @@ -521,7 +523,7 @@ static int gmap_connect_pgtable(unsigned long address, unsigned long segment, mp = (struct gmap_pgtable *) page->index; rmap->gmap = gmap; rmap->entry = segment_ptr; - rmap->vmaddr = address & PMD_MASK; + rmap->vmaddr = gaddr & PMD_MASK; spin_lock(&mm->page_table_lock); if (*segment_ptr == segment) { list_add(&rmap->list, &mp->mapper); @@ -560,15 +562,15 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) /* * this function is assumed to be called with mmap_sem held */ -unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) +unsigned long __gmap_fault(struct gmap *gmap, unsigned long gaddr) { unsigned long *segment_ptr, segment; struct gmap_pgtable *mp; struct page *page; int rc; - current->thread.gmap_addr = address; - segment_ptr = gmap_table_walk(address, gmap); + current->thread.gmap_addr = gaddr; + segment_ptr = gmap_table_walk(gmap, gaddr); if (IS_ERR(segment_ptr)) return -EFAULT; /* Convert the gmap address to an mm address. */ @@ -578,24 +580,24 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) /* Page table is present */ page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; - return mp->vmaddr | (address & ~PMD_MASK); + return mp->vmaddr | (gaddr & ~PMD_MASK); } if (!(segment & _SEGMENT_ENTRY_PROTECT)) /* Nothing mapped in the gmap address space. 
*/ break; - rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); + rc = gmap_connect_pgtable(gmap, gaddr, segment, segment_ptr); if (rc) return rc; } return -EFAULT; } -unsigned long gmap_fault(unsigned long address, struct gmap *gmap) +unsigned long gmap_fault(struct gmap *gmap, unsigned long gaddr) { unsigned long rc; down_read(&gmap->mm->mmap_sem); - rc = __gmap_fault(address, gmap); + rc = __gmap_fault(gmap, gaddr); up_read(&gmap->mm->mmap_sem); return rc; @@ -620,14 +622,14 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) /** * The mm->mmap_sem lock must be held */ -static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) +static void gmap_zap_unused(struct mm_struct *mm, unsigned long vmaddr) { unsigned long ptev, pgstev; spinlock_t *ptl; pgste_t pgste; pte_t *ptep, pte; - ptep = get_locked_pte(mm, address, &ptl); + ptep = get_locked_pte(mm, vmaddr, &ptl); if (unlikely(!ptep)) return; pte = *ptep; @@ -640,7 +642,7 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); - pte_clear(mm, address, ptep); + pte_clear(mm, vmaddr, ptep); } pgste_set_unlock(ptep, pgste); out_pte: @@ -650,14 +652,14 @@ out_pte: /* * this function is assumed to be called with mmap_sem held */ -void __gmap_zap(unsigned long address, struct gmap *gmap) +void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { unsigned long *table, *segment_ptr; - unsigned long segment, pgstev, ptev; + unsigned long segment, vmaddr, pgstev, ptev; struct gmap_pgtable *mp; struct page *page; - segment_ptr = gmap_table_walk(address, gmap); + segment_ptr = gmap_table_walk(gmap, gaddr); if (IS_ERR(segment_ptr)) return; segment = *segment_ptr; @@ -665,61 +667,61 @@ void __gmap_zap(unsigned long address, struct gmap *gmap) return; page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; - address = mp->vmaddr | (address & ~PMD_MASK); + vmaddr = mp->vmaddr | (gaddr & ~PMD_MASK); /* Page table is present */ table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN); - table = table + ((address >> 12) & 0xff); + table = table + ((vmaddr >> 12) & 0xff); pgstev = table[PTRS_PER_PTE]; ptev = table[0]; /* quick check, checked again with locks held */ if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) - gmap_zap_unused(gmap->mm, address); + gmap_zap_unused(gmap->mm, vmaddr); } EXPORT_SYMBOL_GPL(__gmap_zap); -void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) +void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) { - unsigned long *table, address, size; + unsigned long *table, gaddr, size; struct vm_area_struct *vma; struct gmap_pgtable *mp; struct page *page; down_read(&gmap->mm->mmap_sem); - address = from; - while (address < to) { + gaddr = from; + while (gaddr < to) { /* Walk the gmap address space page table */ - table = gmap->table + ((address >> 53) & 0x7ff); + table = gmap->table + ((gaddr >> 53) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INVALID)) { - address = (address + PMD_SIZE) & PMD_MASK; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 42) & 0x7ff); + table = table + ((gaddr >> 42) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INVALID)) { - 
address = (address + PMD_SIZE) & PMD_MASK; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 31) & 0x7ff); + table = table + ((gaddr >> 31) & 0x7ff); if (unlikely(*table & _REGION_ENTRY_INVALID)) { - address = (address + PMD_SIZE) & PMD_MASK; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((address >> 20) & 0x7ff); + table = table + ((gaddr >> 20) & 0x7ff); if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { - address = (address + PMD_SIZE) & PMD_MASK; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK; continue; } page = pfn_to_page(*table >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; vma = find_vma(gmap->mm, mp->vmaddr); - size = min(to - address, PMD_SIZE - (address & ~PMD_MASK)); - zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK), + size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); + zap_page_range(vma, mp->vmaddr | (gaddr & ~PMD_MASK), size, NULL); - address = (address + PMD_SIZE) & PMD_MASK; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK; } up_read(&gmap->mm->mmap_sem); } @@ -755,7 +757,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); /** * gmap_ipte_notify - mark a range of ptes for invalidation notification * @gmap: pointer to guest mapping meta data structure - * @start: virtual address in the guest address space + * @gaddr: virtual address in the guest address space * @len: size of area * * Returns 0 if for each page in the given range a gmap mapping exists and @@ -763,7 +765,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); * for one or more pages -EFAULT is returned. If no memory could be allocated * -ENOMEM is returned. This function establishes missing page table entries. */ -int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) +int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) { unsigned long addr; spinlock_t *ptl; @@ -771,12 +773,12 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) pgste_t pgste; int rc = 0; - if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK)) + if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) return -EINVAL; down_read(&gmap->mm->mmap_sem); while (len) { /* Convert gmap address and connect the page tables */ - addr = __gmap_fault(start, gmap); + addr = __gmap_fault(gmap, gaddr); if (IS_ERR_VALUE(addr)) { rc = addr; break; @@ -796,7 +798,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) pgste = pgste_get_lock(ptep); pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); - start += PAGE_SIZE; + gaddr += PAGE_SIZE; len -= PAGE_SIZE; } spin_unlock(ptl); -- cgit v1.2.3 From 54ad89b05ec49b90790de814647b244d3d2cc5ca Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Aug 2014 16:08:21 +0200 Subject: kvm: x86: fix tracing for 32-bit Fix commit 7b46268d29543e313e731606d845e65c17f232e4, which mistakenly included the new tracepoint under #ifdef CONFIG_X86_64. 
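The underlying pitfall generalizes beyond KVM: anything defined inside an architecture-only #ifdef silently vanishes on every other configuration. A stand-alone sketch (the macro and function names are invented):

#include <stdio.h>

/* Invented example: trace_common() is needed on every build, so it must sit
 * outside the 64-bit-only region; only trace_64bit_only() stays guarded. */
#ifdef BUILD_64BIT
static void trace_64bit_only(void)
{
	puts("64-bit-only tracepoint");
}
#endif

static void trace_common(void)
{
	puts("tracepoint available on 32-bit and 64-bit builds");
}

int main(void)
{
	trace_common();
#ifdef BUILD_64BIT
	trace_64bit_only();
#endif
	return 0;
}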
Reported-by: Sabrina Dubroca Signed-off-by: Paolo Bonzini --- arch/x86/kvm/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 1742dfbd26b3..4c2868f36808 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -848,6 +848,8 @@ TRACE_EVENT(kvm_track_tsc, __print_symbolic(__entry->host_clock, host_clocks)) ); +#endif /* CONFIG_X86_64 */ + TRACE_EVENT(kvm_ple_window, TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), TP_ARGS(grow, vcpu_id, new, old), @@ -878,8 +880,6 @@ TRACE_EVENT(kvm_ple_window, #define trace_kvm_ple_window_shrink(vcpu_id, new, old) \ trace_kvm_ple_window(false, vcpu_id, new, old) -#endif /* CONFIG_X86_64 */ - #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 527e30b41d8b86e9ae7f5b740de416958c0e574e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 30 Apr 2014 16:04:25 +0200 Subject: KVM: s390/mm: use radix trees for guest to host mappings Store the target address for the gmap segments in a radix tree instead of using invalid segment table entries. gmap_translate becomes a simple radix_tree_lookup, gmap_fault is split into the address translation with gmap_translate and the part that does the linking of the gmap shadow page table with the process page table. A second radix tree is used to keep the pointers to the segment table entries for segments that are mapped in the guest address space. On unmap of a segment the pointer is retrieved from the radix tree and is used to carry out the segment invalidation in the gmap shadow page table. As the radix tree can only store one pointer, each host segment may only be mapped to exactly one guest location. Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgalloc.h | 8 +- arch/s390/include/asm/pgtable.h | 15 +- arch/s390/include/asm/tlb.h | 2 +- arch/s390/kvm/kvm-s390.c | 18 +- arch/s390/mm/fault.c | 25 +- arch/s390/mm/pgtable.c | 621 +++++++++++++++++----------------------- arch/s390/mm/vmem.c | 2 +- 7 files changed, 308 insertions(+), 383 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 9e18a61d3df3..d39a31c3cdf2 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -18,9 +18,9 @@ unsigned long *crst_table_alloc(struct mm_struct *); void crst_table_free(struct mm_struct *, unsigned long *); -unsigned long *page_table_alloc(struct mm_struct *, unsigned long); +unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); -void page_table_free_rcu(struct mmu_gather *, unsigned long *); +void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long, bool init_skey); @@ -145,8 +145,8 @@ static inline void pmd_populate(struct mm_struct *mm, /* * page table entry allocation/free routines. 
*/ -#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) -#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr)) +#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) +#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm)) #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index d95012f9e77f..9bfdbca14f95 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -789,19 +790,25 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) /** * struct gmap_struct - guest address space + * @crst_list: list of all crst tables used in the guest address space * @mm: pointer to the parent mm_struct + * @guest_to_host: radix tree with guest to host address translation + * @host_to_guest: radix tree with pointer to segment table entries + * @guest_table_lock: spinlock to protect all entries in the guest page table * @table: pointer to the page directory * @asce: address space control element for gmap page table - * @crst_list: list of all crst tables used in the guest address space * @pfault_enabled: defines if pfaults are applicable for the guest */ struct gmap { struct list_head list; + struct list_head crst_list; struct mm_struct *mm; + struct radix_tree_root guest_to_host; + struct radix_tree_root host_to_guest; + spinlock_t guest_table_lock; unsigned long *table; unsigned long asce; void *private; - struct list_head crst_list; bool pfault_enabled; }; @@ -846,8 +853,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); unsigned long gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long __gmap_fault(struct gmap *, unsigned long gaddr); -unsigned long gmap_fault(struct gmap *, unsigned long gaddr); +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); +int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index a25f09fbaf36..572c59949004 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -105,7 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long address) { - page_table_free_rcu(tlb, (unsigned long *) pte); + page_table_free_rcu(tlb, (unsigned long *) pte, address); } /* diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5c877c8e4494..543c24baf1eb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1092,18 +1092,8 @@ retry: */ long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable) { - struct mm_struct *mm = current->mm; - hva_t hva; - long rc; - - hva = gmap_fault(vcpu->arch.gmap, gpa); - if (IS_ERR_VALUE(hva)) - return (long)hva; - down_read(&mm->mmap_sem); - rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL); - 
up_read(&mm->mmap_sem); - - return rc < 0 ? rc : 0; + return gmap_fault(vcpu->arch.gmap, gpa, + writable ? FAULT_FLAG_WRITE : 0); } static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token, @@ -1683,9 +1673,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - r = gmap_fault(vcpu->arch.gmap, arg); - if (!IS_ERR_VALUE(r)) - r = 0; + r = gmap_fault(vcpu->arch.gmap, arg, 0); break; } case KVM_ENABLE_CAP: diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 4880399d040e..a2b81d6ce8a5 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -442,18 +442,15 @@ static inline int do_exception(struct pt_regs *regs, int access) down_read(&mm->mmap_sem); #ifdef CONFIG_PGSTE - gmap = (struct gmap *) - ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0); + gmap = (current->flags & PF_VCPU) ? + (struct gmap *) S390_lowcore.gmap : NULL; if (gmap) { - address = __gmap_fault(gmap, address); + current->thread.gmap_addr = address; + address = __gmap_translate(gmap, address); if (address == -EFAULT) { fault = VM_FAULT_BADMAP; goto out_up; } - if (address == -ENOMEM) { - fault = VM_FAULT_OOM; - goto out_up; - } if (gmap->pfault_enabled) flags |= FAULT_FLAG_RETRY_NOWAIT; } @@ -530,6 +527,20 @@ retry: goto retry; } } +#ifdef CONFIG_PGSTE + if (gmap) { + address = __gmap_link(gmap, current->thread.gmap_addr, + address); + if (address == -EFAULT) { + fault = VM_FAULT_BADMAP; + goto out_up; + } + if (address == -ENOMEM) { + fault = VM_FAULT_OOM; + goto out_up; + } + } +#endif fault = 0; out_up: up_read(&mm->mmap_sem); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 16ca8617f2e1..74dfd9eaa300 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -158,17 +158,23 @@ struct gmap *gmap_alloc(struct mm_struct *mm) if (!gmap) goto out; INIT_LIST_HEAD(&gmap->crst_list); + INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); + INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); + spin_lock_init(&gmap->guest_table_lock); gmap->mm = mm; page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); if (!page) goto out_free; + page->index = 0; list_add(&page->lru, &gmap->crst_list); table = (unsigned long *) page_to_phys(page); crst_table_init(table, _REGION1_ENTRY_EMPTY); gmap->table = table; gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | _ASCE_USER_BITS | __pa(table); + down_write(&mm->mmap_sem); list_add(&gmap->list, &mm->context.gmap_list); + up_write(&mm->mmap_sem); return gmap; out_free: @@ -178,27 +184,6 @@ out: } EXPORT_SYMBOL_GPL(gmap_alloc); -static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) -{ - struct gmap_pgtable *mp; - struct gmap_rmap *rmap; - struct page *page; - - if (*table & _SEGMENT_ENTRY_INVALID) - return 0; - page = pfn_to_page(*table >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - list_for_each_entry(rmap, &mp->mapper, list) { - if (rmap->entry != table) - continue; - list_del(&rmap->list); - kfree(rmap); - break; - } - *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT; - return 1; -} - static void gmap_flush_tlb(struct gmap *gmap) { if (MACHINE_HAS_IDTE) @@ -208,6 +193,30 @@ static void gmap_flush_tlb(struct gmap *gmap) __tlb_flush_global(); } +static void gmap_radix_tree_free(struct radix_tree_root *root) +{ + struct radix_tree_iter iter; + unsigned long indices[16]; + unsigned long index; + void **slot; + int i, nr; + + /* A radix tree is freed by deleting all of its entries */ + index = 0; + do { + nr = 0; + radix_tree_for_each_slot(slot, root, &iter, 
index) { + indices[nr] = iter.index; + if (++nr == 16) + break; + } + for (i = 0; i < nr; i++) { + index = indices[i]; + radix_tree_delete(root, index); + } + } while (nr > 0); +} + /** * gmap_free - free a guest address space * @gmap: pointer to the guest address space structure @@ -215,9 +224,6 @@ static void gmap_flush_tlb(struct gmap *gmap) void gmap_free(struct gmap *gmap) { struct page *page, *next; - unsigned long *table; - int i; - /* Flush tlb. */ if (MACHINE_HAS_IDTE) @@ -227,19 +233,13 @@ void gmap_free(struct gmap *gmap) __tlb_flush_global(); /* Free all segment & region tables. */ - down_read(&gmap->mm->mmap_sem); - spin_lock(&gmap->mm->page_table_lock); - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) { - table = (unsigned long *) page_to_phys(page); - if ((*table & _REGION_ENTRY_TYPE_MASK) == 0) - /* Remove gmap rmap structures for segment table. */ - for (i = 0; i < PTRS_PER_PMD; i++, table++) - gmap_unlink_segment(gmap, table); + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) __free_pages(page, ALLOC_ORDER); - } - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + gmap_radix_tree_free(&gmap->guest_to_host); + gmap_radix_tree_free(&gmap->host_to_guest); + down_write(&gmap->mm->mmap_sem); list_del(&gmap->list); + up_write(&gmap->mm->mmap_sem); kfree(gmap); } EXPORT_SYMBOL_GPL(gmap_free); @@ -267,31 +267,87 @@ EXPORT_SYMBOL_GPL(gmap_disable); /* * gmap_alloc_table is assumed to be called with mmap_sem held */ -static int gmap_alloc_table(struct gmap *gmap, - unsigned long *table, unsigned long init) - __releases(&gmap->mm->page_table_lock) - __acquires(&gmap->mm->page_table_lock) +static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, + unsigned long init, unsigned long gaddr) { struct page *page; unsigned long *new; /* since we dont free the gmap table until gmap_free we can unlock */ - spin_unlock(&gmap->mm->page_table_lock); page = alloc_pages(GFP_KERNEL, ALLOC_ORDER); - spin_lock(&gmap->mm->page_table_lock); if (!page) return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); + spin_lock(&gmap->mm->page_table_lock); if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); - } else + page->index = gaddr; + page = NULL; + } + spin_unlock(&gmap->mm->page_table_lock); + if (page) __free_pages(page, ALLOC_ORDER); return 0; } +/** + * __gmap_segment_gaddr - find virtual address from segment pointer + * @entry: pointer to a segment table entry in the guest address space + * + * Returns the virtual address in the guest address space for the segment + */ +static unsigned long __gmap_segment_gaddr(unsigned long *entry) +{ + struct page *page; + unsigned long offset; + + offset = (unsigned long) entry / sizeof(unsigned long); + offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; + page = pmd_to_page((pmd_t *) entry); + return page->index + offset; +} + +/** + * __gmap_unlink_by_vmaddr - unlink a single segment via a host address + * @gmap: pointer to the guest address space structure + * @vmaddr: address in the host process address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) +{ + unsigned long *entry; + int flush = 0; + + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); + if (entry) { + flush = (*entry != _SEGMENT_ENTRY_INVALID); + 
*entry = _SEGMENT_ENTRY_INVALID; + } + spin_unlock(&gmap->guest_table_lock); + return flush; +} + +/** + * __gmap_unmap_by_gaddr - unmap a single segment via a guest address + * @gmap: pointer to the guest address space structure + * @gaddr: address in the guest address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; +} + /** * gmap_unmap_segment - unmap segment from the guest address space * @gmap: pointer to the guest address space structure @@ -302,7 +358,6 @@ static int gmap_alloc_table(struct gmap *gmap, */ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) { - unsigned long *table; unsigned long off; int flush; @@ -312,31 +367,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) return -EINVAL; flush = 0; - down_read(&gmap->mm->mmap_sem); - spin_lock(&gmap->mm->page_table_lock); - for (off = 0; off < len; off += PMD_SIZE) { - /* Walk the guest addr space page table */ - table = gmap->table + (((to + off) >> 53) & 0x7ff); - if (*table & _REGION_ENTRY_INVALID) - goto out; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 42) & 0x7ff); - if (*table & _REGION_ENTRY_INVALID) - goto out; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 31) & 0x7ff); - if (*table & _REGION_ENTRY_INVALID) - goto out; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 20) & 0x7ff); - - /* Clear segment table entry in guest address space. */ - flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INVALID; - } -out: - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + up_write(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); return 0; @@ -355,7 +389,6 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len) { - unsigned long *table; unsigned long off; int flush; @@ -366,66 +399,26 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, return -EINVAL; flush = 0; - down_read(&gmap->mm->mmap_sem); - spin_lock(&gmap->mm->page_table_lock); + down_write(&gmap->mm->mmap_sem); for (off = 0; off < len; off += PMD_SIZE) { - /* Walk the gmap address space page table */ - table = gmap->table + (((to + off) >> 53) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) - goto out_unmap; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 42) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) - goto out_unmap; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 31) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) - goto out_unmap; - table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); - table = table + (((to + off) >> 20) & 0x7ff); - - /* Store 'from' address in an invalid segment table entry. 
*/ - flush |= gmap_unlink_segment(gmap, table); - *table = (from + off) | (_SEGMENT_ENTRY_INVALID | - _SEGMENT_ENTRY_PROTECT); + /* Remove old translation */ + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + /* Store new translation */ + if (radix_tree_insert(&gmap->guest_to_host, + (to + off) >> PMD_SHIFT, + (void *) from + off)) + break; } - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + up_write(&gmap->mm->mmap_sem); if (flush) gmap_flush_tlb(gmap); - return 0; - -out_unmap: - spin_unlock(&gmap->mm->page_table_lock); - up_read(&gmap->mm->mmap_sem); + if (off >= len) + return 0; gmap_unmap_segment(gmap, to, len); return -ENOMEM; } EXPORT_SYMBOL_GPL(gmap_map_segment); -static unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long *table; - - table = gmap->table + ((gaddr >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) - return ERR_PTR(-EFAULT); - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) - return ERR_PTR(-EFAULT); - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) - return ERR_PTR(-EFAULT); - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 20) & 0x7ff); - return table; -} - /** * __gmap_translate - translate a guest address to a user space address * @gmap: pointer to guest mapping meta data structure @@ -439,25 +432,11 @@ static unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr) */ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) { - unsigned long *segment_ptr, vmaddr, segment; - struct gmap_pgtable *mp; - struct page *page; + unsigned long vmaddr; - current->thread.gmap_addr = gaddr; - segment_ptr = gmap_table_walk(gmap, gaddr); - if (IS_ERR(segment_ptr)) - return PTR_ERR(segment_ptr); - /* Convert the gmap address to an mm address. */ - segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INVALID)) { - page = pfn_to_page(segment >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - return mp->vmaddr | (gaddr & ~PMD_MASK); - } else if (segment & _SEGMENT_ENTRY_PROTECT) { - vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; - return vmaddr | (gaddr & ~PMD_MASK); - } - return -EFAULT; + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); + return vmaddr ? 
(vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; } EXPORT_SYMBOL_GPL(__gmap_translate); @@ -481,125 +460,124 @@ unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) } EXPORT_SYMBOL_GPL(gmap_translate); -static int gmap_connect_pgtable(struct gmap *gmap, unsigned long gaddr, - unsigned long segment, - unsigned long *segment_ptr) +/** + * gmap_unlink - disconnect a page table from the gmap shadow tables + * @gmap: pointer to guest mapping meta data structure + * @table: pointer to the host page table + * @vmaddr: vm address associated with the host page table + */ +static void gmap_unlink(struct mm_struct *mm, unsigned long *table, + unsigned long vmaddr) +{ + struct gmap *gmap; + int flush; + + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); + if (flush) + gmap_flush_tlb(gmap); + } +} + +/** + * gmap_link - set up shadow page tables to connect a host to a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @vmaddr: vm address + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) { - unsigned long vmaddr; - struct vm_area_struct *vma; - struct gmap_pgtable *mp; - struct gmap_rmap *rmap; struct mm_struct *mm; - struct page *page; + unsigned long *table; + spinlock_t *ptl; pgd_t *pgd; pud_t *pud; pmd_t *pmd; + int rc; - mm = gmap->mm; - vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; - vma = find_vma(mm, vmaddr); - if (!vma || vma->vm_start > vmaddr) - return -EFAULT; - /* Walk the parent mm page table */ - pgd = pgd_offset(mm, vmaddr); - pud = pud_alloc(mm, pgd, vmaddr); - if (!pud) + /* Create higher level tables in the gmap page table */ + table = gmap->table + ((gaddr >> 53) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, + gaddr & 0xffe0000000000000)) return -ENOMEM; - pmd = pmd_alloc(mm, pud, vmaddr); - if (!pmd) + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((gaddr >> 42) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, + gaddr & 0xfffffc0000000000)) return -ENOMEM; - if (!pmd_present(*pmd) && - __pte_alloc(mm, vma, pmd, vmaddr)) + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = table + ((gaddr >> 31) & 0x7ff); + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, + gaddr & 0xffffffff80000000)) return -ENOMEM; + table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); + table = table + ((gaddr >> 20) & 0x7ff); + /* Walk the parent mm page table */ + mm = gmap->mm; + pgd = pgd_offset(mm, vmaddr); + VM_BUG_ON(pgd_none(*pgd)); + pud = pud_offset(pgd, vmaddr); + VM_BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, vmaddr); + VM_BUG_ON(pmd_none(*pmd)); /* large pmds cannot yet be handled */ if (pmd_large(*pmd)) return -EFAULT; - /* pmd now points to a valid segment table entry. */ - rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT); - if (!rmap) - return -ENOMEM; /* Link gmap segment table entry location to page table. 
*/ - page = pmd_page(*pmd); - mp = (struct gmap_pgtable *) page->index; - rmap->gmap = gmap; - rmap->entry = segment_ptr; - rmap->vmaddr = gaddr & PMD_MASK; - spin_lock(&mm->page_table_lock); - if (*segment_ptr == segment) { - list_add(&rmap->list, &mp->mapper); - /* Set gmap segment table entry to page table. */ - *segment_ptr = pmd_val(*pmd) & PAGE_MASK; - rmap = NULL; - } - spin_unlock(&mm->page_table_lock); - kfree(rmap); - return 0; -} - -static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) -{ - struct gmap_rmap *rmap, *next; - struct gmap_pgtable *mp; - struct page *page; - int flush; - - flush = 0; - spin_lock(&mm->page_table_lock); - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - list_for_each_entry_safe(rmap, next, &mp->mapper, list) { - *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID | - _SEGMENT_ENTRY_PROTECT); - list_del(&rmap->list); - kfree(rmap); - flush = 1; - } - spin_unlock(&mm->page_table_lock); - if (flush) - __tlb_flush_global(); + rc = radix_tree_preload(GFP_KERNEL); + if (rc) + return rc; + ptl = pmd_lock(mm, pmd); + spin_lock(&gmap->guest_table_lock); + if (*table == _SEGMENT_ENTRY_INVALID) { + rc = radix_tree_insert(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT, table); + if (!rc) + *table = pmd_val(*pmd); + } else + rc = 0; + spin_unlock(&gmap->guest_table_lock); + spin_unlock(ptl); + radix_tree_preload_end(); + return rc; } -/* - * this function is assumed to be called with mmap_sem held +/** + * gmap_fault - resolve a fault on a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @fault_flags: flags to pass down to handle_mm_fault() + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. */ -unsigned long __gmap_fault(struct gmap *gmap, unsigned long gaddr) +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) { - unsigned long *segment_ptr, segment; - struct gmap_pgtable *mp; - struct page *page; + unsigned long vmaddr; int rc; - current->thread.gmap_addr = gaddr; - segment_ptr = gmap_table_walk(gmap, gaddr); - if (IS_ERR(segment_ptr)) - return -EFAULT; - /* Convert the gmap address to an mm address. */ - while (1) { - segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INVALID)) { - /* Page table is present */ - page = pfn_to_page(segment >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - return mp->vmaddr | (gaddr & ~PMD_MASK); - } - if (!(segment & _SEGMENT_ENTRY_PROTECT)) - /* Nothing mapped in the gmap address space. 
*/ - break; - rc = gmap_connect_pgtable(gmap, gaddr, segment, segment_ptr); - if (rc) - return rc; - } - return -EFAULT; -} - -unsigned long gmap_fault(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long rc; - down_read(&gmap->mm->mmap_sem); - rc = __gmap_fault(gmap, gaddr); + vmaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(vmaddr)) { + rc = vmaddr; + goto out_up; + } + if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) { + rc = -EFAULT; + goto out_up; + } + rc = __gmap_link(gmap, gaddr, vmaddr); +out_up: up_read(&gmap->mm->mmap_sem); - return rc; } EXPORT_SYMBOL_GPL(gmap_fault); @@ -619,17 +597,24 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) free_swap_and_cache(entry); } -/** - * The mm->mmap_sem lock must be held +/* + * this function is assumed to be called with mmap_sem held */ -static void gmap_zap_unused(struct mm_struct *mm, unsigned long vmaddr) +void __gmap_zap(struct gmap *gmap, unsigned long gaddr) { - unsigned long ptev, pgstev; + unsigned long vmaddr, ptev, pgstev; + pte_t *ptep, pte; spinlock_t *ptl; pgste_t pgste; - pte_t *ptep, pte; - ptep = get_locked_pte(mm, vmaddr, &ptl); + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (!vmaddr) + return; + vmaddr |= gaddr & ~PMD_MASK; + /* Get pointer to the page table entry */ + ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); if (unlikely(!ptep)) return; pte = *ptep; @@ -641,87 +626,34 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long vmaddr) ptev = pte_val(pte); if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { - gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); - pte_clear(mm, vmaddr, ptep); + gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm); + pte_clear(gmap->mm, vmaddr, ptep); } pgste_set_unlock(ptep, pgste); out_pte: pte_unmap_unlock(*ptep, ptl); } - -/* - * this function is assumed to be called with mmap_sem held - */ -void __gmap_zap(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long *table, *segment_ptr; - unsigned long segment, vmaddr, pgstev, ptev; - struct gmap_pgtable *mp; - struct page *page; - - segment_ptr = gmap_table_walk(gmap, gaddr); - if (IS_ERR(segment_ptr)) - return; - segment = *segment_ptr; - if (segment & _SEGMENT_ENTRY_INVALID) - return; - page = pfn_to_page(segment >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - vmaddr = mp->vmaddr | (gaddr & ~PMD_MASK); - /* Page table is present */ - table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN); - table = table + ((vmaddr >> 12) & 0xff); - pgstev = table[PTRS_PER_PTE]; - ptev = table[0]; - /* quick check, checked again with locks held */ - if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || - ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) - gmap_zap_unused(gmap->mm, vmaddr); -} EXPORT_SYMBOL_GPL(__gmap_zap); void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) { - - unsigned long *table, gaddr, size; + unsigned long gaddr, vmaddr, size; struct vm_area_struct *vma; - struct gmap_pgtable *mp; - struct page *page; down_read(&gmap->mm->mmap_sem); - gaddr = from; - while (gaddr < to) { - /* Walk the gmap address space page table */ - table = gmap->table + ((gaddr >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) { - gaddr = (gaddr + PMD_SIZE) & PMD_MASK; - continue; - } - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - 
table = table + ((gaddr >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) { - gaddr = (gaddr + PMD_SIZE) & PMD_MASK; - continue; - } - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INVALID)) { - gaddr = (gaddr + PMD_SIZE) & PMD_MASK; - continue; - } - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { - gaddr = (gaddr + PMD_SIZE) & PMD_MASK; + for (gaddr = from; gaddr < to; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (!vmaddr) continue; - } - page = pfn_to_page(*table >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - vma = find_vma(gmap->mm, mp->vmaddr); + vmaddr |= gaddr & ~PMD_MASK; + /* Find vma in the parent mm */ + vma = find_vma(gmap->mm, vmaddr); size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range(vma, mp->vmaddr | (gaddr & ~PMD_MASK), - size, NULL); - gaddr = (gaddr + PMD_SIZE) & PMD_MASK; + zap_page_range(vma, vmaddr, size, NULL); } up_read(&gmap->mm->mmap_sem); } @@ -778,7 +710,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) down_read(&gmap->mm->mmap_sem); while (len) { /* Convert gmap address and connect the page tables */ - addr = __gmap_fault(gmap, gaddr); + addr = __gmap_translate(gmap, gaddr); if (IS_ERR_VALUE(addr)) { rc = addr; break; @@ -788,6 +720,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) rc = -EFAULT; break; } + rc = __gmap_link(gmap, gaddr, addr); + if (rc) + break; /* Walk the process page table, lock and get pte pointer */ ptep = get_locked_pte(gmap->mm, addr, &ptl); if (unlikely(!ptep)) @@ -817,23 +752,24 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify); * This function is assumed to be called with the page table lock held * for the pte to notify. 
*/ -void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) +void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) { - unsigned long segment_offset; + unsigned long offset, gaddr; + unsigned long *table; struct gmap_notifier *nb; - struct gmap_pgtable *mp; - struct gmap_rmap *rmap; - struct page *page; + struct gmap *gmap; - segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); - segment_offset = segment_offset * (4096 / sizeof(pte_t)); - page = pfn_to_page(__pa(pte) >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; + offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + offset = offset * (4096 / sizeof(pte_t)); spin_lock(&gmap_notifier_lock); - list_for_each_entry(rmap, &mp->mapper, list) { + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + table = radix_tree_lookup(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (!table) + continue; + gaddr = __gmap_segment_gaddr(table) + offset; list_for_each_entry(nb, &gmap_notifier_list, list) - nb->notifier_call(rmap->gmap, - rmap->vmaddr + segment_offset); + nb->notifier_call(gmap, gaddr); } spin_unlock(&gmap_notifier_lock); } @@ -844,29 +780,18 @@ static inline int page_table_with_pgste(struct page *page) return atomic_read(&page->_mapcount) == 0; } -static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, - unsigned long vmaddr) +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) { struct page *page; unsigned long *table; - struct gmap_pgtable *mp; page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; - mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT); - if (!mp) { - __free_page(page); - return NULL; - } if (!pgtable_page_ctor(page)) { - kfree(mp); __free_page(page); return NULL; } - mp->vmaddr = vmaddr & PMD_MASK; - INIT_LIST_HEAD(&mp->mapper); - page->index = (unsigned long) mp; atomic_set(&page->_mapcount, 0); table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); @@ -877,14 +802,10 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, static inline void page_table_free_pgste(unsigned long *table) { struct page *page; - struct gmap_pgtable *mp; page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - mp = (struct gmap_pgtable *) page->index; - BUG_ON(!list_empty(&mp->mapper)); pgtable_page_dtor(page); atomic_set(&page->_mapcount, -1); - kfree(mp); __free_page(page); } @@ -1041,8 +962,7 @@ static inline int page_table_with_pgste(struct page *page) return 0; } -static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, - unsigned long vmaddr) +static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm) { return NULL; } @@ -1056,8 +976,8 @@ static inline void page_table_free_pgste(unsigned long *table) { } -static inline void gmap_disconnect_pgtable(struct mm_struct *mm, - unsigned long *table) +static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table, + unsigned long vmaddr) { } @@ -1077,14 +997,14 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) /* * page table entry allocation/free routines. 
*/ -unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) +unsigned long *page_table_alloc(struct mm_struct *mm) { unsigned long *uninitialized_var(table); struct page *uninitialized_var(page); unsigned int mask, bit; if (mm_has_pgste(mm)) - return page_table_alloc_pgste(mm, vmaddr); + return page_table_alloc_pgste(mm); /* Allocate fragments of a 4K page as 1K/2K page table */ spin_lock_bh(&mm->context.list_lock); mask = FRAG_MASK; @@ -1126,10 +1046,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) unsigned int bit, mask; page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (page_table_with_pgste(page)) { - gmap_disconnect_pgtable(mm, table); + if (page_table_with_pgste(page)) return page_table_free_pgste(table); - } /* Free 1K/2K page table fragment of a 4K page */ bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t))); spin_lock_bh(&mm->context.list_lock); @@ -1161,7 +1079,8 @@ static void __page_table_free_rcu(void *table, unsigned bit) } } -void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, + unsigned long vmaddr) { struct mm_struct *mm; struct page *page; @@ -1170,7 +1089,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table) mm = tlb->mm; page = pfn_to_page(__pa(table) >> PAGE_SHIFT); if (page_table_with_pgste(page)) { - gmap_disconnect_pgtable(mm, table); + gmap_unlink(mm, table, vmaddr); table = (unsigned long *) (__pa(table) | FRAG_MASK); tlb_remove_table(tlb, table); return; @@ -1306,7 +1225,7 @@ again: if (page_table_with_pgste(page)) continue; /* Allocate new page table with pgstes */ - new = page_table_alloc_pgste(mm, addr); + new = page_table_alloc_pgste(mm); if (!new) return -ENOMEM; @@ -1321,7 +1240,7 @@ again: /* Establish new table */ pmd_populate(mm, pmd, (pte_t *) new); /* Free old table with rcu, there might be a walker! */ - page_table_free_rcu(tlb, table); + page_table_free_rcu(tlb, table, addr); new = NULL; } spin_unlock(ptl); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index fe9012a49aa5..fdbd7888cb07 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -65,7 +65,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address) pte_t *pte; if (slab_is_available()) - pte = (pte_t *) page_table_alloc(&init_mm, address); + pte = (pte_t *) page_table_alloc(&init_mm); else pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t), PTRS_PER_PTE * sizeof(pte_t)); -- cgit v1.2.3 From c6c956b80bdf151cf41d3e7e5c54755d930a212c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 1 Jul 2014 14:36:04 +0200 Subject: KVM: s390/mm: support gmap page tables with less than 5 levels Add an addressing limit to the gmap address spaces and only allocate the page table levels that are needed for the given limit. The limit is fixed and can not be changed after a gmap has been created. 
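For illustration, a minimal sketch of the level selection this limit implies; the helper name pick_gmap_asce() is hypothetical, but the thresholds and constants are the ones gmap_alloc() uses in the patch below:

	static void pick_gmap_asce(unsigned long limit,
				   unsigned long *atype, unsigned long *etype)
	{
		if (limit < (1UL << 31)) {		/* up to 2 GB: segment table only */
			*atype = _ASCE_TYPE_SEGMENT;
			*etype = _SEGMENT_ENTRY_EMPTY;
		} else if (limit < (1UL << 42)) {	/* up to 4 TB: region-third table */
			*atype = _ASCE_TYPE_REGION3;
			*etype = _REGION3_ENTRY_EMPTY;
		} else if (limit < (1UL << 53)) {	/* up to 8 PB: region-second table */
			*atype = _ASCE_TYPE_REGION2;
			*etype = _REGION2_ENTRY_EMPTY;
		} else {				/* full address space: region-first table */
			*atype = _ASCE_TYPE_REGION1;
			*etype = _REGION1_ENTRY_EMPTY;
		}
	}

The chosen ASCE type also fixes how many table levels __gmap_link() has to walk, which is why the patch guards each region-table step with a comparison against gmap->asce.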
Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgtable.h | 3 +- arch/s390/kvm/kvm-s390.c | 4 +- arch/s390/mm/pgtable.c | 85 ++++++++++++++++++++++++++--------------- 3 files changed, 59 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9bfdbca14f95..7705180e906d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -808,6 +808,7 @@ struct gmap { spinlock_t guest_table_lock; unsigned long *table; unsigned long asce; + unsigned long asce_end; void *private; bool pfault_enabled; }; @@ -844,7 +845,7 @@ struct gmap_notifier { void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); }; -struct gmap *gmap_alloc(struct mm_struct *mm); +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); void gmap_free(struct gmap *gmap); void gmap_enable(struct gmap *gmap); void gmap_disable(struct gmap *gmap); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 543c24baf1eb..82065dc7948d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -451,7 +451,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; } else { - kvm->arch.gmap = gmap_alloc(current->mm); + kvm->arch.gmap = gmap_alloc(current->mm, -1UL); if (!kvm->arch.gmap) goto out_nogmap; kvm->arch.gmap->private = kvm; @@ -535,7 +535,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; kvm_clear_async_pf_completion_queue(vcpu); if (kvm_is_ucontrol(vcpu->kvm)) { - vcpu->arch.gmap = gmap_alloc(current->mm); + vcpu->arch.gmap = gmap_alloc(current->mm, -1UL); if (!vcpu->arch.gmap) return -ENOMEM; vcpu->arch.gmap->private = vcpu->kvm; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 74dfd9eaa300..665714b08c0d 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -145,15 +145,34 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) /** * gmap_alloc - allocate a guest address space * @mm: pointer to the parent mm_struct + * @limit: maximum size of the gmap address space * * Returns a guest address space structure. 
*/ -struct gmap *gmap_alloc(struct mm_struct *mm) +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) { struct gmap *gmap; struct page *page; unsigned long *table; - + unsigned long etype, atype; + + if (limit < (1UL << 31)) { + limit = (1UL << 31) - 1; + atype = _ASCE_TYPE_SEGMENT; + etype = _SEGMENT_ENTRY_EMPTY; + } else if (limit < (1UL << 42)) { + limit = (1UL << 42) - 1; + atype = _ASCE_TYPE_REGION3; + etype = _REGION3_ENTRY_EMPTY; + } else if (limit < (1UL << 53)) { + limit = (1UL << 53) - 1; + atype = _ASCE_TYPE_REGION2; + etype = _REGION2_ENTRY_EMPTY; + } else { + limit = -1UL; + atype = _ASCE_TYPE_REGION1; + etype = _REGION1_ENTRY_EMPTY; + } gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); if (!gmap) goto out; @@ -168,10 +187,11 @@ struct gmap *gmap_alloc(struct mm_struct *mm) page->index = 0; list_add(&page->lru, &gmap->crst_list); table = (unsigned long *) page_to_phys(page); - crst_table_init(table, _REGION1_ENTRY_EMPTY); + crst_table_init(table, etype); gmap->table = table; - gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | __pa(table); + gmap->asce = atype | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); + gmap->asce_end = limit; down_write(&mm->mmap_sem); list_add(&gmap->list, &mm->context.gmap_list); up_write(&mm->mmap_sem); @@ -187,8 +207,7 @@ EXPORT_SYMBOL_GPL(gmap_alloc); static void gmap_flush_tlb(struct gmap *gmap) { if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | - _ASCE_TYPE_REGION1); + __tlb_flush_asce(gmap->mm, gmap->asce); else __tlb_flush_global(); } @@ -227,8 +246,7 @@ void gmap_free(struct gmap *gmap) /* Flush tlb. */ if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table | - _ASCE_TYPE_REGION1); + __tlb_flush_asce(gmap->mm, gmap->asce); else __tlb_flush_global(); @@ -394,8 +412,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; - if (len == 0 || from + len > TASK_MAX_SIZE || - from + len < from || to + len < to) + if (len == 0 || from + len < from || to + len < to || + from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) return -EINVAL; flush = 0; @@ -501,25 +519,32 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) int rc; /* Create higher level tables in the gmap page table */ - table = gmap->table + ((gaddr >> 53) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, - gaddr & 0xffe0000000000000)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 42) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, - gaddr & 0xfffffc0000000000)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 31) & 0x7ff); - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, - gaddr & 0xffffffff80000000)) - return -ENOMEM; - table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); - table = table + ((gaddr >> 20) & 0x7ff); + table = gmap->table; + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { + table += (gaddr >> 53) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, + gaddr & 0xffe0000000000000)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { + table += 
(gaddr >> 42) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, + gaddr & 0xfffffc0000000000)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { + table += (gaddr >> 31) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, + gaddr & 0xffffffff80000000)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + table += (gaddr >> 20) & 0x7ff; /* Walk the parent mm page table */ mm = gmap->mm; pgd = pgd_offset(mm, vmaddr); -- cgit v1.2.3 From f079e9521464aa522d56af2a58a1666ca126bf6f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 1 Aug 2014 15:03:33 +0200 Subject: KVM: s390/mm: remove outdated gmap data structures The radix tree rework removed all code that uses the gmap_rmap and gmap_pgtable data structures. Remove these outdated definitions. Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/pgtable.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7705180e906d..0242588ded67 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -813,29 +813,6 @@ struct gmap { bool pfault_enabled; }; -/** - * struct gmap_rmap - reverse mapping for segment table entries - * @gmap: pointer to the gmap_struct - * @entry: pointer to a segment table entry - * @vmaddr: virtual address in the guest address space - */ -struct gmap_rmap { - struct list_head list; - struct gmap *gmap; - unsigned long *entry; - unsigned long vmaddr; -}; - -/** - * struct gmap_pgtable - gmap information attached to a page table - * @vmaddr: address of the 1MB segment in the process virtual memory - * @mapper: list of segment table entries mapping a page table - */ -struct gmap_pgtable { - unsigned long vmaddr; - struct list_head mapper; -}; - /** * struct gmap_notifier - notify function block for page invalidation * @notifier_call: address of callback function -- cgit v1.2.3 From 98047888bb9fd57734028c44ec17413ddd623958 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:18:04 +0200 Subject: arm/arm64: KVM: Support KVM_CAP_READONLY_MEM When userspace loads code and data in a read-only memory regions, KVM needs to be able to handle this on arm and arm64. Specifically this is used when running code directly from a read-only flash device; the common scenario is a UEFI blob loaded with the -bios option in QEMU. Note that the MMIO exit on writes to a read-only memory is ABI and can be used to emulate block-erase style flash devices. 
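For illustration, a hedged userspace sketch of how such a flash region could be wired up once KVM_CAP_READONLY_MEM is advertised; the fd names, the slot number and handle_flash_write() are hypothetical, only the ioctl, the KVM_MEM_READONLY flag and the KVM_EXIT_MMIO fields are real ABI:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Register the UEFI/flash blob read-only; guest writes do not fault
	 * the host mapping but come back to userspace as MMIO exits. */
	struct kvm_userspace_memory_region flash = {
		.slot            = 1,                  /* hypothetical slot */
		.flags           = KVM_MEM_READONLY,
		.guest_phys_addr = flash_gpa,          /* assumed flash base */
		.memory_size     = flash_size,
		.userspace_addr  = (__u64)flash_image, /* mmap'ed blob */
	};
	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &flash) < 0)
		/* handle error */;

	/* In the vcpu run loop: a write to the read-only slot shows up as an
	 * ordinary MMIO exit, which is the hook for block-erase emulation. */
	if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write)
		handle_flash_write(run->mmio.phys_addr,
				   run->mmio.data, run->mmio.len);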
Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/uapi/asm/kvm.h | 1 + arch/arm/kvm/arm.c | 1 + arch/arm/kvm/mmu.c | 22 ++++++++-------------- arch/arm64/include/uapi/asm/kvm.h | 1 + 4 files changed, 11 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index e6ebdd3471e5..51257fda254b 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -25,6 +25,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE +#define __KVM_HAVE_READONLY_MEM #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9f788ebac55b..ac306b4dbe1d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -188,6 +188,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ARM_PSCI: case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_READONLY_MEM: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 16e7994bf347..62f5642153f9 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -747,14 +747,13 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_memory_slot *memslot, + struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) { int ret; bool write_fault, writable, hugetlb = false, force_pte = false; unsigned long mmu_seq; gfn_t gfn = fault_ipa >> PAGE_SHIFT; - unsigned long hva = gfn_to_hva(vcpu->kvm, gfn); struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; struct vm_area_struct *vma; @@ -863,7 +862,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) unsigned long fault_status; phys_addr_t fault_ipa; struct kvm_memory_slot *memslot; - bool is_iabt; + unsigned long hva; + bool is_iabt, write_fault, writable; gfn_t gfn; int ret, idx; @@ -884,7 +884,10 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) idx = srcu_read_lock(&vcpu->kvm->srcu); gfn = fault_ipa >> PAGE_SHIFT; - if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) { + memslot = gfn_to_memslot(vcpu->kvm, gfn); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); + write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + if (kvm_is_error_hva(hva) || (write_fault && !writable)) { if (is_iabt) { /* Prefetch Abort on I/O address */ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); @@ -892,13 +895,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - if (fault_status != FSC_FAULT) { - kvm_err("Unsupported fault status on io memory: %#lx\n", - fault_status); - ret = -EFAULT; - goto out_unlock; - } - /* * The IPA is reported as [MAX:12], so we need to * complement it with the bottom 12 bits from the @@ -910,9 +906,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) goto out_unlock; } - memslot = gfn_to_memslot(vcpu->kvm, gfn); - - ret = user_mem_abort(vcpu, fault_ipa, memslot, fault_status); + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; out_unlock: diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index e633ff8cdec8..f4ec5a674d05 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -37,6 +37,7 @@ #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE 
+#define __KVM_HAVE_READONLY_MEM #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) -- cgit v1.2.3 From 6951e48bff0b55d2a8e825a953fc1f8e3a34bf1c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:20 +0100 Subject: KVM: ARM/arm64: fix non-const declaration of function returning const Sparse kicks up about a type mismatch for kvm_target_cpu: arch/arm64/kvm/guest.c:271:25: error: symbol 'kvm_target_cpu' redeclared with different type (originally declared at ./arch/arm64/include/asm/kvm_host.h:45) - different modifiers so fix this by adding the missing const attribute to the function declaration. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6dfb404f6c46..fcb12a6f7db5 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -42,7 +42,7 @@ struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); -int kvm_target_cpu(void); +int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_reset_coprocs(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e10c45a578e3..44094e559848 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -42,7 +42,7 @@ #define KVM_VCPU_MAX_FEATURES 3 struct kvm_vcpu; -int kvm_target_cpu(void); +int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); -- cgit v1.2.3 From 4000be423cb01a8d09de878bb8184511c49d4238 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:21 +0100 Subject: KVM: ARM/arm64: fix broken __percpu annotation Running sparse results in a bunch of noisy address space mismatches thanks to the broken __percpu annotation on kvm_get_running_vcpus. This function returns a pcpu pointer to a pointer, not a pointer to a pcpu pointer. This patch fixes the annotation, which kills the warnings from sparse. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ac306b4dbe1d..53ee31b23961 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -82,7 +82,7 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void) /** * kvm_arm_get_running_vcpus - get the per-CPU array of currently running vcpus. 
*/ -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void) { return &kvm_arm_running_vcpu; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 44094e559848..50431d36732b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -193,7 +193,7 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) } struct kvm_vcpu *kvm_arm_get_running_vcpu(void); -struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); +struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); u64 kvm_call_hyp(void *hypfn, ...); -- cgit v1.2.3 From 18d457661fb9fa69352822ab98d39331c3d0e571 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:22 +0100 Subject: KVM: ARM/arm64: avoid returning negative error code as bool is_valid_cache returns true if the specified cache is valid. Unfortunately, if the parameter passed it out of range, we return -ENOENT, which ends up as true leading to potential hilarity. This patch returns false on the failure path instead. Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall --- arch/arm/kvm/coproc.c | 2 +- arch/arm64/kvm/sys_regs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 37a0fe1bb9bb..7928dbdf2102 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -791,7 +791,7 @@ static bool is_valid_cache(u32 val) u32 level, ctype; if (val >= CSSELR_MAX) - return -ENOENT; + return false; /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ level = (val >> 1); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 5805e7c4a4dd..4cc3b719208e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1218,7 +1218,7 @@ static bool is_valid_cache(u32 val) u32 level, ctype; if (val >= CSSELR_MAX) - return -ENOENT; + return false; /* Bottom bit is Instruction or Data bit. Next 3 bits are level. */ level = (val >> 1); -- cgit v1.2.3 From bd218bce92d3868ba4fe5e9e3eb8199d2aa614af Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Aug 2014 15:13:23 +0100 Subject: KVM: ARM/arm64: return -EFAULT if copy_from_user fails in set_timer_reg We currently return the number of bytes not copied if set_timer_reg fails, which is almost certainly not what userspace would like. This patch returns -EFAULT instead. 
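The underlying gotcha is that copy_from_user() reports the number of bytes it could not copy rather than an errno, so propagating its return value hands userspace a small positive integer. The usual idiom, equivalent to what the hunks below adopt, is:

	/* copy_from_user() returns the number of bytes NOT copied;
	 * any non-zero result means the user pointer was bad. */
	if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
		return -EFAULT;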
Cc: Christoffer Dall Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall --- arch/arm/kvm/guest.c | 2 +- arch/arm64/kvm/guest.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 813e49258690..cc0b78769bd8 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -163,7 +163,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); if (ret != 0) - return ret; + return -EFAULT; return kvm_arm_timer_set_reg(vcpu, reg->id, val); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 8d1ec2887a26..76794692c20b 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -174,7 +174,7 @@ static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); if (ret != 0) - return ret; + return -EFAULT; return kvm_arm_timer_set_reg(vcpu, reg->id, val); } -- cgit v1.2.3 From dc77d344b41f3ffdd3b02317597e717b0b799f46 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 27 Aug 2014 12:20:02 +0200 Subject: KVM: s390/mm: fix up indentation of set_guest_storage_key commit ab3f285f227f ("KVM: s390/mm: try a cow on read only pages for key ops")' misaligned a code block. Let's fixup the indentation. Reported-by: Ben Hutchings Signed-off-by: Christian Borntraeger Signed-off-by: Paolo Bonzini --- arch/s390/mm/pgtable.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 665714b08c0d..296b61a4af59 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -943,13 +943,13 @@ retry: } if (!(pte_val(*ptep) & _PAGE_INVALID) && (pte_val(*ptep) & _PAGE_PROTECT)) { - pte_unmap_unlock(*ptep, ptl); - if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { - up_read(&mm->mmap_sem); - return -EFAULT; - } - goto retry; + pte_unmap_unlock(*ptep, ptl); + if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) { + up_read(&mm->mmap_sem); + return -EFAULT; } + goto retry; + } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | -- cgit v1.2.3 From a2bcba5035bb3d7fb3099e1893026316365f4b5d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 21 Aug 2014 19:46:49 +0800 Subject: KVM: nVMX: introduce nested_get_vmcs12_pages Introduce function nested_get_vmcs12_pages() to check the valid of nested apic access page and virtual apic page earlier. Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 661abc2f7049..70516e11f051 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7927,6 +7927,30 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, kvm_inject_page_fault(vcpu, fault); } +static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + if (!PAGE_ALIGNED(vmcs12->apic_access_addr)) + /*TODO: Also verify bits beyond physical address width are 0*/ + return false; + + /* + * Translate L1 physical address to host physical + * address for vmcs02. Keep the page pinned, so this + * physical address remains valid. 
We keep a reference + * to it so we can release it later. + */ + if (vmx->nested.apic_access_page) /* shouldn't happen */ + nested_release_page(vmx->nested.apic_access_page); + vmx->nested.apic_access_page = + nested_get_page(vcpu, vmcs12->apic_access_addr); + } + return true; +} + static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) { u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; @@ -8072,16 +8096,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control |= vmcs12->secondary_vm_exec_control; if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) { - /* - * Translate L1 physical address to host physical - * address for vmcs02. Keep the page pinned, so this - * physical address remains valid. We keep a reference - * to it so we can release it later. - */ - if (vmx->nested.apic_access_page) /* shouldn't happen */ - nested_release_page(vmx->nested.apic_access_page); - vmx->nested.apic_access_page = - nested_get_page(vcpu, vmcs12->apic_access_addr); /* * If translation failed, no matter: This feature asks * to exit when accessing the given address, and if it @@ -8288,8 +8302,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; } - if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && - !PAGE_ALIGNED(vmcs12->apic_access_addr)) { + if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { /*TODO: Also verify bits beyond physical address width are 0*/ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; -- cgit v1.2.3 From a7c0b07d570848e50fce4d31ac01313484d6b844 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 21 Aug 2014 19:46:50 +0800 Subject: KVM: nVMX: nested TPR shadow/threshold emulation This patch fix bug https://bugzilla.kernel.org/show_bug.cgi?id=61411 TPR shadow/threshold feature is important to speed up the Windows guest. Besides, it is a must feature for certain VMM. We map virtual APIC page address and TPR threshold from L1 VMCS. If TPR_BELOW_THRESHOLD VM exit is triggered by L2 guest and L1 interested in, we inject it into L1 VMM for handling. Reviewed-by: Paolo Bonzini Signed-off-by: Wanpeng Li [Add PAGE_ALIGNED check, do not write useless virtual APIC page address if TPR shadowing is disabled. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 70516e11f051..73ba2a265f85 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -397,6 +397,7 @@ struct nested_vmx { * we must keep them pinned while L2 runs. 
*/ struct page *apic_access_page; + struct page *virtual_apic_page; u64 msr_ia32_feature_control; struct hrtimer preemption_timer; @@ -555,6 +556,7 @@ static int max_shadow_read_only_fields = ARRAY_SIZE(shadow_read_only_fields); static unsigned long shadow_read_write_fields[] = { + TPR_THRESHOLD, GUEST_RIP, GUEST_RSP, GUEST_CR0, @@ -2352,7 +2354,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | - CPU_BASED_PAUSE_EXITING | + CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; /* * We can allow some features even when not supported by the @@ -6246,6 +6248,10 @@ static void free_nested(struct vcpu_vmx *vmx) nested_release_page(vmx->nested.apic_access_page); vmx->nested.apic_access_page = 0; } + if (vmx->nested.virtual_apic_page) { + nested_release_page(vmx->nested.virtual_apic_page); + vmx->nested.virtual_apic_page = 0; + } nested_free_all_saved_vmcss(vmx); } @@ -7034,7 +7040,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_MCE_DURING_VMENTRY: return 0; case EXIT_REASON_TPR_BELOW_THRESHOLD: - return 1; + return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); case EXIT_REASON_APIC_ACCESS: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); @@ -7155,6 +7161,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + if (is_guest_mode(vcpu) && + nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) + return; + if (irr == -1 || tpr < irr) { vmcs_write32(TPR_THRESHOLD, 0); return; @@ -7933,8 +7945,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx = to_vmx(vcpu); if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + /* TODO: Also verify bits beyond physical address width are 0 */ if (!PAGE_ALIGNED(vmcs12->apic_access_addr)) - /*TODO: Also verify bits beyond physical address width are 0*/ return false; /* @@ -7948,6 +7960,31 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, vmx->nested.apic_access_page = nested_get_page(vcpu, vmcs12->apic_access_addr); } + + if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { + /* TODO: Also verify bits beyond physical address width are 0 */ + if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr)) + return false; + + if (vmx->nested.virtual_apic_page) /* shouldn't happen */ + nested_release_page(vmx->nested.virtual_apic_page); + vmx->nested.virtual_apic_page = + nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); + + /* + * Failing the vm entry is _not_ what the processor does + * but it's basically the only possibility we have. + * We could still enter the guest if CR8 load exits are + * enabled, CR8 store exits are enabled, and virtualize APIC + * access is disabled; in this case the processor would never + * use the TPR shadow and we could simply clear the bit from + * the execution control. But such a configuration is useless, + * so let's keep the code simple. 
+ */ + if (!vmx->nested.virtual_apic_page) + return false; + } + return true; } @@ -8141,6 +8178,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; exec_control &= ~CPU_BASED_TPR_SHADOW; exec_control |= vmcs12->cpu_based_vm_exec_control; + + if (exec_control & CPU_BASED_TPR_SHADOW) { + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, + page_to_phys(vmx->nested.virtual_apic_page)); + vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); + } + /* * Merging of IO and MSR bitmaps not currently supported. * Rather, exit every time. @@ -8908,6 +8952,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, nested_release_page(vmx->nested.apic_access_page); vmx->nested.apic_access_page = 0; } + if (vmx->nested.virtual_apic_page) { + nested_release_page(vmx->nested.virtual_apic_page); + vmx->nested.virtual_apic_page = 0; + } /* * Exiting from L2 to L1, we're now back to L1 which thinks it just -- cgit v1.2.3 From 48d89b92609a66bc41f479c560640bc413add3b4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 26 Aug 2014 13:27:46 +0200 Subject: KVM: x86: fix some sparse warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sparse reports the following easily fixed warnings: arch/x86/kvm/vmx.c:8795:48: sparse: Using plain integer as NULL pointer arch/x86/kvm/vmx.c:2138:5: sparse: symbol vmx_read_l1_tsc was not declared. Should it be static? arch/x86/kvm/vmx.c:6151:48: sparse: Using plain integer as NULL pointer arch/x86/kvm/vmx.c:8851:6: sparse: symbol vmx_sched_in was not declared. Should it be static? arch/x86/kvm/svm.c:2162:5: sparse: symbol svm_read_l1_tsc was not declared. Should it be static? Cc: Radim Krčmář Signed-off-by: Fengguang Wu Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 2 +- arch/x86/kvm/vmx.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1703aab84a6d..7cd230e55118 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3031,7 +3031,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } -u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) +static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); return vmcb->control.tsc_offset + diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 73ba2a265f85..d70550d0bcff 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2159,7 +2159,7 @@ static u64 guest_read_tsc(void) * Like guest_read_tsc, but always returns L1's notion of the timestamp * counter, even if a nested guest (L2) is currently running. 
*/ -u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) +static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { u64 tsc_offset; @@ -6246,11 +6246,11 @@ static void free_nested(struct vcpu_vmx *vmx) /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); - vmx->nested.apic_access_page = 0; + vmx->nested.apic_access_page = NULL; } if (vmx->nested.virtual_apic_page) { nested_release_page(vmx->nested.virtual_apic_page); - vmx->nested.virtual_apic_page = 0; + vmx->nested.virtual_apic_page = NULL; } nested_free_all_saved_vmcss(vmx); @@ -8950,11 +8950,11 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, /* Unpin physical memory we referred to in vmcs02 */ if (vmx->nested.apic_access_page) { nested_release_page(vmx->nested.apic_access_page); - vmx->nested.apic_access_page = 0; + vmx->nested.apic_access_page = NULL; } if (vmx->nested.virtual_apic_page) { nested_release_page(vmx->nested.virtual_apic_page); - vmx->nested.virtual_apic_page = 0; + vmx->nested.virtual_apic_page = NULL; } /* @@ -9010,7 +9010,7 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, return X86EMUL_CONTINUE; } -void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) +static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) { if (ple_gap) shrink_ple_window(vcpu); -- cgit v1.2.3 From 0f54a321302dfbdbd707ba989b2f468e58b9a363 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Fri, 29 Aug 2014 11:26:55 +0300 Subject: KVM: vmx: VMXOFF emulation in vm86 should cause #UD Unlike VMCALL, the instructions VMXOFF, VMLAUNCH and VMRESUME should cause a UD exception in real-mode or vm86. However, the emulator considers all these instructions the same for the matter of mode checks, and emulation upon exit due to #UD exception. As a result, the hypervisor behaves incorrectly on vm86 mode. VMXOFF, VMLAUNCH or VMRESUME cause on vm86 exit due to #UD. The hypervisor then emulates these instruction and inject #GP to the guest instead of #UD. This patch creates a new group for these instructions and mark only VMCALL as an instruction which can be emulated. 
Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e5bf13003cd2..a240fac29e76 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3139,12 +3139,8 @@ static int em_clts(struct x86_emulate_ctxt *ctxt) static int em_vmcall(struct x86_emulate_ctxt *ctxt) { - int rc; - - if (ctxt->modrm_mod != 3 || ctxt->modrm_rm != 1) - return X86EMUL_UNHANDLEABLE; + int rc = ctxt->ops->fix_hypercall(ctxt); - rc = ctxt->ops->fix_hypercall(ctxt); if (rc != X86EMUL_CONTINUE) return rc; @@ -3562,6 +3558,12 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +static const struct opcode group7_rm0[] = { + N, + I(SrcNone | Priv | EmulateOnUD, em_vmcall), + N, N, N, N, N, N, +}; + static const struct opcode group7_rm1[] = { DI(SrcNone | Priv, monitor), DI(SrcNone | Priv, mwait), @@ -3655,7 +3657,7 @@ static const struct group_dual group7 = { { II(SrcMem16 | Mov | Priv, em_lmsw, lmsw), II(SrcMem | ByteOp | Priv | NoAccess, em_invlpg, invlpg), }, { - I(SrcNone | Priv | EmulateOnUD, em_vmcall), + EXT(0, group7_rm0), EXT(0, group7_rm1), N, EXT(0, group7_rm3), II(SrcNone | DstMem | Mov, em_smsw, smsw), N, -- cgit v1.2.3 From 0a37027e83f867793af0ccb9176a6b383dd0b7c8 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 11 Jul 2014 11:56:31 -0600 Subject: KVM: x86 emulator: emulate MOVNTDQ Windows 8.1 guest with NVIDIA driver and GPU fails to boot with an emulation failure. The KVM spew suggests the fault is with lack of movntdq emulation (courtesy of Paolo): Code=02 00 00 b8 08 00 00 00 f3 0f 6f 44 0a f0 f3 0f 6f 4c 0a e0 <66> 0f e7 41 f0 66 0f e7 49 e0 48 83 e9 40 f3 0f 6f 44 0a 10 f3 0f 6f 0c 0a 66 0f e7 41 10 $ as -o a.out .section .text .byte 0x66, 0x0f, 0xe7, 0x41, 0xf0 .byte 0x66, 0x0f, 0xe7, 0x49, 0xe0 $ objdump -d a.out 0: 66 0f e7 41 f0 movntdq %xmm0,-0x10(%rcx) 5: 66 0f e7 49 e0 movntdq %xmm1,-0x20(%rcx) Add the necessary emulation. Signed-off-by: Alex Williamson Cc: Paolo Bonzini Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a240fac29e76..0892622f9258 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3696,6 +3696,10 @@ static const struct gprefix pfx_0f_28_0f_29 = { I(Aligned, em_mov), I(Aligned, em_mov), N, N, }; +static const struct gprefix pfx_0f_e7 = { + N, I(Sse, em_mov), N, N, +}; + static const struct escape escape_d9 = { { N, N, N, N, N, N, N, I(DstMem, em_fnstcw), }, { @@ -3966,7 +3970,8 @@ static const struct opcode twobyte_table[256] = { /* 0xD0 - 0xDF */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0xE0 - 0xEF */ - N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + N, N, N, N, N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_e7), + N, N, N, N, N, N, N, N, /* 0xF0 - 0xFF */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N }; -- cgit v1.2.3 From d5b77069701600b8189d3b4409b69f23ac4f5bc2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 14 Jul 2014 12:54:48 +0200 Subject: KVM: x86: remove Aligned bit from movntps/movntpd These are not explicitly aligned, and do not require alignment on AVX. 
Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0892622f9258..20d91873d831 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3688,8 +3688,8 @@ static const struct gprefix pfx_0f_6f_0f_7f = { I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), }; -static const struct gprefix pfx_vmovntpx = { - I(0, em_mov), N, N, N, +static const struct gprefix pfx_0f_2b = { + I(0, em_mov), I(0, em_mov), N, N, }; static const struct gprefix pfx_0f_28_0f_29 = { @@ -3906,7 +3906,7 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), - N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx), + N, GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_2b), N, N, N, N, /* 0x30 - 0x3F */ II(ImplicitOps | Priv, em_wrmsr, wrmsr), -- cgit v1.2.3 From 656473003bc7e056c3bbd4a4d9832dad01e86f76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Aug 2014 14:01:17 +0200 Subject: KVM: forward declare structs in kvm_types.h Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid "'struct foo' declared inside parameter list" warnings (and consequent breakage due to conflicting types). Move them from individual files to a generic place in linux/kvm_types.h. Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 7 ++----- arch/arm64/include/asm/kvm_host.h | 6 ++---- arch/ia64/include/asm/kvm_host.h | 3 --- arch/mips/include/asm/kvm_host.h | 5 ----- arch/powerpc/include/asm/kvm_host.h | 5 ----- arch/s390/include/asm/kvm_host.h | 5 +++-- arch/x86/include/asm/kvm_host.h | 4 ---- 7 files changed, 7 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6dfb404f6c46..4843397b812c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,8 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include +#include #include #include #include @@ -40,7 +42,6 @@ #include -struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -149,20 +150,17 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); u64 kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); -struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, 
const struct kvm_one_reg *); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e10c45a578e3..766147baae0b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -22,6 +22,8 @@ #ifndef __ARM64_KVM_HOST_H__ #define __ARM64_KVM_HOST_H__ +#include +#include #include #include #include @@ -41,7 +43,6 @@ #define KVM_VCPU_MAX_FEATURES 3 -struct kvm_vcpu; int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); @@ -164,18 +165,15 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index db95f570705f..fccc09d04649 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -234,9 +234,6 @@ struct kvm_vm_data { #define KVM_REQ_PTC_G 32 #define KVM_REQ_RESUME 33 -struct kvm; -struct kvm_vcpu; - struct kvm_mmio_req { uint64_t addr; /* physical address */ uint64_t size; /* size in bytes */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 7a3fc67bd7f9..b93bc80ed7e7 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -96,11 +96,6 @@ #define CAUSEB_DC 27 #define CAUSEF_DC (_ULCAST_(1) << 27) -struct kvm; -struct kvm_run; -struct kvm_vcpu; -struct kvm_interrupt; - extern atomic_t kvm_mips_instance; extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 98d9dd50d063..0e597283c5c6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -53,7 +53,6 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -76,10 +75,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL -struct kvm; -struct kvm_run; -struct kvm_vcpu; - struct lppaca; struct slb_shadow; struct dtl_entry; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 773bef7614d8..d71291d3fb6f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,8 +13,11 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H + +#include #include #include +#include #include #include #include @@ -431,8 +434,6 @@ static inline bool kvm_is_error_hva(unsigned long addr) } #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm_async_pf; struct kvm_arch_async_pf { unsigned long pfault_token; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 
ac0f90e26a0b..567fface45f8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm; -struct kvm_async_pf; - enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, -- cgit v1.2.3 From 0865e636aef751966e6e0f8950a26bc7391e923c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 28 Aug 2014 15:13:02 +0200 Subject: KVM: static inline empty kvm_arch functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using static inline is going to save few bytes and cycles. For example on powerpc, the difference is 700 B after stripping. (5 kB before) This patch also deals with two overlooked empty functions: kvm_arch_flush_shadow was not removed from arch/mips/kvm/mips.c 2df72e9bc KVM: split kvm_arch_flush_shadow and kvm_arch_sched_in never made it into arch/ia64/kvm/kvm-ia64.c. e790d9ef6 KVM: add kvm_arch_sched_in Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 6 ++++++ arch/arm/kvm/arm.c | 19 ---------------- arch/arm64/include/asm/kvm_host.h | 6 ++++++ arch/ia64/include/asm/kvm_host.h | 12 +++++++++++ arch/ia64/kvm/kvm-ia64.c | 30 -------------------------- arch/mips/include/asm/kvm_host.h | 11 ++++++++++ arch/mips/kvm/mips.c | 42 ------------------------------------ arch/powerpc/include/asm/kvm_host.h | 8 +++++++ arch/powerpc/kvm/powerpc.c | 29 ------------------------- arch/s390/include/asm/kvm_host.h | 14 ++++++++++++ arch/s390/kvm/kvm-s390.c | 43 ------------------------------------- 11 files changed, 57 insertions(+), 163 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 4843397b812c..aea259e9431f 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,4 +230,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9f788ebac55b..132bb0d9c5ad 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -97,27 +97,16 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = 0; } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} /** * kvm_arch_init_vm - initializes a VM data structure @@ -284,14 +273,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ -} - -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 766147baae0b..b5045e3e05f8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -242,4 
+242,10 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index fccc09d04649..4729752b7256 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -592,6 +592,18 @@ void kvm_sal_emul(struct kvm_vcpu *vcpu); struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_commit_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + const struct kvm_memory_slot *old, + enum kvm_mr_change change) {} +static inline void kvm_arch_hardware_unsetup(void) {} + #endif /* __ASSEMBLY__*/ #endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 0729ba6acddf..5e14dcaf844e 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1364,10 +1364,6 @@ static void kvm_release_vm_pages(struct kvm *kvm) } } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_iommu_unmap_guest(kvm); @@ -1376,10 +1372,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_release_vm_pages(kvm); } -void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { if (cpu != vcpu->cpu) { @@ -1468,7 +1460,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kfree(vcpu->arch.apic); } - long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1551,21 +1542,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -1597,14 +1579,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, return 0; } -void kvm_arch_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem, - const struct kvm_memory_slot *old, - enum kvm_mr_change change) -{ - return; -} - void kvm_arch_flush_shadow_all(struct kvm *kvm) { kvm_flush_remote_tlbs(kvm); @@ -1853,10 +1827,6 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) { return __apic_accept_irq(vcpu, irq->vector); diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index b93bc80ed7e7..0b24d6622ec1 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -762,5 +762,16 @@ extern int 
kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 2362df2a79f9..0ec7490d70bd 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -82,19 +82,11 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = 0; @@ -163,10 +155,6 @@ void kvm_mips_free_vcpus(struct kvm *kvm) mutex_unlock(&kvm->lock); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - static void kvm_mips_uninit_tlbs(void *arg) { /* Restore wired count */ @@ -194,21 +182,12 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, return -ENOIOCTLCMD; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -254,19 +233,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, } } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} - -void kvm_arch_flush_shadow(struct kvm *kvm) -{ -} - struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { int err, size, offset; @@ -998,14 +964,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ -} - -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ -} - int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0e597283c5c6..237cc0cc80a2 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -682,4 +682,12 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_unsetup(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_exit(void) {} + #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/kvm/powerpc.c 
b/arch/powerpc/kvm/powerpc.c index cbc432f4f0a6..72c3fc085207 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -389,19 +389,11 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - int kvm_arch_hardware_setup(void) { return 0; } -void kvm_arch_hardware_unsetup(void) -{ -} - void kvm_arch_check_processor_compat(void *rtn) { *(int *)rtn = kvmppc_core_check_processor_compat(); @@ -462,10 +454,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) module_put(kvm->arch.kvm_ops->owner); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -608,10 +596,6 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return kvmppc_core_create_memslot(kvm, slot, npages); } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem, @@ -628,10 +612,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvmppc_core_commit_memory_region(kvm, mem, old); } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { @@ -720,10 +700,6 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_subarch_vcpu_uninit(vcpu); } -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE @@ -1347,9 +1323,4 @@ int kvm_arch_init(void *opaque) return 0; } -void kvm_arch_exit(void) -{ - -} - EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d71291d3fb6f..f6dd90684b97 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -451,4 +451,18 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; + +static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_check_processor_compat(void *rtn) {} +static inline void kvm_arch_exit(void) {} +static inline void kvm_arch_sync_events(struct kvm *kvm) {} +static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} +static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) {} + #endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 36209969bf98..b8fe1ae777db 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -106,10 +106,6 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) -{ -} - static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); int kvm_arch_hardware_setup(void) @@ -124,19 +120,11 @@ void kvm_arch_hardware_unsetup(void) gmap_unregister_ipte_notifier(&gmap_notifier); } -void kvm_arch_check_processor_compat(void *rtn) -{ -} - int kvm_arch_init(void *opaque) { return 0; } -void kvm_arch_exit(void) -{ -} - /* Section: device related */ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -514,10 
+502,6 @@ static void kvm_free_vcpus(struct kvm *kvm) mutex_unlock(&kvm->lock); } -void kvm_arch_sync_events(struct kvm *kvm) -{ -} - void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); @@ -552,15 +536,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) -{ - /* Nothing todo */ -} - -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_ctl(&vcpu->arch.host_fpregs.fpc); @@ -1708,21 +1683,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, - struct kvm_memory_slot *dont) -{ -} - int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; } -void kvm_arch_memslots_updated(struct kvm *kvm) -{ -} - /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -1768,15 +1734,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, return; } -void kvm_arch_flush_shadow_all(struct kvm *kvm) -{ -} - -void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} - static int __init kvm_s390_init(void) { int ret; -- cgit v1.2.3 From 13a34e067eab24fec882e1834fbf2cc31911d474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 28 Aug 2014 15:13:03 +0200 Subject: KVM: remove garbage arg to *hardware_{en,dis}able MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beggining was on_each_cpu(), which required an unused argument to kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten. Remove unnecessary arguments that stem from this. 
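For context, the argument only ever existed because on_each_cpu() hands its callbacks a void *. A minimal sketch of how the generic side can keep that calling convention while the arch hooks drop the parameter (hardware_enable_nolock() follows the wrapper in virt/kvm/kvm_main.c, simplified and quoted from memory; hardware_enable_all_cpus() is a made-up name used only for illustration):

static void hardware_enable_nolock(void *junk)
{
        /* the smp callback keeps the void * that on_each_cpu() requires */
        if (kvm_arch_hardware_enable())
                pr_warn("kvm: enabling virtualization on CPU%d failed\n",
                        raw_smp_processor_id());
}

static void hardware_enable_all_cpus(void)
{
        /* the arch hook underneath no longer needs an argument */
        on_each_cpu(hardware_enable_nolock, NULL, 1);
}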
Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm/kvm/arm.c | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/ia64/kvm/kvm-ia64.c | 4 ++-- arch/mips/include/asm/kvm_host.h | 2 +- arch/mips/kvm/mips.c | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/include/asm/kvm_host.h | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ 14 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index aea259e9431f..032a8538318a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 132bb0d9c5ad..005a7b5fd0aa 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -87,7 +87,7 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b5045e3e05f8..be9970a59497 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5e14dcaf844e..ec6b9acb6bea 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { long status; long tmp_base; @@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { long status; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 0b24d6622ec1..f2c249796ea8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -762,7 +762,7 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 0ec7490d70bd..e3b21e51ff7e 100644 
--- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -77,7 +77,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 237cc0cc80a2..604000882352 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -682,7 +682,7 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 72c3fc085207..da505237a664 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -384,7 +384,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, } EXPORT_SYMBOL_GPL(kvmppc_ld); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f6dd90684b97..a76a124dff48 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -452,7 +452,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_check_processor_compat(void *rtn) {} static inline void kvm_arch_exit(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b8fe1ae777db..628e992eeded 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -100,7 +100,7 @@ int test_vfacility(unsigned long nr) } /* Section: not file related */ -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { /* every s390 is virtualization enabled ;-) */ return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 567fface45f8..73e4149eda33 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -661,8 +661,8 @@ struct msr_data { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void *dummy); - void (*hardware_disable)(void *dummy); + int (*hardware_enable)(void); + void (*hardware_disable)(void); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7cd230e55118..8ef704037370 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -622,7 +622,7 @@ static int has_svm(void) return 1; } -static void svm_hardware_disable(void *garbage) +static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) @@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void) { struct svm_cpu_data *sd; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 
d70550d0bcff..671ca5edc709 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2728,7 +2728,7 @@ static void kvm_cpu_vmxon(u64 addr) : "memory", "cc"); } -static int hardware_enable(void *garbage) +static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2792,7 +2792,7 @@ static void kvm_cpu_vmxoff(void) asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); } -static void hardware_disable(void *garbage) +static void hardware_disable(void) { if (vmm_exclusive) { vmclear_local_loaded_vmcss(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c10408ef9ab1..a375dfc42f6a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); -static void drop_user_return_notifiers(void *ignore) +static void drop_user_return_notifiers(void) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); @@ -6959,7 +6959,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) kvm_rip_write(vcpu, 0); } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -6970,7 +6970,7 @@ int kvm_arch_hardware_enable(void *garbage) bool stable, backwards_tsc = false; kvm_shared_msr_cpu_online(); - ret = kvm_x86_ops->hardware_enable(garbage); + ret = kvm_x86_ops->hardware_enable(); if (ret != 0) return ret; @@ -7051,10 +7051,10 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { - kvm_x86_ops->hardware_disable(garbage); - drop_user_return_notifiers(garbage); + kvm_x86_ops->hardware_disable(); + drop_user_return_notifiers(); } int kvm_arch_hardware_setup(void) -- cgit v1.2.3 From fd2752352bbc98850d83b5448a288d8991590317 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Aug 2014 18:56:01 +0200 Subject: KVM: x86: use guest maxphyaddr to check MTRR values The check introduced in commit d7a2a246a1b5 (KVM: x86: #GP when attempts to write reserved bits of Variable Range MTRRs, 2014-08-19) will break if the guest maxphyaddr is higher than the host's (which sometimes happens depending on your hardware and how QEMU is configured). To fix this, use cpuid_maxphyaddr similar to how the APIC_BASE MSR does already. 
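Concretely, the reserved region of a variable-range MTRR MSR is everything at or above the guest's MAXPHYADDR, not the host's. A standalone sketch of the check (phys_addr_fits() is a hypothetical helper name, mirroring the mtrr_valid() hunk below):

/* For a guest reporting a 40-bit physical address width,
 * (~0ULL) << 40 == 0xffffff0000000000, i.e. bits 40-63 are reserved. */
static bool phys_addr_fits(u64 data, int guest_maxphyaddr)
{
        u64 rsvd = (~0ULL) << guest_maxphyaddr;

        return (data & rsvd) == 0;
}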
Reported-by: Jan Kiszka Tested-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a375dfc42f6a..916e89515210 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1726,7 +1726,7 @@ static bool valid_mtrr_type(unsigned t) static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) { int i; - u64 mask = 0; + u64 mask; if (!msr_mtrr_valid(msr)) return false; @@ -1750,8 +1750,7 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) /* variable MTRRs */ WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR)); - for (i = 63; i > boot_cpu_data.x86_phys_bits; i--) - mask |= (1ULL << i); + mask = (~0ULL) << cpuid_maxphyaddr(vcpu); if ((msr & 1) == 0) { /* MTRR base */ if (!valid_mtrr_type(data & 0xff)) -- cgit v1.2.3 From 00f034a12fdd81210d58116326d92780aac5c238 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 Aug 2014 14:29:21 +0200 Subject: KVM: do not bias the generation number in kvm_current_mmio_generation The next patch will give a meaning (a la seqcount) to the low bit of the generation number. Ensure that it matches between kvm->memslots->generation and kvm_current_mmio_generation(). Cc: stable@vger.kernel.org Reviewed-by: David Matlack Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 931467881da7..323c3f5f5c84 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -236,12 +236,7 @@ static unsigned int get_mmio_spte_generation(u64 spte) static unsigned int kvm_current_mmio_generation(struct kvm *kvm) { - /* - * Init kvm generation close to MMIO_MAX_GEN to easily test the - * code of handling generation number wrap-around. - */ - return (kvm_memslots(kvm)->generation + - MMIO_MAX_GEN - 150) & MMIO_GEN_MASK; + return kvm_memslots(kvm)->generation & MMIO_GEN_MASK; } static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn, -- cgit v1.2.3 From ee3d1570b58677885b4552bce8217fda7b226a68 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 18 Aug 2014 15:46:06 -0700 Subject: kvm: fix potentially corrupt mmio cache vcpu exits and memslot mutations can run concurrently as long as the vcpu does not aquire the slots mutex. Thus it is theoretically possible for memslots to change underneath a vcpu that is handling an exit. If we increment the memslot generation number again after synchronize_srcu_expedited(), vcpus can safely cache memslot generation without maintaining a single rcu_dereference through an entire vm exit. And much of the x86/kvm code does not maintain a single rcu_dereference of the current memslots during each exit. We can prevent the following case: vcpu (CPU 0) | thread (CPU 1) --------------------------------------------+-------------------------- 1 vm exit | 2 srcu_read_unlock(&kvm->srcu) | 3 decide to cache something based on | old memslots | 4 | change memslots | (increments generation) 5 | synchronize_srcu(&kvm->srcu); 6 retrieve generation # from new memslots | 7 tag cache with new memslot generation | 8 srcu_read_unlock(&kvm->srcu) | ... | | ... | | | By incrementing the generation after synchronizing with kvm->srcu readers, we ensure that the generation retrieved in (6) will become invalid soon after (8). 
Keeping the existing increment is not strictly necessary, but we do keep it and just move it for consistency from update_memslots to install_new_memslots. It invalidates old cached MMIOs immediately, instead of having to wait for the end of synchronize_srcu_expedited, which makes the code more clearly correct in case CPU 1 is preempted right after synchronize_srcu() returns. To avoid halving the generation space in SPTEs, always presume that the low bit of the generation is zero when reconstructing a generation number out of an SPTE. This effectively disables MMIO caching in SPTEs during the call to synchronize_srcu_expedited. Using the low bit this way is somewhat like a seqcount---where the protected thing is a cache, and instead of retrying we can simply punt if we observe the low bit to be 1. Cc: stable@vger.kernel.org Signed-off-by: David Matlack Reviewed-by: Xiao Guangrong Reviewed-by: David Matlack Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 323c3f5f5c84..96515957ba82 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -199,16 +199,20 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); /* - * spte bits of bit 3 ~ bit 11 are used as low 9 bits of generation number, - * the bits of bits 52 ~ bit 61 are used as high 10 bits of generation - * number. + * the low bit of the generation number is always presumed to be zero. + * This disables mmio caching during memslot updates. The concept is + * similar to a seqcount but instead of retrying the access we just punt + * and ignore the cache. + * + * spte bits 3-11 are used as bits 1-9 of the generation number, + * the bits 52-61 are used as bits 10-19 of the generation number. */ -#define MMIO_SPTE_GEN_LOW_SHIFT 3 +#define MMIO_SPTE_GEN_LOW_SHIFT 2 #define MMIO_SPTE_GEN_HIGH_SHIFT 52 -#define MMIO_GEN_SHIFT 19 -#define MMIO_GEN_LOW_SHIFT 9 -#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 1) +#define MMIO_GEN_SHIFT 20 +#define MMIO_GEN_LOW_SHIFT 10 +#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) #define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) #define MMIO_MAX_GEN ((1 << MMIO_GEN_SHIFT) - 1) @@ -4428,7 +4432,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm) * The very rare case: if the generation-number is round, * zap all shadow pages. */ - if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) { + if (unlikely(kvm_current_mmio_generation(kvm) == 0)) { printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n"); kvm_mmu_invalidate_zap_all_pages(kvm); } -- cgit v1.2.3 From 56f17dd3fbc44adcdbc3340fe3988ddb833a47a7 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 18 Aug 2014 15:46:07 -0700 Subject: kvm: x86: fix stale mmio cache bug The following events can lead to an incorrect KVM_EXIT_MMIO bubbling up to userspace: (1) Guest accesses gpa X without a memory slot. The gfn is cached in struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets the SPTE write-execute-noread so that future accesses cause EPT_MISCONFIGs. (2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION covering the page just accessed. (3) Guest attempts to read or write to gpa X again. On Intel, this generates an EPT_MISCONFIG. 
The memory slot generation number that was incremented in (2) would normally take care of this but we fast path mmio faults through quickly_check_mmio_pf(), which only checks the per-vcpu mmio cache. Since we hit the cache, KVM passes a KVM_EXIT_MMIO up to userspace. This patch fixes the issue by using the memslot generation number to validate the mmio cache. Cc: stable@vger.kernel.org Signed-off-by: David Matlack [xiaoguangrong: adjust the code to make it simpler for stable-tree fix.] Signed-off-by: Xiao Guangrong Reviewed-by: David Matlack Reviewed-by: Xiao Guangrong Tested-by: David Matlack Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/x86.h | 20 +++++++++++++++----- 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 73e4149eda33..08cc299ec6f4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -477,6 +477,7 @@ struct kvm_vcpu_arch { u64 mmio_gva; unsigned access; gfn_t mmio_gfn; + u64 mmio_gen; struct kvm_pmu pmu; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 96515957ba82..1cd2a5fbde07 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3162,7 +3162,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; - vcpu_clear_mmio_info(vcpu, ~0ul); + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu->arch.mmu.root_hpa; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 306a1b77581f..985fb2c006fa 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -88,15 +88,23 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, vcpu->arch.mmio_gva = gva & PAGE_MASK; vcpu->arch.access = access; vcpu->arch.mmio_gfn = gfn; + vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation; +} + +static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mmio_gen == kvm_memslots(vcpu->kvm)->generation; } /* - * Clear the mmio cache info for the given gva, - * specially, if gva is ~0ul, we clear all mmio cache info. + * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY, we + * clear all mmio cache info. 
*/ +#define MMIO_GVA_ANY (~(gva_t)0) + static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) { - if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) + if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK)) return; vcpu->arch.mmio_gva = 0; @@ -104,7 +112,8 @@ static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva) static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) { - if (vcpu->arch.mmio_gva && vcpu->arch.mmio_gva == (gva & PAGE_MASK)) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gva && + vcpu->arch.mmio_gva == (gva & PAGE_MASK)) return true; return false; @@ -112,7 +121,8 @@ static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva) static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { - if (vcpu->arch.mmio_gfn && vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) + if (vcpu_match_mmio_gen(vcpu) && vcpu->arch.mmio_gfn && + vcpu->arch.mmio_gfn == gpa >> PAGE_SHIFT) return true; return false; -- cgit v1.2.3 From d143148383d0395539073dd6c2f25ddf6656bdcc Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Mon, 1 Sep 2014 18:44:04 +0800 Subject: KVM: mmio: cleanup kvm_set_mmio_spte_mask Just reuse rsvd_bits() inside kvm_set_mmio_spte_mask() for slightly better code. Signed-off-by: Tiejun Chen Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 5 ----- arch/x86/kvm/mmu.h | 5 +++++ arch/x86/kvm/x86.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1cd2a5fbde07..6b6df0c5be3d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -295,11 +295,6 @@ static bool check_mmio_spte(struct kvm *kvm, u64 spte) return likely(kvm_gen == spte_gen); } -static inline u64 rsvd_bits(int s, int e) -{ - return ((1ULL << (e - s + 1)) - 1) << s; -} - void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index b982112d2ca5..bde8ee725754 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -56,6 +56,11 @@ #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) +static inline u64 rsvd_bits(int s, int e) +{ + return ((1ULL << (e - s + 1)) - 1) << s; +} + int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 916e89515210..e4ed85e07a01 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5569,7 +5569,7 @@ static void kvm_set_mmio_spte_mask(void) * entry to generate page fault with PFER.RSV = 1. */ /* Mask the reserved physical address bits. */ - mask = ((1ull << (51 - maxphyaddr + 1)) - 1) << maxphyaddr; + mask = rsvd_bits(maxphyaddr, 51); /* Bit 62 is always reserved for 32bit host. */ mask |= 0x3ull << 62; -- cgit v1.2.3 From a0c0feb57992c5caed170feab8a68c51306eb7c3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Sep 2014 13:24:12 +0200 Subject: KVM: x86: reserve bit 8 of non-leaf PDPEs and PML4Es in 64-bit mode on AMD Bit 8 would be the "global" bit, which does not quite make sense for non-leaf page table entries. Intel ignores it; AMD ignores it in PDEs, but reserves it in PDPEs and PML4Es. The SVM test is relying on this behavior, so enforce it. 
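For reference, rsvd_bits(s, e), moved into arch/x86/kvm/mmu.h by the previous patch, builds an inclusive bit-range mask; the masks used below work out as follows (sketch, values only):

static inline u64 rsvd_bits(int s, int e)
{
        return ((1ULL << (e - s + 1)) - 1) << s;
}

/* rsvd_bits(7, 7)   == 0x80                                        */
/* rsvd_bits(8, 8)   == 0x100   (the bit-8 position this patch reserves) */
/* rsvd_bits(40, 51) == 0x000fff0000000000 for a 40-bit MAXPHYADDR  */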
Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.h | 8 ++++++++ arch/x86/kvm/mmu.c | 13 +++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index a5380590ab0e..43b33e301e68 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -88,6 +88,14 @@ static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu) return best && (best->ecx & bit(X86_FEATURE_X2APIC)); } +static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0, 0); + return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; +} + static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6b6df0c5be3d..5b93a597e0c8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3512,6 +3512,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int maxphyaddr = cpuid_maxphyaddr(vcpu); u64 exb_bit_rsvd = 0; u64 gbpages_bit_rsvd = 0; + u64 nonleaf_bit8_rsvd = 0; context->bad_mt_xwr = 0; @@ -3519,6 +3520,14 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, exb_bit_rsvd = rsvd_bits(63, 63); if (!guest_cpuid_has_gbpages(vcpu)) gbpages_bit_rsvd = rsvd_bits(7, 7); + + /* + * Non-leaf PML4Es and PDPEs reserve bit 8 (which would be the G bit for + * leaf entries) on AMD CPUs only. + */ + if (guest_cpuid_is_amd(vcpu)) + nonleaf_bit8_rsvd = rsvd_bits(8, 8); + switch (context->root_level) { case PT32_ROOT_LEVEL: /* no rsvd bits for 2 level 4K page table entries */ @@ -3553,9 +3562,9 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, break; case PT64_ROOT_LEVEL: context->rsvd_bits_mask[0][3] = exb_bit_rsvd | - rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 7); + nonleaf_bit8_rsvd | rsvd_bits(7, 7) | rsvd_bits(maxphyaddr, 51); context->rsvd_bits_mask[0][2] = exb_bit_rsvd | - gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); + nonleaf_bit8_rsvd | gbpages_bit_rsvd | rsvd_bits(maxphyaddr, 51); context->rsvd_bits_mask[0][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51); context->rsvd_bits_mask[0][0] = exb_bit_rsvd | -- cgit v1.2.3 From 5e352519519623a0b62587c606280e534d0cf1d9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Sep 2014 13:18:37 +0200 Subject: KVM: nSVM: propagate the NPF EXITINFO to the guest This is similar to what the EPT code does with the exit qualification. This allows the guest to see a valid value for bits 33:32. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/paging_tmpl.h | 12 ++++++++++++ arch/x86/kvm/svm.c | 24 ++++++++++++++++++++---- 2 files changed, 32 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 410776528265..df1a044d92de 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -322,8 +322,20 @@ retry_walk: real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), PFERR_USER_MASK|PFERR_WRITE_MASK); + + /* + * FIXME: This can happen if emulation (for of an INS/OUTS + * instruction) triggers a nested page fault. The exit + * qualification / exit info field will incorrectly have + * "guest page access" as the nested page fault's cause, + * instead of "guest page structure access". To fix this, + * the x86_exception struct should be augmented with enough + * information to fix the exit_qualification or exit_info_1 + * fields. 
+ */ if (unlikely(real_gfn == UNMAPPED_GVA)) goto error; + real_gfn = gpa_to_gfn(real_gfn); host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8ef704037370..d0a61a05a891 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1974,10 +1974,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.exit_code = SVM_EXIT_NPF; - svm->vmcb->control.exit_code_hi = 0; - svm->vmcb->control.exit_info_1 = fault->error_code; - svm->vmcb->control.exit_info_2 = fault->address; + if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) { + /* + * TODO: track the cause of the nested page fault, and + * correctly fill in the high bits of exit_info_1. + */ + svm->vmcb->control.exit_code = SVM_EXIT_NPF; + svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_info_1 = (1ULL << 32); + svm->vmcb->control.exit_info_2 = fault->address; + } + + svm->vmcb->control.exit_info_1 &= ~0xffffffffULL; + svm->vmcb->control.exit_info_1 |= fault->error_code; + + /* + * The present bit is always zero for page structure faults on real + * hardware. + */ + if (svm->vmcb->control.exit_info_1 & (2ULL << 32)) + svm->vmcb->control.exit_info_1 &= ~1; nested_svm_vmexit(svm); } -- cgit v1.2.3 From ef54bcfeea6c8b04e2a4f9396e16d88558aa2eee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Sep 2014 19:46:15 +0200 Subject: KVM: x86: skip writeback on injection of nested exception If a nested page fault happens during emulation, we will inject a vmexit, not a page fault. However because writeback happens after the injection, we will write ctxt->eip from L2 into the L1 EIP. We do not write back if an instruction caused an interception vmexit---do the same for page faults. 
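Condensed, the fixed flow in x86_emulate_instruction() becomes the following (a simplified sketch of the hunk below, not the literal code):

        if (ctxt->have_exception) {
                r = EMULATE_DONE;
                if (inject_emulated_exception(vcpu))
                        return r;       /* turned into a nested vmexit: skip writeback */
        }
        /* ... */
        kvm_rip_write(vcpu, ctxt->eip); /* L2's rip is written back only if we get here */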
Suggested-by: Gleb Natapov Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/x86.c | 15 ++++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 08cc299ec6f4..c9896518e54d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -893,7 +893,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); -void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); static inline int __kvm_irq_line_state(unsigned long *irq_state, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e4ed85e07a01..354194671902 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -408,12 +408,14 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) } EXPORT_SYMBOL_GPL(kvm_inject_page_fault); -void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) +static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { if (mmu_is_nested(vcpu) && !fault->nested_page_fault) vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); else vcpu->arch.mmu.inject_page_fault(vcpu, fault); + + return fault->nested_page_fault; } void kvm_inject_nmi(struct kvm_vcpu *vcpu) @@ -4929,16 +4931,18 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) } } -static void inject_emulated_exception(struct kvm_vcpu *vcpu) +static bool inject_emulated_exception(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; if (ctxt->exception.vector == PF_VECTOR) - kvm_propagate_fault(vcpu, &ctxt->exception); - else if (ctxt->exception.error_code_valid) + return kvm_propagate_fault(vcpu, &ctxt->exception); + + if (ctxt->exception.error_code_valid) kvm_queue_exception_e(vcpu, ctxt->exception.vector, ctxt->exception.error_code); else kvm_queue_exception(vcpu, ctxt->exception.vector); + return false; } static void init_emulate_ctxt(struct kvm_vcpu *vcpu) @@ -5300,8 +5304,9 @@ restart: } if (ctxt->have_exception) { - inject_emulated_exception(vcpu); r = EMULATE_DONE; + if (inject_emulated_exception(vcpu)) + return r; } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) { /* FIXME: return into emulator if single-stepping. */ -- cgit v1.2.3 From 54987b7afa902e886b3a751c056c2a4d4701020e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 Sep 2014 13:23:06 +0200 Subject: KVM: x86: propagate exception from permission checks on the nested page fault Currently, if a permission error happens during the translation of the final GPA to HPA, walk_addr_generic returns 0 but does not fill in walker->fault. To avoid this, add an x86_exception* argument to the translate_gpa function, and let it fill in walker->fault. The nested_page_fault field will be true, since the walk_mmu is the nested_mmu and translate_gpu instead operates on the "outer" (NPT) instance. 
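In prototype form the change is (sketch, simplified from the kvm_mmu hunk below):

/* before: a permission fault in the final gpa->hpa step was silently dropped */
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access);

/* after: the callee fills *exception, so walk_addr_generic() ends up with a
 * usable walker->fault for the nested page fault it is about to report */
gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
                       struct x86_exception *exception);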
Reported-by: Valentine Sinitsyn Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 9 ++++++--- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/paging_tmpl.h | 7 ++++--- arch/x86/kvm/x86.c | 9 +++++---- 4 files changed, 16 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c9896518e54d..2dbde3b7446c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -262,7 +262,8 @@ struct kvm_mmu { struct x86_exception *fault); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, struct x86_exception *exception); - gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); + gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); @@ -923,7 +924,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, @@ -943,7 +945,8 @@ void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); void kvm_enable_tdp(void); void kvm_disable_tdp(void); -static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +static inline gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception) { return gpa; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5b93a597e0c8..76398fe15df2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3200,7 +3200,7 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, { if (exception) exception->error_code = 0; - return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access); + return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); } static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index df1a044d92de..0ab6c65a2821 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -321,7 +321,8 @@ retry_walk: walker->pte_gpa[walker->level - 1] = pte_gpa; real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), - PFERR_USER_MASK|PFERR_WRITE_MASK); + PFERR_USER_MASK|PFERR_WRITE_MASK, + &walker->fault); /* * FIXME: This can happen if emulation (for of an INS/OUTS @@ -334,7 +335,7 @@ retry_walk: * fields. 
*/ if (unlikely(real_gfn == UNMAPPED_GVA)) - goto error; + return 0; real_gfn = gpa_to_gfn(real_gfn); @@ -376,7 +377,7 @@ retry_walk: if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); - real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); + real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault); if (real_gpa == UNMAPPED_GVA) return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 354194671902..7b25aa2725f8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -459,11 +459,12 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t ngfn, void *data, int offset, int len, u32 access) { + struct x86_exception exception; gfn_t real_gfn; gpa_t ngpa; ngpa = gfn_to_gpa(ngfn); - real_gfn = mmu->translate_gpa(vcpu, ngpa, access); + real_gfn = mmu->translate_gpa(vcpu, ngpa, access, &exception); if (real_gfn == UNMAPPED_GVA) return -EFAULT; @@ -4065,16 +4066,16 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, kvm_x86_ops->get_segment(vcpu, var, seg); } -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, + struct x86_exception *exception) { gpa_t t_gpa; - struct x86_exception exception; BUG_ON(!mmu_is_nested(vcpu)); /* NPT walks are always user-walks */ access |= PFERR_USER_MASK; - t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, &exception); + t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception); return t_gpa; } -- cgit v1.2.3 From 5102ee879539ebd2e0de1eb93290e3d691973e79 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Fri, 27 Jun 2014 14:46:01 -0400 Subject: KVM: CPACF: Enable MSA4 instructions for kvm guest We have to provide a per guest crypto block for the CPUs to enable MSA4 instructions. According to icainfo on z196 or later this enables CCM-AES-128, CMAC-AES-128, CMAC-AES-192 and CMAC-AES-256. 
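The wiring, condensed from the hunks below (sketch only): one crypto control block is allocated per VM, and its designation is copied into each vcpu's SIE block when the vcpu is set up.

        /* VM creation */
        kvm->arch.crypto.crycb  = kzalloc(sizeof(*kvm->arch.crypto.crycb),
                                          GFP_KERNEL | GFP_DMA);
        kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb |
                                  CRYCB_FORMAT1;

        /* vcpu setup */
        vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;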
Signed-off-by: Tony Krowiak Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Reviewed-by: Michael Mueller Signed-off-by: Christian Borntraeger [split MSA4/protected key into two patches] --- arch/s390/include/asm/kvm_host.h | 14 +++++++++++++- arch/s390/kvm/kvm-s390.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a76a124dff48..1a6f6fd8bd34 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -157,7 +157,9 @@ struct kvm_s390_sie_block { __u8 armid; /* 0x00e3 */ __u8 reservede4[4]; /* 0x00e4 */ __u64 tecmc; /* 0x00e8 */ - __u8 reservedf0[16]; /* 0x00f0 */ + __u8 reservedf0[12]; /* 0x00f0 */ +#define CRYCB_FORMAT1 0x00000001 + __u32 crycbd; /* 0x00fc */ __u64 gcr[16]; /* 0x0100 */ __u64 gbea; /* 0x0180 */ __u8 reserved188[24]; /* 0x0188 */ @@ -410,6 +412,15 @@ struct s390_io_adapter { #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8) #define MAX_S390_ADAPTER_MAPS 256 +struct kvm_s390_crypto { + struct kvm_s390_crypto_cb *crycb; + __u32 crycbd; +}; + +struct kvm_s390_crypto_cb { + __u8 reserved00[128]; /* 0x0000 */ +}; + struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; @@ -423,6 +434,7 @@ struct kvm_arch{ struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; spinlock_t start_stop_lock; + struct kvm_s390_crypto crypto; }; #define KVM_HVA_ERR_BAD (-1UL) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 628e992eeded..2037738d01a0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -392,6 +392,22 @@ long kvm_arch_vm_ioctl(struct file *filp, return r; } +static int kvm_s390_crypto_init(struct kvm *kvm) +{ + if (!test_vfacility(76)) + return 0; + + kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb), + GFP_KERNEL | GFP_DMA); + if (!kvm->arch.crypto.crycb) + return -ENOMEM; + + kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb | + CRYCB_FORMAT1; + + return 0; +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int rc; @@ -429,6 +445,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.dbf) goto out_nodbf; + if (kvm_s390_crypto_init(kvm) < 0) + goto out_crypto; + spin_lock_init(&kvm->arch.float_int.lock); INIT_LIST_HEAD(&kvm->arch.float_int.list); init_waitqueue_head(&kvm->arch.ipte_wq); @@ -453,6 +472,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return 0; out_nogmap: + kfree(kvm->arch.crypto.crycb); +out_crypto: debug_unregister(kvm->arch.dbf); out_nodbf: free_page((unsigned long)(kvm->arch.sca)); @@ -507,6 +528,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); free_page((unsigned long)(kvm->arch.sca)); debug_unregister(kvm->arch.dbf); + kfree(kvm->arch.crypto.crycb); if (!kvm_is_ucontrol(kvm)) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); @@ -588,6 +610,14 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) return 0; } +static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) +{ + if (!test_vfacility(76)) + return; + + vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd; +} + void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) { free_page(vcpu->arch.sie_block->cbrlo); @@ -634,6 +664,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; get_cpu_id(&vcpu->arch.cpu_id); vcpu->arch.cpu_id.version = 0xff; + + kvm_s390_vcpu_crypto_setup(vcpu); + 
return rc; } -- cgit v1.2.3 From 614aeab4dcd0aafb1538d5035eb9855f15b84014 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 25 Aug 2014 12:27:29 +0200 Subject: KVM: s390: add __must_check to interrupt deliver functions We now propagate interrupt injection errors back to the ioctl. We should mark functions that might fail with __must_check. Signed-off-by: Christian Borntraeger Acked-by: Jens Freimann --- arch/s390/kvm/interrupt.c | 12 ++++++------ arch/s390/kvm/kvm-s390.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 60a5cf40d49a..d56da1db8ba0 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -28,7 +28,7 @@ #define IOINT_AI_MASK 0x04000000 #define PFAULT_INIT 0x0600 -static int deliver_ckc_interrupt(struct kvm_vcpu *vcpu); +static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu); static int is_ioint(u64 type) { @@ -77,7 +77,7 @@ static u64 int_word_to_isc_bits(u32 int_word) return (0x80 >> isc) << 24; } -static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, +static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { switch (inti->type) { @@ -225,7 +225,7 @@ static u16 get_ilc(struct kvm_vcpu *vcpu) } } -static int __deliver_prog_irq(struct kvm_vcpu *vcpu, +static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu, struct kvm_s390_pgm_info *pgm_info) { int rc = 0; @@ -307,7 +307,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu, return rc; } -static int __do_deliver_interrupt(struct kvm_vcpu *vcpu, +static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info *inti) { const unsigned short table[] = { 2, 4, 4, 6 }; @@ -508,7 +508,7 @@ static int __do_deliver_interrupt(struct kvm_vcpu *vcpu, return rc; } -static int deliver_ckc_interrupt(struct kvm_vcpu *vcpu) +static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu) { int rc; @@ -657,7 +657,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl); } -int kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) +int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 99abcb56e478..b1a77669137b 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -138,7 +138,7 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); -int kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); +int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu); void kvm_s390_clear_float_irqs(struct kvm *kvm); int __must_check kvm_s390_inject_vm(struct kvm *kvm, -- cgit v1.2.3 From 0349985add77ef5c9da8a75f4a9855977f4197d9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 25 Aug 2014 12:38:57 +0200 Subject: KVM: s390: Limit guest size to 16TB Currently we fill up a full 5 level page table to hold the guest mapping. Since commit "support gmap page tables with less than 5 levels" we can do better. 
Having more than 4 TB might be useful for some testing scenarios, so let's just limit ourselves to 16TB guest size. Having more than that is totally untested as I do not have enough swap space/memory. We continue to allow ucontrol the full size. Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck Cc: Martin Schwidefsky --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2037738d01a0..b95d4a481b0c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -458,7 +458,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; } else { - kvm->arch.gmap = gmap_alloc(current->mm, -1UL); + kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); if (!kvm->arch.gmap) goto out_nogmap; kvm->arch.gmap->private = kvm; -- cgit v1.2.3 From f346026e55f1efd3949a67ddd1dcea7c1b9a615e Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 3 Sep 2014 16:21:32 +0200 Subject: KVM: s390: unintended fallthrough for external call We must not fallthrough if the conditions for external call are not met. Signed-off-by: Christian Borntraeger Reviewed-by: Thomas Huth Cc: stable@vger.kernel.org --- arch/s390/kvm/interrupt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d56da1db8ba0..4abf819a11a5 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -86,6 +86,7 @@ static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu, return 0; if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) return 1; + return 0; case KVM_S390_INT_EMERGENCY: if (psw_extint_disabled(vcpu)) return 0; -- cgit v1.2.3 From 6b331952f1bc2df61c98954e25578629c439e417 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 3 Sep 2014 21:17:03 +0200 Subject: KVM: s390: get rid of constant condition in ipte_unlock_simple Due to the earlier check we know that ipte_lock_count must be 0. No need to add a useless if. Let's make clear that we are going to always wakeup when we execute that code. Signed-off-by: Christian Borntraeger Acked-by: Heiko Carstens --- arch/s390/kvm/gaccess.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4653ac6e182b..0f961a1c64b3 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -254,8 +254,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) new = old = ACCESS_ONCE(*ic); new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); - if (!ipte_lock_count) - wake_up(&vcpu->kvm->arch.ipte_wq); + wake_up(&vcpu->kvm->arch.ipte_wq); out: mutex_unlock(&ipte_mutex); } -- cgit v1.2.3 From f7a960affc6e5a33e8c7fcef065affc4f0461041 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 3 Sep 2014 21:23:13 +0200 Subject: KVM: s390/cmm: Fix prefix handling for diag 10 balloon The old handling of prefix pages was broken in the diag10 ballooner. We now rely on gmap_discard to check for start > end and do a slow path if the prefix swap pages are affected: 1. discard the pages from start to prefix 2. discard the absolute 0 pages 3. 
discard the pages after prefix swap to end Signed-off-by: Christian Borntraeger Reviewed-by: Thomas Huth --- arch/s390/kvm/diag.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index b374b6cb7785..9254afff250c 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -28,22 +28,32 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096; - if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end + if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end || start < 2 * PAGE_SIZE) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end); vcpu->stat.diagnose_10++; - /* we checked for start > end above */ - if (end < prefix || start >= prefix + 2 * PAGE_SIZE) { + /* + * We checked for start >= end above, so lets check for the + * fast path (no prefix swap page involved) + */ + if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) { gmap_discard(vcpu->arch.gmap, start, end); } else { - if (start < prefix) - gmap_discard(vcpu->arch.gmap, start, prefix); - if (end >= prefix) - gmap_discard(vcpu->arch.gmap, - prefix + 2 * PAGE_SIZE, end); + /* + * This is slow path. gmap_discard will check for start + * so lets split this into before prefix, prefix, after + * prefix and let gmap_discard make some of these calls + * NOPs. + */ + gmap_discard(vcpu->arch.gmap, start, prefix); + if (start <= prefix) + gmap_discard(vcpu->arch.gmap, 0, 4096); + if (end > prefix + 4096) + gmap_discard(vcpu->arch.gmap, 4096, 8192); + gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end); } return 0; } -- cgit v1.2.3 From bfac1f59a1afb13a3cf225bffd04be99a49c51a6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 3 Sep 2014 16:16:47 +0200 Subject: KVM: s390/interrupt: remove double assignment r is already initialized to 0. Signed-off-by: Christian Borntraeger Reviewed-by: Thomas Huth --- arch/s390/kvm/interrupt.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 4abf819a11a5..4cad00adef93 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1352,7 +1352,6 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = enqueue_floating_irq(dev, attr); break; case KVM_DEV_FLIC_CLEAR_IRQS: - r = 0; kvm_s390_clear_float_irqs(dev->kvm); break; case KVM_DEV_FLIC_APF_ENABLE: -- cgit v1.2.3 From 73a6d9416279f138833574f11dc82134fb56908d Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Thu, 11 Sep 2014 13:38:00 +0800 Subject: kvm: Use APIC_DEFAULT_PHYS_BASE macro as the apic access page address. We have APIC_DEFAULT_PHYS_BASE defined as 0xfee00000, which is also the address of apic access page. So use this macro. 
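For reference, the constant and the page frame derived from it; this also explains the bare 0xfee00 literal that disappears from alloc_apic_access_page() below (sketch, value taken from the definition cited in the message above):

#define APIC_DEFAULT_PHYS_BASE  0xfee00000
/* gfn of the access page: APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT == 0xfee00 */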
Signed-off-by: Tang Chen Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 3 ++- arch/x86/kvm/vmx.c | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d0a61a05a891..f7f6a4a157a6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1257,7 +1257,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->asid_generation = 0; init_vmcb(svm); - svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | + MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_bsp(&svm->vcpu)) svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 671ca5edc709..5db47db6574b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4023,13 +4023,13 @@ static int alloc_apic_access_page(struct kvm *kvm) goto out; kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; kvm_userspace_mem.flags = 0; - kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; + kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE; kvm_userspace_mem.memory_size = PAGE_SIZE; r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); if (r) goto out; - page = gfn_to_page(kvm, 0xfee00); + page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); if (is_error_page(page)) { r = -EFAULT; goto out; @@ -4502,7 +4502,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(&vmx->vcpu, 0); - apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; if (kvm_vcpu_is_bsp(&vmx->vcpu)) apic_base_msr.data |= MSR_IA32_APICBASE_BSP; apic_base_msr.host_initiated = true; -- cgit v1.2.3 From a183b638b61c104920a42b1eb7668953f8ada5cb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Sep 2014 11:51:02 +0200 Subject: KVM: x86: make apic_accept_irq tracepoint more generic Initially the tracepoint was added only to the APIC_DM_FIXED case, also because it reported coalesced interrupts that only made sense for that case. However, the coalesced argument is not used anymore and tracing other delivery modes is useful, so hoist the call out of the switch statement. 
Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 4 ++-- arch/x86/kvm/trace.h | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index fb919c574e23..b8345dd41b25 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -709,6 +709,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; + trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, + trig_mode, vector); switch (delivery_mode) { case APIC_DM_LOWEST: vcpu->arch.apic_arb_prio++; @@ -730,8 +732,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } - trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector, false); break; case APIC_DM_REMRD: diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4c2868f36808..6b06ab8748dd 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -415,15 +415,14 @@ TRACE_EVENT(kvm_apic_ipi, ); TRACE_EVENT(kvm_apic_accept_irq, - TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced), - TP_ARGS(apicid, dm, tm, vec, coalesced), + TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec), + TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) __field( __u8, tm ) __field( __u8, vec ) - __field( bool, coalesced ) ), TP_fast_assign( @@ -431,14 +430,12 @@ TRACE_EVENT(kvm_apic_accept_irq, __entry->dm = dm; __entry->tm = tm; __entry->vec = vec; - __entry->coalesced = coalesced; ), - TP_printk("apicid %x vec %u (%s|%s)%s", + TP_printk("apicid %x vec %u (%s|%s)", __entry->apicid, __entry->vec, __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), - __entry->tm ? "level" : "edge", - __entry->coalesced ? " (coalesced)" : "") + __entry->tm ? "level" : "edge") ); TRACE_EVENT(kvm_eoi, -- cgit v1.2.3 From a7d079cea2dffb112e26da2566dd84c0ef1fce97 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 9 Sep 2014 11:27:09 +0100 Subject: ARM/arm64: KVM: fix use of WnR bit in kvm_is_write_fault() The ISS encoding for an exception from a Data Abort has a WnR bit[6] that indicates whether the Data Abort was caused by a read or a write instruction. While there are several fields in the encoding that are only valid if the ISV bit[24] is set, WnR is not one of them, so we can read it unconditionally. Instead of fixing both implementations of kvm_is_write_fault() in place, reimplement it just once using kvm_vcpu_dabt_iswrite(), which already does the right thing with respect to the WnR bit. 
Also fix up the callers to pass 'vcpu' Acked-by: Laszlo Ersek Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 11 ----------- arch/arm/kvm/mmu.c | 12 ++++++++++-- arch/arm64/include/asm/kvm_mmu.h | 13 ------------- 3 files changed, 10 insertions(+), 26 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f5f72f..3f688b458143 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -78,17 +78,6 @@ static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) flush_pmd_entry(pte); } -static inline bool kvm_is_write_fault(unsigned long hsr) -{ - unsigned long hsr_ec = hsr >> HSR_EC_SHIFT; - if (hsr_ec == HSR_EC_IABT) - return false; - else if ((hsr & HSR_ISV) && !(hsr & HSR_WNR)) - return false; - else - return true; -} - static inline void kvm_clean_pgd(pgd_t *pgd) { clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 62f5642153f9..bb06f76a8f89 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -746,6 +746,14 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) return false; } +static bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -760,7 +768,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, pfn_t pfn; pgprot_t mem_type = PAGE_S2; - write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + write_fault = kvm_is_write_fault(vcpu); if (fault_status == FSC_PERM && !write_fault) { kvm_err("Unexpected L2 read permission error\n"); return -EFAULT; @@ -886,7 +894,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) gfn = fault_ipa >> PAGE_SHIFT; memslot = gfn_to_memslot(vcpu->kvm, gfn); hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); - write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu)); + write_fault = kvm_is_write_fault(vcpu); if (kvm_is_error_hva(hva) || (write_fault && !writable)) { if (is_iabt) { /* Prefetch Abort on I/O address */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 8e138c7c53ac..737da742b293 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -93,19 +93,6 @@ void kvm_clear_hyp_idmap(void); #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) -static inline bool kvm_is_write_fault(unsigned long esr) -{ - unsigned long esr_ec = esr >> ESR_EL2_EC_SHIFT; - - if (esr_ec == ESR_EL2_EC_IABT) - return false; - - if ((esr & ESR_EL2_ISV) && !(esr & ESR_EL2_WNR)) - return false; - - return true; -} - static inline void kvm_clean_pgd(pgd_t *pgd) {} static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} static inline void kvm_clean_pte(pte_t *pte) {} -- cgit v1.2.3 From 105b21bbf67bb50000a0dc4b547a2f99b6376136 Mon Sep 17 00:00:00 2001 From: Guo Hui Liu Date: Fri, 12 Sep 2014 13:43:19 +0800 Subject: KVM: x86: Use kvm_make_request when applicable This patch replace the set_bit method by kvm_make_request to make code more readable and consistent. 
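At this point kvm_make_request() is a thin wrapper around the same bit operation, so the change is about readability and about pairing naturally with kvm_check_request(); roughly (sketch of include/linux/kvm_host.h, quoted from memory):

static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
{
        set_bit(req, &vcpu->requests);
}

static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
{
        if (test_bit(req, &vcpu->requests)) {
                clear_bit(req, &vcpu->requests);
                return true;
        }
        return false;
}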
Signed-off-by: Guo Hui Liu Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7b25aa2725f8..68660b31457e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1521,7 +1521,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) pvclock_update_vm_gtod_copy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); /* guest entries allowed */ kvm_for_each_vcpu(i, vcpu, kvm) @@ -1664,7 +1664,7 @@ static void kvmclock_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; kvm_for_each_vcpu(i, vcpu, kvm) { - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); kvm_vcpu_kick(vcpu); } } @@ -1673,7 +1673,7 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) { struct kvm *kvm = v->kvm; - set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); schedule_delayed_work(&kvm->arch.kvmclock_update_work, KVMCLOCK_UPDATE_DELAY); } @@ -2849,7 +2849,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); vcpu->arch.tsc_offset_adjustment = 0; - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); } if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) { @@ -5606,7 +5606,7 @@ static void pvclock_gtod_update_fn(struct work_struct *work) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) - set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); + kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); atomic_set(&kvm_guest_has_master_clock, 0); spin_unlock(&kvm_lock); } @@ -6984,7 +6984,7 @@ int kvm_arch_hardware_enable(void) list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (!stable && vcpu->cpu == smp_processor_id()) - set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); if (stable && vcpu->arch.last_host_tsc > local_tsc) { backwards_tsc = true; if (vcpu->arch.last_host_tsc > max_tsc) @@ -7038,8 +7038,7 @@ int kvm_arch_hardware_enable(void) kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.tsc_offset_adjustment += delta_cyc; vcpu->arch.last_host_tsc = local_tsc; - set_bit(KVM_REQ_MASTERCLOCK_UPDATE, - &vcpu->requests); + kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); } /* -- cgit v1.2.3 From 84877d93336de21a6251db00b841468a83c65906 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 2 Sep 2014 10:27:35 +0100 Subject: KVM: s390: register flic ops dynamically Using the new kvm_register_device_ops() interface makes us get rid of an #ifdef in common code. Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Cornelia Huck Signed-off-by: Will Deacon Signed-off-by: Paolo Bonzini --- arch/s390/kvm/kvm-s390.c | 3 ++- arch/s390/kvm/kvm-s390.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b95d4a481b0c..56a411c0245a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -122,7 +122,8 @@ void kvm_arch_hardware_unsetup(void) int kvm_arch_init(void *opaque) { - return 0; + /* Register floating interrupt controller interface. 
*/ + return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); } /* Section: device related */ diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index b1a77669137b..244d02303182 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -227,6 +227,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int psw_extint_disabled(struct kvm_vcpu *vcpu); void kvm_s390_destroy_adapters(struct kvm *kvm); int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu); +extern struct kvm_device_ops kvm_flic_ops; /* implemented in guestdbg.c */ void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From a255d4795f83cf3e6a1c7d5ab998392d9413298c Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 16 Sep 2014 18:41:58 +0800 Subject: kvm: Remove ept_identity_pagetable from struct kvm_arch. kvm_arch->ept_identity_pagetable holds the ept identity pagetable page. But it is never used to refer to the page at all. In vcpu initialization, it indicates two things: 1. indicates if ept page is allocated 2. indicates if a memory slot for identity page is initialized Actually, kvm_arch->ept_identity_pagetable_done is enough to tell if the ept identity pagetable is initialized. So we can remove ept_identity_pagetable. NOTE: In the original code, ept identity pagetable page is pinned in memroy. As a result, it cannot be migrated/hot-removed. After this patch, since kvm_arch->ept_identity_pagetable is removed, ept identity pagetable page is no longer pinned in memory. And it can be migrated/hot-removed. Signed-off-by: Tang Chen Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/vmx.c | 47 +++++++++++++++++++---------------------- arch/x86/kvm/x86.c | 2 -- 3 files changed, 22 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2dbde3b7446c..028df8dc538e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -578,7 +578,6 @@ struct kvm_arch { gpa_t wall_clock; - struct page *ept_identity_pagetable; bool ept_identity_pagetable_done; gpa_t ept_identity_map_addr; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5db47db6574b..5de0d7b1d8a9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -767,6 +767,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); +static int alloc_identity_pagetable(struct kvm *kvm); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -3962,21 +3963,27 @@ out: static int init_rmode_identity_map(struct kvm *kvm) { - int i, idx, r, ret; + int i, idx, r, ret = 0; pfn_t identity_map_pfn; u32 tmp; if (!enable_ept) return 1; - if (unlikely(!kvm->arch.ept_identity_pagetable)) { - printk(KERN_ERR "EPT: identity-mapping pagetable " - "haven't been allocated!\n"); - return 0; + + /* Protect kvm->arch.ept_identity_pagetable_done. 
*/ + mutex_lock(&kvm->slots_lock); + + if (likely(kvm->arch.ept_identity_pagetable_done)) { + ret = 1; + goto out2; } - if (likely(kvm->arch.ept_identity_pagetable_done)) - return 1; - ret = 0; + identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; + + r = alloc_identity_pagetable(kvm); + if (r) + goto out2; + idx = srcu_read_lock(&kvm->srcu); r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); if (r < 0) @@ -3994,6 +4001,9 @@ static int init_rmode_identity_map(struct kvm *kvm) ret = 1; out: srcu_read_unlock(&kvm->srcu, idx); + +out2: + mutex_unlock(&kvm->slots_lock); return ret; } @@ -4043,31 +4053,20 @@ out: static int alloc_identity_pagetable(struct kvm *kvm) { - struct page *page; + /* Called with kvm->slots_lock held. */ + struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; - mutex_lock(&kvm->slots_lock); - if (kvm->arch.ept_identity_pagetable) - goto out; + BUG_ON(kvm->arch.ept_identity_pagetable_done); + kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; r = __kvm_set_memory_region(kvm, &kvm_userspace_mem); - if (r) - goto out; - - page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); - if (is_error_page(page)) { - r = -EFAULT; - goto out; - } - kvm->arch.ept_identity_pagetable = page; -out: - mutex_unlock(&kvm->slots_lock); return r; } @@ -7758,8 +7757,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; err = -ENOMEM; - if (alloc_identity_pagetable(kvm) != 0) - goto free_vmcs; if (!init_rmode_identity_map(kvm)) goto free_vmcs; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 68660b31457e..2d7f65daa8d0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7273,8 +7273,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); if (kvm->arch.apic_access_page) put_page(kvm->arch.apic_access_page); - if (kvm->arch.ept_identity_pagetable) - put_page(kvm->arch.ept_identity_pagetable); kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } -- cgit v1.2.3 From f51770ed465e6eb41da7fa16fd92eb67069600cf Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 16 Sep 2014 18:41:59 +0800 Subject: kvm: Make init_rmode_identity_map() return 0 on success. In init_rmode_identity_map(), there two variables indicating the return value, r and ret, and it return 0 on error, 1 on success. The function is only called by vmx_create_vcpu(), and ret is redundant. This patch removes the redundant variable, and makes init_rmode_identity_map() return 0 on success, -errno on failure. Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5de0d7b1d8a9..6ffd643d1a64 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3963,25 +3963,23 @@ out: static int init_rmode_identity_map(struct kvm *kvm) { - int i, idx, r, ret = 0; + int i, idx, r = 0; pfn_t identity_map_pfn; u32 tmp; if (!enable_ept) - return 1; + return 0; /* Protect kvm->arch.ept_identity_pagetable_done. 
*/ mutex_lock(&kvm->slots_lock); - if (likely(kvm->arch.ept_identity_pagetable_done)) { - ret = 1; + if (likely(kvm->arch.ept_identity_pagetable_done)) goto out2; - } identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; r = alloc_identity_pagetable(kvm); - if (r) + if (r < 0) goto out2; idx = srcu_read_lock(&kvm->srcu); @@ -3998,13 +3996,13 @@ static int init_rmode_identity_map(struct kvm *kvm) goto out; } kvm->arch.ept_identity_pagetable_done = true; - ret = 1; + out: srcu_read_unlock(&kvm->srcu, idx); out2: mutex_unlock(&kvm->slots_lock); - return ret; + return r; } static void seg_setup(int seg) @@ -7756,8 +7754,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (!kvm->arch.ept_identity_map_addr) kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; - err = -ENOMEM; - if (!init_rmode_identity_map(kvm)) + err = init_rmode_identity_map(kvm); + if (err) goto free_vmcs; } -- cgit v1.2.3 From c1bfb577addd4867a82c4f235824a315d5afb94a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:01 +0100 Subject: arm/arm64: KVM: vgic: switch to dynamic allocation So far, all the VGIC data structures are statically defined by the *maximum* number of vcpus and interrupts it supports. It means that we always have to oversize it to cater for the worse case. Start by changing the data structures to be dynamically sizeable, and allocate them at runtime. The sizes are still very static though. Signed-off-by: Marc Zyngier --- arch/arm/kvm/arm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 88c901cfc75e..c1a11496817b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -161,6 +161,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm->vcpus[i] = NULL; } } + + kvm_vgic_destroy(kvm); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -243,6 +245,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { kvm_mmu_free_memory_caches(vcpu); kvm_timer_vcpu_terminate(vcpu); + kvm_vgic_vcpu_destroy(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } -- cgit v1.2.3 From 4956f2bc1fdee4bc336532f3f34635a8534cedfd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:06 +0100 Subject: arm/arm64: KVM: vgic: delay vgic allocation until init time It is now quite easy to delay the allocation of the vgic tables until we actually require it to be up and running (when the first vcpu is kicking around, or someones tries to access the GIC registers). This allow us to allocate memory for the exact number of CPUs we have. As nobody configures the number of interrupts just yet, use a fallback to VGIC_NR_IRQS_LEGACY. 
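The shape of this change is a classic lazy-initialization pattern: allocation is deferred until the first point where the VGIC is actually needed, which is also the first point where the real vcpu count is known. A minimal, self-contained sketch of that pattern follows; the struct and function names are illustrative only, not the kernel's vgic API, and the legacy fallback value (256 in the kernel at the time) is used purely for illustration.

#include <stdlib.h>

#define VGIC_NR_IRQS_LEGACY 256        /* fallback when userspace sets no size */

struct vgic_sketch {
        int  ready;       /* set once the tables below exist                 */
        int  nr_cpus;     /* real vcpu count, known only at init time        */
        int  nr_irqs;     /* configurable; defaults to the legacy value      */
        int *per_cpu;     /* sized by nr_cpus instead of a compile-time max  */
};

static int vgic_sketch_init(struct vgic_sketch *v, int online_vcpus)
{
        if (v->ready)                     /* first vcpu run or first MMIO access */
                return 0;
        if (!v->nr_irqs)
                v->nr_irqs = VGIC_NR_IRQS_LEGACY;
        v->nr_cpus = online_vcpus;
        v->per_cpu = calloc(v->nr_cpus, sizeof(*v->per_cpu));
        if (!v->per_cpu)
                return -1;
        v->ready = 1;
        return 0;
}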
Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/kvm/arm.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c1a11496817b..40bc3df6d87b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -261,16 +261,9 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { - int ret; - /* Force users to call KVM_ARM_VCPU_INIT */ vcpu->arch.target = -1; - /* Set up VGIC */ - ret = kvm_vgic_vcpu_init(vcpu); - if (ret) - return ret; - /* Set up the timer */ kvm_timer_vcpu_init(vcpu); -- cgit v1.2.3 From a98f26f183801685ef57333de4bafd4bbc692c7c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Jul 2014 12:09:07 +0100 Subject: arm/arm64: KVM: vgic: make number of irqs a configurable attribute In order to make the number of interrupts configurable, use the new fancy device management API to add KVM_DEV_ARM_VGIC_GRP_NR_IRQS as a VGIC configurable attribute. Userspace can now specify the exact size of the GIC (by increments of 32 interrupts). Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- arch/arm/include/uapi/asm/kvm.h | 1 + arch/arm64/include/uapi/asm/kvm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 51257fda254b..09ee408c1a67 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -174,6 +174,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f4ec5a674d05..8e38878c87c6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -160,6 +160,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) #define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 #define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 -- cgit v1.2.3 From 9fee7563cdb535596c48e7b05383d75590a64418 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 6 Aug 2014 12:08:51 +0530 Subject: KVM: PPC: BOOKE: allow debug interrupt at "debug level" Debug interrupt can be either "critical level" or "debug level". There are separate set of save/restore registers used for different level. Example: DSRR0/DSRR1 are used for "debug level" and CSRR0/CSRR1 are used for critical level debug interrupt. Using CPU_FTR_DEBUG_LVL_EXC to decide which interrupt level to be used. 
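For readers not fluent in Book E interrupt levels: each level has its own save/restore SPR pair, and the CPU feature bit simply selects which pair (and therefore which interrupt class) debug events are delivered through. An illustrative mapping in plain C, not kernel code:

enum int_class_sketch { NONCRIT, CRIT, DBG };

struct srr_pair_sketch { const char *save_pc; const char *save_msr; };

/* Which SPR pair a Book E interrupt of the given class saves state into. */
static struct srr_pair_sketch srr_for_class(enum int_class_sketch c)
{
        switch (c) {
        case DBG:  return (struct srr_pair_sketch){ "DSRR0", "DSRR1" }; /* "debug level"    */
        case CRIT: return (struct srr_pair_sketch){ "CSRR0", "CSRR1" }; /* "critical level" */
        default:   return (struct srr_pair_sketch){ "SRR0",  "SRR1"  }; /* normal level     */
        }
}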
Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b4c89fa6f109..322da7d2334a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -377,7 +377,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, allowed = vcpu->arch.shared->msr & MSR_DE; allowed = allowed && !crit; msr_mask = MSR_ME; - int_class = INT_CLASS_CRIT; + if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) + int_class = INT_CLASS_DBG; + else + int_class = INT_CLASS_CRIT; + break; } -- cgit v1.2.3 From c8ca97ca9b87c0a9c9e67feda656b8dbca65cf08 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 6 Aug 2014 12:08:52 +0530 Subject: KVM: PPC: BOOKE : Emulate rfdi instruction This patch adds "rfdi" instruction emulation which is required for guest debug hander on BOOKE-HV Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kvm/booke_emulate.c | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 604000882352..8a8da0acfd28 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -139,6 +139,7 @@ enum kvm_exit_types { EMULATED_TLBWE_EXITS, EMULATED_RFI_EXITS, EMULATED_RFCI_EXITS, + EMULATED_RFDI_EXITS, DEC_EXITS, EXT_INTR_EXITS, HALT_WAKEUP, diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 28c158881d23..4b9a07992753 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -25,6 +25,7 @@ #define OP_19_XOP_RFI 50 #define OP_19_XOP_RFCI 51 +#define OP_19_XOP_RFDI 39 #define OP_31_XOP_MFMSR 83 #define OP_31_XOP_WRTEE 131 @@ -37,6 +38,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); } +static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.dsrr0; + kvmppc_set_msr(vcpu, vcpu->arch.dsrr1); +} + static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) { vcpu->arch.pc = vcpu->arch.csrr0; @@ -65,6 +72,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, *advance = 0; break; + case OP_19_XOP_RFDI: + kvmppc_emul_rfdi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFDI_EXITS); + *advance = 0; + break; + default: emulated = EMULATE_FAIL; break; -- cgit v1.2.3 From 37277b1129de84024f1f427da7bf25aabccf835e Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 6 Aug 2014 12:08:53 +0530 Subject: KVM: PPC: BOOKE: Allow guest to change MSR_DE This patch changes the default behavior of MSRP_DEP, that is guest is not allowed to change the MSR_DE, to guest can change MSR_DE. When userspace is debugging guest then it override the default behavior and set MSRP_DEP. This stops guest to change MSR_DE when userspace is debugging guest. 
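The mechanism behind this is the e500mc MSR-protect (MSRP) mask: an MSR bit whose MSRP bit is set stays under hypervisor control and cannot be changed by the guest. Dropping MSRP_DEP from the default mask therefore hands MSR_DE to the guest, and setting it again (as the guest-debug ioctl path does later in this series) takes it back. A hedged sketch of that gating, with illustrative names and an assumed bit layout:

/* Illustrative only: the real MSR/MSRP defines live in the powerpc headers. */
#define SKETCH_MSR_DE   (1UL << 9)     /* assumed position of the debug-enable bit */

static unsigned long guest_msr_write_sketch(unsigned long cur_msr,
                                            unsigned long new_msr,
                                            unsigned long msrp_mask)
{
        /* Bits covered by the protect mask keep their current, host-chosen value. */
        return (new_msr & ~msrp_mask) | (cur_msr & msrp_mask);
}

/* With MSRP_DEP clear, a guest mtmsr can flip MSR_DE freely:
 *   guest_msr_write_sketch(msr, msr | SKETCH_MSR_DE, msrp_without_dep)
 * With MSRP_DEP set (userspace debugging the guest), the same write is a no-op
 * for that bit. */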
Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500mc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 164bad2a19bf..000cf8242e7d 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -194,7 +194,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) #ifdef CONFIG_64BIT vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM; #endif - vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP; + vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_PMMP; vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT); vcpu->arch.epsc = vcpu->arch.eplc; -- cgit v1.2.3 From 2190991e7caa2e2a2b976e9ed16e55f8c51623be Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 6 Aug 2014 12:08:54 +0530 Subject: KVM: PPC: BOOKE: Clear guest dbsr in userspace exit KVM_EXIT_DEBUG Dbsr is not visible to userspace and we do not think any need to expose this to userspace because: Userspace cannot inject debug interrupt to guest (as this does not know guest ability to handle debug interrupt), so userspace will always clear DBSR. Now if userspace has to always clear DBSR in KVM_EXIT_DEBUG handling then clearing dbsr in kernel looks simple as this avoid doing SET_SREGS/set_one_reg() to clear DBSR Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 322da7d2334a..b4ab86cdb51d 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -735,6 +735,8 @@ static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); u32 dbsr = vcpu->arch.dbsr; + /* Clear guest dbsr (vcpu->arch.dbsr) */ + vcpu->arch.dbsr = 0; run->debug.arch.status = 0; run->debug.arch.address = vcpu->arch.pc; -- cgit v1.2.3 From 348ba71081cd8444178d24d3ed13d34fc1b61dae Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 6 Aug 2014 12:08:55 +0530 Subject: KVM: PPC: BOOKE: Guest and hardware visible debug registers are same Guest visible debug register and hardware visible debug registers are same, so ther is no need to have arch->shadow_dbg_reg, instead use arch->dbg_reg. 
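With the shadow copy gone, the one guest-visible difference, DBCR0[EDM] being reported while a host-side debugger owns the unit, is synthesized at mfspr time instead of being stored in the register image; the booke_emulate.c hunk below does exactly that. A condensed view of the read path (illustrative helper and bit value, not kernel code):

/* The DBCR0_EDM value here is illustrative; see the powerpc debug register
 * definitions for the real one. */
#define SKETCH_DBCR0_EDM   (1UL << 31)

static unsigned long guest_read_dbcr0_sketch(unsigned long saved_dbcr0,
                                             int host_debug_active)
{
        unsigned long val = saved_dbcr0;

        if (host_debug_active)            /* vcpu->guest_debug != 0 in the real code */
                val |= SKETCH_DBCR0_EDM;  /* report "external debug mode" to the guest */
        return val;
}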
Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 2 -- arch/powerpc/kvm/booke.c | 16 +++++++--------- arch/powerpc/kvm/booke_emulate.c | 2 ++ 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8a8da0acfd28..bad3491cc32c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -585,8 +585,6 @@ struct kvm_vcpu_arch { u32 crit_save; /* guest debug registers*/ struct debug_reg dbg_reg; - /* hardware visible debug registers when in guest state */ - struct debug_reg shadow_dbg_reg; #endif gpa_t paddr_accessed; gva_t vaddr_accessed; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b4ab86cdb51d..e47de01b0dbb 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -668,10 +668,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif /* Switch to guest debug context */ - debug = vcpu->arch.shadow_dbg_reg; + debug = vcpu->arch.dbg_reg; switch_booke_debug_regs(&debug); debug = current->thread.debug; - current->thread.debug = vcpu->arch.shadow_dbg_reg; + current->thread.debug = vcpu->arch.dbg_reg; vcpu->arch.pgdir = current->mm->pgd; kvmppc_fix_ee_before_entry(); @@ -732,7 +732,7 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) { - struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg); + struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg); u32 dbsr = vcpu->arch.dbsr; /* Clear guest dbsr (vcpu->arch.dbsr) */ @@ -1848,7 +1848,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int n, b = 0, w = 0; if (!(dbg->control & KVM_GUESTDBG_ENABLE)) { - vcpu->arch.shadow_dbg_reg.dbcr0 = 0; + vcpu->arch.dbg_reg.dbcr0 = 0; vcpu->guest_debug = 0; kvm_guest_protect_msr(vcpu, MSR_DE, false); return 0; @@ -1856,15 +1856,13 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, kvm_guest_protect_msr(vcpu, MSR_DE, true); vcpu->guest_debug = dbg->control; - vcpu->arch.shadow_dbg_reg.dbcr0 = 0; - /* Set DBCR0_EDM in guest visible DBCR0 register. 
*/ - vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM; + vcpu->arch.dbg_reg.dbcr0 = 0; if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; + vcpu->arch.dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC; /* Code below handles only HW breakpoints */ - dbg_reg = &(vcpu->arch.shadow_dbg_reg); + dbg_reg = &(vcpu->arch.dbg_reg); #ifdef CONFIG_KVM_BOOKE_HV /* diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 4b9a07992753..92bc668d3e08 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -293,6 +293,8 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) break; case SPRN_DBCR0: *spr_val = vcpu->arch.dbg_reg.dbcr0; + if (vcpu->guest_debug) + *spr_val = *spr_val | DBCR0_EDM; break; case SPRN_DBCR1: *spr_val = vcpu->arch.dbg_reg.dbcr1; -- cgit v1.2.3 From 2c5096720f3e325df697881cc2a5b3d810b57109 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 6 Aug 2014 12:08:56 +0530 Subject: KVM: PPC: BOOKE: Add one reg interface for DBSR Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kvm/booke.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index e0e49dbb145d..3ca357a7a26a 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -557,6 +557,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) #define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) +#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index e47de01b0dbb..074b7fc795b7 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1494,6 +1494,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_DAC2: val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); break; + case KVM_REG_PPC_DBSR: + val = get_reg_val(reg->id, vcpu->arch.dbsr); + break; case KVM_REG_PPC_EPR: { u32 epr = kvmppc_get_epr(vcpu); val = get_reg_val(reg->id, epr); @@ -1564,6 +1567,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_DAC2: vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_DBSR: + vcpu->arch.dbsr = set_reg_val(reg->id, val); + break; case KVM_REG_PPC_EPR: { u32 new_epr = set_reg_val(reg->id, val); kvmppc_set_epr(vcpu, new_epr); -- cgit v1.2.3 From 3477e71d53197d7f1b6a1f7416f3c42ffec5f221 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 20 Aug 2014 16:09:03 +0300 Subject: powerpc/booke: Restrict SPE exception handlers to e200/e500 cores SPE exception handlers are now defined for 32-bit e500mc cores even though SPE unit is not present and CONFIG_SPE is undefined. Restrict SPE exception handlers to e200/e500 cores adding CONFIG_SPE_POSSIBLE and consequently guard __stup_ivors and __setup_cpu functions. 
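The new symbol is a pure compile-time discriminator: CONFIG_SPE_POSSIBLE covers the cores that may have an SPE unit (e200, e500v1/v2), while CONFIG_PPC_E500MC covers the cores that never do, and the exception/IVOR setup is guarded so each kernel only builds the handlers for the family it targets. A C-preprocessor sketch of the resulting split; the function names are placeholders, the real guarded code is the __setup_*_ivors and __setup_cpu_* routines in the hunks below.

#ifdef CONFIG_SPE_POSSIBLE               /* e200 or e500v1/v2: SPE handlers exist */
static void setup_spe_family_sketch(void)
{
        /* install the SPE unavailable / FP data / FP round IVORs */
}
#endif

#ifdef CONFIG_PPC_E500MC                 /* e500mc/e5500/e6500: no SPE unit */
static void setup_e500mc_family_sketch(void)
{
        /* install the debug and doorbell IVORs used by these cores instead */
}
#endif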
Signed-off-by: Mihai Caraman Acked-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 12 +++++++++++- arch/powerpc/kernel/cputable.c | 5 +++++ arch/powerpc/kernel/head_fsl_booke.S | 18 +++++++++++++----- arch/powerpc/platforms/Kconfig.cputype | 6 +++++- 4 files changed, 34 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 4f1393d20079..dddba3e94260 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -91,6 +91,7 @@ _GLOBAL(setup_altivec_idle) blr +#ifdef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e6500) mflr r6 #ifdef CONFIG_PPC64 @@ -107,14 +108,20 @@ _GLOBAL(__setup_cpu_e6500) bl __setup_cpu_e5500 mtlr r6 blr +#endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 +#ifdef CONFIG_E200 _GLOBAL(__setup_cpu_e200) /* enable dedicated debug exception handling resources (Debug APU) */ mfspr r3,SPRN_HID0 ori r3,r3,HID0_DAPUEN@l mtspr SPRN_HID0,r3 b __setup_e200_ivors +#endif /* CONFIG_E200 */ + +#ifdef CONFIG_E500 +#ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) _GLOBAL(__setup_cpu_e500v2) mflr r4 @@ -129,6 +136,7 @@ _GLOBAL(__setup_cpu_e500v2) #endif mtlr r4 blr +#else /* CONFIG_PPC_E500MC */ _GLOBAL(__setup_cpu_e500mc) _GLOBAL(__setup_cpu_e5500) mflr r5 @@ -159,7 +167,9 @@ _GLOBAL(__setup_cpu_e5500) 2: mtlr r5 blr -#endif +#endif /* CONFIG_PPC_E500MC */ +#endif /* CONFIG_E500 */ +#endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC_BOOK3E_64 _GLOBAL(__restore_cpu_e6500) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 9b6dcaaec1a3..808405906336 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1961,6 +1961,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 +#ifndef CONFIG_PPC_E500MC { /* e500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80200000, @@ -2000,6 +2001,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500, .platform = "ppc8548", }, +#else { /* e500mc */ .pvr_mask = 0xffff0000, .pvr_value = 0x80230000, @@ -2018,7 +2020,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce500mc", }, +#endif /* CONFIG_PPC_E500MC */ #endif /* CONFIG_PPC32 */ +#ifdef CONFIG_PPC_E500MC { /* e5500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80240000, @@ -2062,6 +2066,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce6500", }, +#endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 { /* default match */ .pvr_mask = 0x00000000, diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index b497188a94a1..90f487f71707 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -613,6 +613,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage +/* Define SPE handlers for e200 and e500v2 */ #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) @@ -622,10 +623,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) -#else +#elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \ unknown_exception, EXC_XFER_EE) -#endif /* CONFIG_SPE */ +#endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ #ifdef 
CONFIG_SPE @@ -635,12 +636,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) /* SPE Floating Point Round */ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ SPEFloatingPointRoundException, EXC_XFER_EE) -#else +#elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ unknown_exception, EXC_XFER_EE) -#endif /* CONFIG_SPE */ +#endif /* CONFIG_SPE_POSSIBLE */ + /* Performance Monitor */ EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \ @@ -947,6 +949,7 @@ get_phys_addr: * Global functions */ +#ifdef CONFIG_E200 /* Adjust or setup IVORs for e200 */ _GLOBAL(__setup_e200_ivors) li r3,DebugDebug@l @@ -959,7 +962,10 @@ _GLOBAL(__setup_e200_ivors) mtspr SPRN_IVOR34,r3 sync blr +#endif +#ifdef CONFIG_E500 +#ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ _GLOBAL(__setup_e500_ivors) li r3,DebugCrit@l @@ -974,7 +980,7 @@ _GLOBAL(__setup_e500_ivors) mtspr SPRN_IVOR35,r3 sync blr - +#else /* Adjust or setup IVORs for e500mc */ _GLOBAL(__setup_e500mc_ivors) li r3,DebugDebug@l @@ -1000,6 +1006,8 @@ _GLOBAL(__setup_ehv_ivors) mtspr SPRN_IVOR41,r3 sync blr +#endif /* CONFIG_PPC_E500MC */ +#endif /* CONFIG_E500 */ #ifdef CONFIG_SPE /* diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e8bc40869cbd..7d9ee3d8c618 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -303,9 +303,13 @@ config PPC_ICSWX_USE_SIGILL If in doubt, say N here. +config SPE_POSSIBLE + def_bool y + depends on E200 || (E500 && !PPC_E500MC) + config SPE bool "SPE Support" - depends on E200 || (E500 && !PPC_E500MC) + depends on SPE_POSSIBLE default y ---help--- This option enables kernel support for the Signal Processing -- cgit v1.2.3 From 2b2695a8d85593ec0253f7fdbeea1e18f0f9e5e2 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 20 Aug 2014 16:09:04 +0300 Subject: powerpc/booke: Revert SPE/AltiVec common defines for interrupt numbers Book3E specification defines shared interrupt numbers for SPE and AltiVec units. Still SPE is present in e200/e500v2 cores while AltiVec is present in e6500 core. So we can currently decide at compile-time which unit to support exclusively. As Alexander Graf suggested, this will improve code readability especially in KVM. Use distinct defines to identify SPE/AltiVec interrupt numbers, reverting c58ce397 and 6b310fc5 patches that added common defines. Signed-off-by: Mihai Caraman Acked-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/exceptions-64e.S | 4 ++-- arch/powerpc/kernel/head_fsl_booke.S | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index bb9cac6c8051..3e68d1c69718 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -635,7 +635,7 @@ interrupt_end_book3e: /* Altivec Unavailable Interrupt */ START_EXCEPTION(altivec_unavailable); - NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL, + NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, PROLOG_ADDITION_NONE) /* we can probably do a shorter exception entry for that one... 
*/ EXCEPTION_COMMON(0x200) @@ -658,7 +658,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* AltiVec Assist */ START_EXCEPTION(altivec_assist); NORMAL_EXCEPTION_PROLOG(0x220, - BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST, + BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) INTS_DISABLE diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 90f487f71707..fffd1f96bb1d 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -617,27 +617,27 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) - NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL) + NORMAL_EXCEPTION_PROLOG(SPE_UNAVAIL) beq 1f bl load_up_spe b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_EE_LITE(0x2010, KernelSPE) #elif defined(CONFIG_SPE_POSSIBLE) - EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \ + EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE - EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, + EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE) /* SPE Floating Point Round */ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ SPEFloatingPointRoundException, EXC_XFER_EE) #elif defined(CONFIG_SPE_POSSIBLE) - EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData, + EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception, EXC_XFER_EE) EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ unknown_exception, EXC_XFER_EE) -- cgit v1.2.3 From 06a29e427432b4baaacc3b816b57e90fe08561ff Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 19 Aug 2014 14:59:30 +1000 Subject: KVM: PPC: Book3S HV: Add register name when loading toc Add 'r' to register name r2 in kvmppc_hv_enter. Also update comment at the top of kvmppc_hv_enter to indicate that R2/TOC is non-volatile. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index f0c4db7704c3..edb2ccdbb2ba 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -355,6 +355,7 @@ kvmppc_hv_entry: * MSR = ~IR|DR * R13 = PACA * R1 = host R1 + * R2 = TOC * all other volatile GPRS = free */ mflr r0 @@ -503,7 +504,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) toc_tlbie_lock: .tc native_tlbie_lock[TC],native_tlbie_lock .previous - ld r3,toc_tlbie_lock@toc(2) + ld r3,toc_tlbie_lock@toc(r2) #ifdef __BIG_ENDIAN__ lwz r8,PACA_LOCK_TOKEN(r13) #else -- cgit v1.2.3 From 3efc7da61f6c5af78f67f03df8b0e1a473d8bc45 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 20 Aug 2014 16:36:22 +0300 Subject: KVM: PPC: Book3E: Increase FPU laziness Increase FPU laziness by loading the guest state into the unit before entering the guest instead of doing it on each vcpu schedule. Without this improvement an interrupt may claim floating point corrupting guest state. 
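The ordering is the important part: the guest FP image is now loaded on the entry path, after interrupts are hard-disabled, so nothing can claim the FPU between the load and the actual guest entry; on the way out the state is handed back to the thread. A condensed, self-contained sketch of that flow; all names here are illustrative, the real pair is kvmppc_load_guest_fp()/kvmppc_save_guest_fp() in the hunks below.

static int thread_holds_guest_fp_sketch;

static void load_guest_fp_sketch(void)
{
        if (!thread_holds_guest_fp_sketch) {  /* only if not already claimed        */
                /* enable the unit, restore vcpu->arch.fp into the FP registers     */
                thread_holds_guest_fp_sketch = 1;
        }
}

static void save_guest_fp_sketch(void)
{
        if (thread_holds_guest_fp_sketch) {
                /* flush the live FP registers back into vcpu->arch.fp              */
                thread_holds_guest_fp_sketch = 0;
        }
}

static void vcpu_run_sketch(void)
{
        /* interrupts are hard-disabled here in the real code */
        load_guest_fp_sketch();     /* claim the FPU right before guest entry */
        /* ... enter and run the guest ... */
        save_guest_fp_sketch();     /* release it once the guest exits         */
}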
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 43 ++++++++++++++++++++++++++++++++++++------- arch/powerpc/kvm/booke.h | 34 ---------------------------------- arch/powerpc/kvm/e500mc.c | 2 -- 3 files changed, 36 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 074b7fc795b7..91e7217db9d9 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -124,6 +124,40 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) } #endif +/* + * Load up guest vcpu FP state if it's needed. + * It also set the MSR_FP in thread so that host know + * we're holding FPU, and then host can help to save + * guest vcpu FP state if other threads require to use FPU. + * This simulates an FP unavailable fault. + * + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (!(current->thread.regs->msr & MSR_FP)) { + enable_kernel_fp(); + load_fp_state(&vcpu->arch.fp); + current->thread.fp_save_area = &vcpu->arch.fp; + current->thread.regs->msr |= MSR_FP; + } +#endif +} + +/* + * Save guest vcpu FP state into thread. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_PPC_FPU + if (current->thread.regs->msr & MSR_FP) + giveup_fpu(current); + current->thread.fp_save_area = NULL; +#endif +} + static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) { #if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV) @@ -658,12 +692,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* * Since we can't trap on MSR_FP in GS-mode, we consider the guest - * as always using the FPU. Kernel usage of FP (via - * enable_kernel_fp()) in this thread must not occur while - * vcpu->fpu_active is set. + * as always using the FPU. */ - vcpu->fpu_active = 1; - kvmppc_load_guest_fp(vcpu); #endif @@ -687,8 +717,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #ifdef CONFIG_PPC_FPU kvmppc_save_guest_fp(vcpu); - - vcpu->fpu_active = 0; #endif out: @@ -1194,6 +1222,7 @@ out: else { /* interrupts now hard-disabled */ kvmppc_fix_ee_before_entry(); + kvmppc_load_guest_fp(vcpu); } } diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index f753543c56fa..e73d513f72d0 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -116,40 +116,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); -/* - * Load up guest vcpu FP state if it's needed. - * It also set the MSR_FP in thread so that host know - * we're holding FPU, and then host can help to save - * guest vcpu FP state if other threads require to use FPU. - * This simulates an FP unavailable fault. - * - * It requires to be called with preemption disabled. - */ -static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_PPC_FPU - if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) { - enable_kernel_fp(); - load_fp_state(&vcpu->arch.fp); - current->thread.fp_save_area = &vcpu->arch.fp; - current->thread.regs->msr |= MSR_FP; - } -#endif -} - -/* - * Save guest vcpu FP state into thread. - * It requires to be called with preemption disabled. 
- */ -static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_PPC_FPU - if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP)) - giveup_fpu(current); - current->thread.fp_save_area = NULL; -#endif -} - static inline void kvmppc_clear_dbsr(void) { mtspr(SPRN_DBSR, mfspr(SPRN_DBSR)); diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 000cf8242e7d..454934990672 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -145,8 +145,6 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) kvmppc_e500_tlbil_all(vcpu_e500); __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; } - - kvmppc_load_guest_fp(vcpu); } static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 95d80a294b1eec83eb58c57e101b05828d97a851 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 20 Aug 2014 16:36:23 +0300 Subject: KVM: PPC: Book3e: Add AltiVec support Add AltiVec support in KVM for Book3e. FPU support gracefully reuse host infrastructure so follow the same approach for AltiVec. Book3e specification defines shared interrupt numbers for SPE and AltiVec units. Still SPE is present in e200/e500v2 cores while AltiVec is present in e6500 core. So we can currently decide at compile-time which of the SPE or AltiVec units to support exclusively by using CONFIG_SPE_POSSIBLE and CONFIG_PPC_E500MC defines. As Alexander Graf suggested, keep SPE and AltiVec exception handlers distinct to improve code readability. Guests have the privilege to enable AltiVec, so we always need to support AltiVec in KVM and implicitly in host to reflect interrupts and to save/restore the unit context. KVM will be loaded on cores with AltiVec unit only if CONFIG_ALTIVEC is defined. Use this define to guard KVM AltiVec logic. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 74 ++++++++++++++++++++++++++++++++++- arch/powerpc/kvm/booke.h | 6 +++ arch/powerpc/kvm/bookehv_interrupts.S | 9 +---- arch/powerpc/kvm/e500_emulate.c | 20 ++++++++++ 4 files changed, 101 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 91e7217db9d9..8ace6120ef9b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -168,6 +168,40 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) #endif } +/* + * Simulate AltiVec unavailable fault to load guest state + * from thread to AltiVec unit. + * It requires to be called with preemption disabled. + */ +static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + if (!(current->thread.regs->msr & MSR_VEC)) { + enable_kernel_altivec(); + load_vr_state(&vcpu->arch.vr); + current->thread.vr_save_area = &vcpu->arch.vr; + current->thread.regs->msr |= MSR_VEC; + } + } +#endif +} + +/* + * Save guest vcpu AltiVec state into thread. + * It requires to be called with preemption disabled. 
+ */ +static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) { + if (current->thread.regs->msr & MSR_VEC) + giveup_altivec(current); + current->thread.vr_save_area = NULL; + } +#endif +} + static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) { /* Synchronize guest's desire to get debug interrupts into shadow MSR */ @@ -375,9 +409,15 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_ITLB_MISS: case BOOKE_IRQPRIO_SYSCALL: case BOOKE_IRQPRIO_FP_UNAVAIL: +#ifdef CONFIG_SPE_POSSIBLE case BOOKE_IRQPRIO_SPE_UNAVAIL: case BOOKE_IRQPRIO_SPE_FP_DATA: case BOOKE_IRQPRIO_SPE_FP_ROUND: +#endif +#ifdef CONFIG_ALTIVEC + case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL: + case BOOKE_IRQPRIO_ALTIVEC_ASSIST: +#endif case BOOKE_IRQPRIO_AP_UNAVAIL: allowed = 1; msr_mask = MSR_CE | MSR_ME | MSR_DE; @@ -697,6 +737,17 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_load_guest_fp(vcpu); #endif +#ifdef CONFIG_ALTIVEC + /* Save userspace AltiVec state in stack */ + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + enable_kernel_altivec(); + /* + * Since we can't trap on MSR_VEC in GS-mode, we consider the guest + * as always using the AltiVec. + */ + kvmppc_load_guest_altivec(vcpu); +#endif + /* Switch to guest debug context */ debug = vcpu->arch.dbg_reg; switch_booke_debug_regs(&debug); @@ -719,6 +770,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_save_guest_fp(vcpu); #endif +#ifdef CONFIG_ALTIVEC + kvmppc_save_guest_altivec(vcpu); +#endif + out: vcpu->mode = OUTSIDE_GUEST_MODE; return ret; @@ -1025,7 +1080,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND); r = RESUME_GUEST; break; -#else +#elif defined(CONFIG_SPE_POSSIBLE) case BOOKE_INTERRUPT_SPE_UNAVAIL: /* * Guest wants SPE, but host kernel doesn't support it. Send @@ -1046,6 +1101,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->hw.hardware_exit_reason = exit_nr; r = RESUME_HOST; break; +#endif /* CONFIG_SPE_POSSIBLE */ + +/* + * On cores with Vector category, KVM is loaded only if CONFIG_ALTIVEC, + * see kvmppc_core_check_processor_compat(). 
+ */ +#ifdef CONFIG_ALTIVEC + case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL); + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_ALTIVEC_ASSIST: + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST); + r = RESUME_GUEST; + break; #endif case BOOKE_INTERRUPT_DATA_STORAGE: @@ -1223,6 +1294,7 @@ out: /* interrupts now hard-disabled */ kvmppc_fix_ee_before_entry(); kvmppc_load_guest_fp(vcpu); + kvmppc_load_guest_altivec(vcpu); } } diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index e73d513f72d0..22ba08ea68e9 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -32,9 +32,15 @@ #define BOOKE_IRQPRIO_ALIGNMENT 2 #define BOOKE_IRQPRIO_PROGRAM 3 #define BOOKE_IRQPRIO_FP_UNAVAIL 4 +#ifdef CONFIG_SPE_POSSIBLE #define BOOKE_IRQPRIO_SPE_UNAVAIL 5 #define BOOKE_IRQPRIO_SPE_FP_DATA 6 #define BOOKE_IRQPRIO_SPE_FP_ROUND 7 +#endif +#ifdef CONFIG_PPC_E500MC +#define BOOKE_IRQPRIO_ALTIVEC_UNAVAIL 5 +#define BOOKE_IRQPRIO_ALTIVEC_ASSIST 6 +#endif #define BOOKE_IRQPRIO_SYSCALL 8 #define BOOKE_IRQPRIO_AP_UNAVAIL 9 #define BOOKE_IRQPRIO_DTLB_MISS 10 diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e9fa56a911fd..c8e4da55cb43 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -256,11 +256,9 @@ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \ SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \ +kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \ - SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \ +kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 @@ -361,9 +359,6 @@ kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0 -kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0 kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \ diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index c99c40e9182a..ce7291c79f6c 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -259,6 +259,7 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va break; /* extra exceptions */ +#ifdef CONFIG_SPE_POSSIBLE case SPRN_IVOR32: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; break; @@ -268,6 +269,15 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va case SPRN_IVOR34: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val; break; +#endif +#ifdef CONFIG_ALTIVEC + case SPRN_IVOR32: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL] = spr_val; + break; + case SPRN_IVOR33: + vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST] = spr_val; + break; +#endif case SPRN_IVOR35: 
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; break; @@ -381,6 +391,7 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v break; /* extra exceptions */ +#ifdef CONFIG_SPE_POSSIBLE case SPRN_IVOR32: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; break; @@ -390,6 +401,15 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v case SPRN_IVOR34: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; break; +#endif +#ifdef CONFIG_ALTIVEC + case SPRN_IVOR32: + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL]; + break; + case SPRN_IVOR33: + *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST]; + break; +#endif case SPRN_IVOR35: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; break; -- cgit v1.2.3 From 8a41ea53b32ffbe7524e3424cf0403fa3b4c73fb Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 20 Aug 2014 16:36:24 +0300 Subject: KVM: PPC: Make ONE_REG powerpc generic Make ONE_REG generic for server and embedded architectures by moving kvm_vcpu_ioctl_get_one_reg() and kvm_vcpu_ioctl_set_one_reg() functions to powerpc layer. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s.c | 121 +++++++++++++++++++-------------------------- arch/powerpc/kvm/booke.c | 91 +++++++++++++--------------------- arch/powerpc/kvm/powerpc.c | 55 +++++++++++++++++++++ 3 files changed, 138 insertions(+), 129 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index dd03f6b299ba..26868e207ff0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -535,33 +535,28 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { - int r; - union kvmppc_one_reg val; - int size; + int r = 0; long int i; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val); if (r == -EINVAL) { r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_DAR: - val = get_reg_val(reg->id, kvmppc_get_dar(vcpu)); + *val = get_reg_val(id, kvmppc_get_dar(vcpu)); break; case KVM_REG_PPC_DSISR: - val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu)); + *val = get_reg_val(id, kvmppc_get_dsisr(vcpu)); break; case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: - i = reg->id - KVM_REG_PPC_FPR0; - val = get_reg_val(reg->id, VCPU_FPR(vcpu, i)); + i = id - KVM_REG_PPC_FPR0; + *val = get_reg_val(id, VCPU_FPR(vcpu, i)); break; case KVM_REG_PPC_FPSCR: - val = get_reg_val(reg->id, vcpu->arch.fp.fpscr); + *val = get_reg_val(id, vcpu->arch.fp.fpscr); break; #ifdef CONFIG_ALTIVEC case KVM_REG_PPC_VR0 ... 
KVM_REG_PPC_VR31: @@ -569,110 +564,94 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; + val->vval = vcpu->arch.vr.vr[id - KVM_REG_PPC_VR0]; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + *val = get_reg_val(id, vcpu->arch.vr.vscr.u[3]); break; case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); + *val = get_reg_val(id, vcpu->arch.vrsave); break; #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = reg->id - KVM_REG_PPC_VSR0; - val.vsxval[0] = vcpu->arch.fp.fpr[i][0]; - val.vsxval[1] = vcpu->arch.fp.fpr[i][1]; + i = id - KVM_REG_PPC_VSR0; + val->vsxval[0] = vcpu->arch.fp.fpr[i][0]; + val->vsxval[1] = vcpu->arch.fp.fpr[i][1]; } else { r = -ENXIO; } break; #endif /* CONFIG_VSX */ - case KVM_REG_PPC_DEBUG_INST: { - u32 opcode = INS_TW; - r = copy_to_user((u32 __user *)(long)reg->addr, - &opcode, sizeof(u32)); + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, INS_TW); break; - } #ifdef CONFIG_KVM_XICS case KVM_REG_PPC_ICP_STATE: if (!vcpu->arch.icp) { r = -ENXIO; break; } - val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu)); + *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); break; #endif /* CONFIG_KVM_XICS */ case KVM_REG_PPC_FSCR: - val = get_reg_val(reg->id, vcpu->arch.fscr); + *val = get_reg_val(id, vcpu->arch.fscr); break; case KVM_REG_PPC_TAR: - val = get_reg_val(reg->id, vcpu->arch.tar); + *val = get_reg_val(id, vcpu->arch.tar); break; case KVM_REG_PPC_EBBHR: - val = get_reg_val(reg->id, vcpu->arch.ebbhr); + *val = get_reg_val(id, vcpu->arch.ebbhr); break; case KVM_REG_PPC_EBBRR: - val = get_reg_val(reg->id, vcpu->arch.ebbrr); + *val = get_reg_val(id, vcpu->arch.ebbrr); break; case KVM_REG_PPC_BESCR: - val = get_reg_val(reg->id, vcpu->arch.bescr); + *val = get_reg_val(id, vcpu->arch.bescr); break; case KVM_REG_PPC_VTB: - val = get_reg_val(reg->id, vcpu->arch.vtb); + *val = get_reg_val(id, vcpu->arch.vtb); break; case KVM_REG_PPC_IC: - val = get_reg_val(reg->id, vcpu->arch.ic); + *val = get_reg_val(id, vcpu->arch.ic); break; default: r = -EINVAL; break; } } - if (r) - return r; - - if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) - r = -EFAULT; return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { - int r; - union kvmppc_one_reg val; - int size; + int r = 0; long int i; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) - return -EFAULT; - - r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val); if (r == -EINVAL) { r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_DAR: - kvmppc_set_dar(vcpu, set_reg_val(reg->id, val)); + kvmppc_set_dar(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_DSISR: - kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val)); + kvmppc_set_dsisr(vcpu, set_reg_val(id, *val)); break; case KVM_REG_PPC_FPR0 ... 
KVM_REG_PPC_FPR31: - i = reg->id - KVM_REG_PPC_FPR0; - VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val); + i = id - KVM_REG_PPC_FPR0; + VCPU_FPR(vcpu, i) = set_reg_val(id, *val); break; case KVM_REG_PPC_FPSCR: - vcpu->arch.fp.fpscr = set_reg_val(reg->id, val); + vcpu->arch.fp.fpscr = set_reg_val(id, *val); break; #ifdef CONFIG_ALTIVEC case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: @@ -680,29 +659,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) r = -ENXIO; break; } - vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + vcpu->arch.vr.vr[id - KVM_REG_PPC_VR0] = val->vval; break; case KVM_REG_PPC_VSCR: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); + vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val); break; case KVM_REG_PPC_VRSAVE: if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { r = -ENXIO; break; } - vcpu->arch.vrsave = set_reg_val(reg->id, val); + vcpu->arch.vrsave = set_reg_val(id, *val); break; #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { - long int i = reg->id - KVM_REG_PPC_VSR0; - vcpu->arch.fp.fpr[i][0] = val.vsxval[0]; - vcpu->arch.fp.fpr[i][1] = val.vsxval[1]; + i = id - KVM_REG_PPC_VSR0; + vcpu->arch.fp.fpr[i][0] = val->vsxval[0]; + vcpu->arch.fp.fpr[i][1] = val->vsxval[1]; } else { r = -ENXIO; } @@ -715,29 +694,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) break; } r = kvmppc_xics_set_icp(vcpu, - set_reg_val(reg->id, val)); + set_reg_val(id, *val)); break; #endif /* CONFIG_KVM_XICS */ case KVM_REG_PPC_FSCR: - vcpu->arch.fscr = set_reg_val(reg->id, val); + vcpu->arch.fscr = set_reg_val(id, *val); break; case KVM_REG_PPC_TAR: - vcpu->arch.tar = set_reg_val(reg->id, val); + vcpu->arch.tar = set_reg_val(id, *val); break; case KVM_REG_PPC_EBBHR: - vcpu->arch.ebbhr = set_reg_val(reg->id, val); + vcpu->arch.ebbhr = set_reg_val(id, *val); break; case KVM_REG_PPC_EBBRR: - vcpu->arch.ebbrr = set_reg_val(reg->id, val); + vcpu->arch.ebbrr = set_reg_val(id, *val); break; case KVM_REG_PPC_BESCR: - vcpu->arch.bescr = set_reg_val(reg->id, val); + vcpu->arch.bescr = set_reg_val(id, *val); break; case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(reg->id, val); + vcpu->arch.vtb = set_reg_val(id, *val); break; case KVM_REG_PPC_IC: - vcpu->arch.ic = set_reg_val(reg->id, val); + vcpu->arch.ic = set_reg_val(id, *val); break; default: r = -EINVAL; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8ace6120ef9b..831c1b433b09 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1564,150 +1564,125 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; - union kvmppc_one_reg val; - int size; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - switch (reg->id) { + switch (id) { case KVM_REG_PPC_IAC1: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac1); break; case KVM_REG_PPC_IAC2: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac2); break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case KVM_REG_PPC_IAC3: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3); + *val = get_reg_val(id, 
vcpu->arch.dbg_reg.iac3); break; case KVM_REG_PPC_IAC4: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4); + *val = get_reg_val(id, vcpu->arch.dbg_reg.iac4); break; #endif case KVM_REG_PPC_DAC1: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1); + *val = get_reg_val(id, vcpu->arch.dbg_reg.dac1); break; case KVM_REG_PPC_DAC2: - val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); - break; - case KVM_REG_PPC_DBSR: - val = get_reg_val(reg->id, vcpu->arch.dbsr); + *val = get_reg_val(id, vcpu->arch.dbg_reg.dac2); break; case KVM_REG_PPC_EPR: { u32 epr = kvmppc_get_epr(vcpu); - val = get_reg_val(reg->id, epr); + *val = get_reg_val(id, epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: - val = get_reg_val(reg->id, vcpu->arch.epcr); + *val = get_reg_val(id, vcpu->arch.epcr); break; #endif case KVM_REG_PPC_TCR: - val = get_reg_val(reg->id, vcpu->arch.tcr); + *val = get_reg_val(id, vcpu->arch.tcr); break; case KVM_REG_PPC_TSR: - val = get_reg_val(reg->id, vcpu->arch.tsr); + *val = get_reg_val(id, vcpu->arch.tsr); break; case KVM_REG_PPC_DEBUG_INST: - val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG); + *val = get_reg_val(id, KVMPPC_INST_EHPRIV_DEBUG); break; case KVM_REG_PPC_VRSAVE: - val = get_reg_val(reg->id, vcpu->arch.vrsave); + *val = get_reg_val(id, vcpu->arch.vrsave); break; default: - r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val); break; } - if (r) - return r; - - if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) - r = -EFAULT; - return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, + union kvmppc_one_reg *val) { int r = 0; - union kvmppc_one_reg val; - int size; - size = one_reg_size(reg->id); - if (size > sizeof(val)) - return -EINVAL; - - if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) - return -EFAULT; - - switch (reg->id) { + switch (id) { case KVM_REG_PPC_IAC1: - vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac1 = set_reg_val(id, *val); break; case KVM_REG_PPC_IAC2: - vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac2 = set_reg_val(id, *val); break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case KVM_REG_PPC_IAC3: - vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac3 = set_reg_val(id, *val); break; case KVM_REG_PPC_IAC4: - vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.iac4 = set_reg_val(id, *val); break; #endif case KVM_REG_PPC_DAC1: - vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.dac1 = set_reg_val(id, *val); break; case KVM_REG_PPC_DAC2: - vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val); - break; - case KVM_REG_PPC_DBSR: - vcpu->arch.dbsr = set_reg_val(reg->id, val); + vcpu->arch.dbg_reg.dac2 = set_reg_val(id, *val); break; case KVM_REG_PPC_EPR: { - u32 new_epr = set_reg_val(reg->id, val); + u32 new_epr = set_reg_val(id, *val); kvmppc_set_epr(vcpu, new_epr); break; } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: { - u32 new_epcr = set_reg_val(reg->id, val); + u32 new_epcr = set_reg_val(id, *val); kvmppc_set_epcr(vcpu, new_epcr); break; } #endif case KVM_REG_PPC_OR_TSR: { - u32 tsr_bits = set_reg_val(reg->id, val); + u32 tsr_bits = set_reg_val(id, *val); kvmppc_set_tsr_bits(vcpu, tsr_bits); break; } case KVM_REG_PPC_CLEAR_TSR: { - u32 tsr_bits = set_reg_val(reg->id, val); + u32 tsr_bits = set_reg_val(id, *val); 
kvmppc_clr_tsr_bits(vcpu, tsr_bits); break; } case KVM_REG_PPC_TSR: { - u32 tsr = set_reg_val(reg->id, val); + u32 tsr = set_reg_val(id, *val); kvmppc_set_tsr(vcpu, tsr); break; } case KVM_REG_PPC_TCR: { - u32 tcr = set_reg_val(reg->id, val); + u32 tcr = set_reg_val(id, *val); kvmppc_set_tcr(vcpu, tcr); break; } case KVM_REG_PPC_VRSAVE: - vcpu->arch.vrsave = set_reg_val(reg->id, val); + vcpu->arch.vrsave = set_reg_val(id, *val); break; default: - r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val); break; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index da505237a664..8a26126db482 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -907,6 +907,61 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvmppc_handle_store); +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r = 0; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + r = kvmppc_get_one_reg(vcpu, reg->id, &val); + if (r == -EINVAL) { + r = 0; + switch (reg->id) { + default: + r = -EINVAL; + break; + } + } + + if (r) + return r; + + if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) + r = -EFAULT; + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) + return -EFAULT; + + r = kvmppc_set_one_reg(vcpu, reg->id, &val); + if (r == -EINVAL) { + r = 0; + switch (reg->id) { + default: + r = -EINVAL; + break; + } + } + + return r; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; -- cgit v1.2.3 From 3840edc8033ad5b86deee309c1c321ca54257452 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 20 Aug 2014 16:36:25 +0300 Subject: KVM: PPC: Move ONE_REG AltiVec support to powerpc Move ONE_REG AltiVec support to powerpc generic layer. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/include/uapi/asm/kvm.h | 5 +++++ arch/powerpc/kvm/book3s.c | 42 ------------------------------------- arch/powerpc/kvm/powerpc.c | 42 +++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 3ca357a7a26a..ab4d4732c492 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -476,6 +476,11 @@ struct kvm_get_htab_header { /* FP and vector status/control registers */ #define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) +/* + * VSCR register is documented as a 32-bit register in the ISA, but it can + * only be accesses via a vector register. Expose VSCR as a 32-bit register + * even though the kernel represents it as a 128-bit vector. 
+ */ #define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) /* Virtual processor areas */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 26868e207ff0..1b5adda2a224 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -558,25 +558,6 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_FPSCR: *val = get_reg_val(id, vcpu->arch.fp.fpscr); break; -#ifdef CONFIG_ALTIVEC - case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - val->vval = vcpu->arch.vr.vr[id - KVM_REG_PPC_VR0]; - break; - case KVM_REG_PPC_VSCR: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - *val = get_reg_val(id, vcpu->arch.vr.vscr.u[3]); - break; - case KVM_REG_PPC_VRSAVE: - *val = get_reg_val(id, vcpu->arch.vrsave); - break; -#endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { @@ -653,29 +634,6 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_FPSCR: vcpu->arch.fp.fpscr = set_reg_val(id, *val); break; -#ifdef CONFIG_ALTIVEC - case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vr.vr[id - KVM_REG_PPC_VR0] = val->vval; - break; - case KVM_REG_PPC_VSCR: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val); - break; - case KVM_REG_PPC_VRSAVE: - if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { - r = -ENXIO; - break; - } - vcpu->arch.vrsave = set_reg_val(id, *val); - break; -#endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: if (cpu_has_feature(CPU_FTR_VSX)) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8a26126db482..0c7d1917c23d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -921,6 +921,29 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) if (r == -EINVAL) { r = 0; switch (reg->id) { +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_VRSAVE: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vrsave = set_reg_val(reg->id, val); + break; +#endif /* CONFIG_ALTIVEC */ default: r = -EINVAL; break; @@ -953,6 +976,25 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) if (r == -EINVAL) { r = 0; switch (reg->id) { +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... 
KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0]; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]); + break; + case KVM_REG_PPC_VRSAVE: + val = get_reg_val(reg->id, vcpu->arch.vrsave); + break; +#endif /* CONFIG_ALTIVEC */ default: r = -EINVAL; break; -- cgit v1.2.3 From 2f699a59f399d65d51df6eb916bf2e0f7c6f8148 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 13 Aug 2014 14:39:44 +0530 Subject: KVM: PPC: BOOKE: Emulate debug registers and exception This patch emulates debug registers and debug exception to support guest using debug resource. This enables running gdb/kgdb etc in guest. On BOOKE architecture we cannot share debug resources between QEMU and guest because: When QEMU is using debug resources then debug exception must be always enabled. To achieve this we set MSR_DE and also set MSRP_DEP so guest cannot change MSR_DE. When emulating debug resource for guest we want guest to control MSR_DE (enable/disable debug interrupt on need). So above mentioned two configuration cannot be supported at the same time. So the result is that we cannot share debug resources between QEMU and Guest on BOOKE architecture. In the current design QEMU gets priority over guest, this means that if QEMU is using debug resources then guest cannot use them and if guest is using debug resource then QEMU can overwrite them. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 3 + arch/powerpc/include/asm/reg_booke.h | 2 + arch/powerpc/kvm/booke.c | 42 +++++++++- arch/powerpc/kvm/booke_emulate.c | 148 +++++++++++++++++++++++++++++++++++ 4 files changed, 194 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fb86a2299d8a..05e58b630601 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -206,6 +206,9 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); +void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu); +void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu); + union kvmppc_one_reg { u32 wval; u64 dval; diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 1d653308a33c..16547efa2d5a 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -319,6 +319,8 @@ * DBSR bits which have conflicting definitions on true Book E versus IBM 40x. 
*/ #ifdef CONFIG_BOOKE +#define DBSR_IDE 0x80000000 /* Imprecise Debug Event */ +#define DBSR_MRR 0x30000000 /* Most Recent Reset */ #define DBSR_IC 0x08000000 /* Instruction Completion */ #define DBSR_BT 0x04000000 /* Branch Taken */ #define DBSR_IRPT 0x02000000 /* Exception Debug Event */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 831c1b433b09..b4e81e6be11a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -335,6 +335,16 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); } +void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DEBUG); +} + +void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu) +{ + clear_bit(BOOKE_IRQPRIO_DEBUG, &vcpu->arch.pending_exceptions); +} + static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { kvmppc_set_srr0(vcpu, srr0); @@ -818,7 +828,32 @@ static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu) struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg); u32 dbsr = vcpu->arch.dbsr; - /* Clear guest dbsr (vcpu->arch.dbsr) */ + if (vcpu->guest_debug == 0) { + /* + * Debug resources belong to Guest. + * Imprecise debug event is not injected + */ + if (dbsr & DBSR_IDE) { + dbsr &= ~DBSR_IDE; + if (!dbsr) + return RESUME_GUEST; + } + + if (dbsr && (vcpu->arch.shared->msr & MSR_DE) && + (vcpu->arch.dbg_reg.dbcr0 & DBCR0_IDM)) + kvmppc_core_queue_debug(vcpu); + + /* Inject a program interrupt if trap debug is not allowed */ + if ((dbsr & DBSR_TIE) && !(vcpu->arch.shared->msr & MSR_DE)) + kvmppc_core_queue_program(vcpu, ESR_PTR); + + return RESUME_GUEST; + } + + /* + * Debug resource owned by userspace. + * Clear guest dbsr (vcpu->arch.dbsr) + */ vcpu->arch.dbsr = 0; run->debug.arch.status = 0; run->debug.arch.address = vcpu->arch.pc; @@ -1350,6 +1385,11 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, (unsigned long)vcpu); + /* + * Clear DBSR.MRR to avoid guest debug interrupt as + * this is of host interest + */ + mtspr(SPRN_DBSR, DBSR_MRR); return 0; } diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 92bc668d3e08..a82f64502de1 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -131,6 +131,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) { int emulated = EMULATE_DONE; + bool debug_inst = false; switch (sprn) { case SPRN_DEAR: @@ -145,14 +146,128 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_CSRR1: vcpu->arch.csrr1 = spr_val; break; + case SPRN_DSRR0: + vcpu->arch.dsrr0 = spr_val; + break; + case SPRN_DSRR1: + vcpu->arch.dsrr1 = spr_val; + break; + case SPRN_IAC1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac1 = spr_val; + break; + case SPRN_IAC2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac2 = spr_val; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case SPRN_IAC3: + /* + * If userspace is debugging guest then guest + * can not access debug registers. 
+ */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac3 = spr_val; + break; + case SPRN_IAC4: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.iac4 = spr_val; + break; +#endif + case SPRN_DAC1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dac1 = spr_val; + break; + case SPRN_DAC2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dac2 = spr_val; + break; case SPRN_DBCR0: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + spr_val &= (DBCR0_IDM | DBCR0_IC | DBCR0_BT | DBCR0_TIE | + DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | + DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W); + vcpu->arch.dbg_reg.dbcr0 = spr_val; break; case SPRN_DBCR1: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; vcpu->arch.dbg_reg.dbcr1 = spr_val; break; + case SPRN_DBCR2: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + + debug_inst = true; + vcpu->arch.dbg_reg.dbcr2 = spr_val; + break; case SPRN_DBSR: + /* + * If userspace is debugging guest then guest + * can not access debug registers. + */ + if (vcpu->guest_debug) + break; + vcpu->arch.dbsr &= ~spr_val; + if (!(vcpu->arch.dbsr & ~DBSR_IDE)) + kvmppc_core_dequeue_debug(vcpu); break; case SPRN_TSR: kvmppc_clr_tsr_bits(vcpu, spr_val); @@ -265,6 +380,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) emulated = EMULATE_FAIL; } + if (debug_inst) { + current->thread.debug = vcpu->arch.dbg_reg; + switch_booke_debug_regs(&vcpu->arch.dbg_reg); + } return emulated; } @@ -291,6 +410,32 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_CSRR1: *spr_val = vcpu->arch.csrr1; break; + case SPRN_DSRR0: + *spr_val = vcpu->arch.dsrr0; + break; + case SPRN_DSRR1: + *spr_val = vcpu->arch.dsrr1; + break; + case SPRN_IAC1: + *spr_val = vcpu->arch.dbg_reg.iac1; + break; + case SPRN_IAC2: + *spr_val = vcpu->arch.dbg_reg.iac2; + break; +#if CONFIG_PPC_ADV_DEBUG_IACS > 2 + case SPRN_IAC3: + *spr_val = vcpu->arch.dbg_reg.iac3; + break; + case SPRN_IAC4: + *spr_val = vcpu->arch.dbg_reg.iac4; + break; +#endif + case SPRN_DAC1: + *spr_val = vcpu->arch.dbg_reg.dac1; + break; + case SPRN_DAC2: + *spr_val = vcpu->arch.dbg_reg.dac2; + break; case SPRN_DBCR0: *spr_val = vcpu->arch.dbg_reg.dbcr0; if (vcpu->guest_debug) @@ -299,6 +444,9 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_DBCR1: *spr_val = vcpu->arch.dbg_reg.dbcr1; break; + case SPRN_DBCR2: + *spr_val = vcpu->arch.dbg_reg.dbcr2; + break; case SPRN_DBSR: *spr_val = vcpu->arch.dbsr; break; -- cgit v1.2.3 From d02d4d156e72baf9a6628c76eb53019124d3c82f Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 1 Sep 2014 17:19:56 +0300 Subject: KVM: PPC: Remove the tasklet used by the hrtimer Powerpc timer implementation is a copycat version of s390. 
Now that they removed the tasklet with commit ea74c0ea1b24a6978a6ebc80ba4dbc7b7848b32d follow this optimization. Signed-off-by: Mihai Caraman Signed-off-by: Bogdan Purcareata Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s.c | 4 +--- arch/powerpc/kvm/booke.c | 4 +--- arch/powerpc/kvm/powerpc.c | 8 +------- 5 files changed, 4 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bad3491cc32c..d2432401d301 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -606,7 +606,6 @@ struct kvm_vcpu_arch { u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ struct hrtimer dec_timer; - struct tasklet_struct tasklet; u64 dec_jiffies; u64 dec_expires; unsigned long pending_exceptions; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 05e58b630601..73063ef53694 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -89,7 +89,7 @@ extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); -extern void kvmppc_decrementer_func(unsigned long data); +extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 1b5adda2a224..f23b6a553082 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -718,10 +718,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return -EINVAL; } -void kvmppc_decrementer_func(unsigned long data) +void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - kvmppc_core_queue_dec(vcpu); kvm_vcpu_kick(vcpu); } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b4e81e6be11a..97ec5b7fce9c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1822,10 +1822,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) update_timer_ints(vcpu); } -void kvmppc_decrementer_func(unsigned long data) +void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; - if (vcpu->arch.tcr & TCR_ARE) { vcpu->arch.dec = vcpu->arch.decar; kvmppc_emulate_dec(vcpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0c7d1917c23d..ecf057551a2f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -638,7 +638,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { /* Make sure we're not using the vcpu anymore */ hrtimer_cancel(&vcpu->arch.dec_timer); - tasklet_kill(&vcpu->arch.tasklet); kvmppc_remove_vcpu_debugfs(vcpu); @@ -664,16 +663,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvmppc_core_pending_dec(vcpu); } -/* - * low level hrtimer wake routine. Because this runs in hardirq context - * we schedule a tasklet to do the real work. 
- */ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) { struct kvm_vcpu *vcpu; vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer); - tasklet_schedule(&vcpu->arch.tasklet); + kvmppc_decrementer_func(vcpu); return HRTIMER_NORESTART; } @@ -683,7 +678,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) int ret; hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); - tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; vcpu->arch.dec_expires = ~(u64)0; -- cgit v1.2.3 From e9a94832f3d3ce10f5ae48dd119ccb50cfb0d04e Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 1 Sep 2014 13:17:43 +0300 Subject: KVM: PPC: Remove shared defines for SPE and AltiVec interrupts We currently decide at compile-time which of the SPE or AltiVec units to support exclusively. Guard kernel defines with CONFIG_SPE_POSSIBLE and CONFIG_PPC_E500MC and remove shared defines. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_asm.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 465dfcb82c92..5bca220bbb60 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -53,17 +53,17 @@ #define BOOKE_INTERRUPT_DEBUG 15 /* E500 */ -#define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32 -#define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33 -/* - * TODO: Unify 32-bit and 64-bit kernel exception handlers to use same defines - */ -#define BOOKE_INTERRUPT_SPE_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL -#define BOOKE_INTERRUPT_SPE_FP_DATA BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST -#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL -#define BOOKE_INTERRUPT_ALTIVEC_ASSIST \ - BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST +#ifdef CONFIG_SPE_POSSIBLE +#define BOOKE_INTERRUPT_SPE_UNAVAIL 32 +#define BOOKE_INTERRUPT_SPE_FP_DATA 33 #define BOOKE_INTERRUPT_SPE_FP_ROUND 34 +#endif + +#ifdef CONFIG_PPC_E500MC +#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32 +#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33 +#endif + #define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35 #define BOOKE_INTERRUPT_DOORBELL 36 #define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37 -- cgit v1.2.3 From b754c739ee8cd0101dc3f5a24e8c2ff76ee7eff9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 2 Sep 2014 16:14:42 +1000 Subject: KVM: PPC: Book3S HV: Increase timeout for grabbing secondary threads Occasional failures have been seen with split-core mode and migration where the message "KVM: couldn't grab cpu" appears. This increases the length of time that we wait from 1ms to 10ms, which seems to work around the issue. 
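For context, the timeout is an iteration count on the polling loop in kvmppc_grab_hwthread(); the 1ms/10ms figures above imply roughly a microsecond of delay per iteration. A minimal sketch of the pattern, with the thread-state check left as a placeholder rather than the actual paca field test:

    long timeout = 10000;                   /* was 1000: ~1ms; now ~10ms */

    while (secondary_thread_still_in_kernel(cpu)) {  /* placeholder check */
            if (--timeout <= 0) {
                    pr_err("KVM: couldn't grab cpu %d\n", cpu);
                    return -EBUSY;
            }
            udelay(1);                      /* ~1us per iteration */
    }
    return 0;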
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 27cced9c7249..4526befd60c5 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1489,7 +1489,7 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, static int kvmppc_grab_hwthread(int cpu) { struct paca_struct *tpaca; - long timeout = 1000; + long timeout = 10000; tpaca = &paca[cpu]; -- cgit v1.2.3 From 9333e6c4c15a4084dd5f4336cd4379afbf99e458 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 2 Sep 2014 16:14:43 +1000 Subject: KVM: PPC: Book3S HV: Only accept host PVR value for guest PVR Since the guest can read the machine's PVR (Processor Version Register) directly and see the real value, we should disallow userspace from setting any value for the guest's PVR other than the real host value. Therefore this makes kvm_arch_vcpu_set_sregs_hv() check the supplied PVR value and return an error if it is different from the host value, which has been put into vcpu->arch.pvr at vcpu creation time. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 4526befd60c5..529d10a7a36f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -856,7 +856,9 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, { int i, j; - kvmppc_set_pvr_hv(vcpu, sregs->pvr); + /* Only accept the same PVR as the host's, since we can't spoof it */ + if (sregs->pvr != vcpu->arch.pvr) + return -EINVAL; j = 0; for (i = 0; i < vcpu->arch.slb_nr; i++) { -- cgit v1.2.3 From 188e267ce249b491dfbb77d881996dcb5610dc90 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 1 Sep 2014 12:01:58 +0300 Subject: KVM: PPC: e500mc: Add support for single threaded vcpus on e6500 core ePAPR represents hardware threads as cpu node properties in device tree. So with existing QEMU, hardware threads are simply exposed as vcpus with one hardware thread. The e6500 core shares TLBs between hardware threads. Without tlb write conditional instruction, the Linux kernel uses per core mechanisms to protect against duplicate TLB entries. The guest is unable to detect real siblings threads, so it can't use the TLB protection mechanism. An alternative solution is to use the hypervisor to allocate different lpids to guest's vcpus that runs simultaneous on real siblings threads. On systems with two threads per core this patch halves the size of the lpid pool that the allocator sees and use two lpids per VM. Use even numbers to speedup vcpu lpid computation with consecutive lpids per VM: vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on. 
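For illustration, the per-thread lpid computation introduced below (get_thread_specific_lpid() in e500.h) boils down to OR-ing the hardware thread number into the low bit of the VM's even lpid; this is a recap of the hunk, not additional code:

    /*
     * Must be called with preemption disabled: the result is only valid
     * for the CPU we are currently running on.
     */
    static inline int get_thread_specific_lpid(int vm_lpid)
    {
            int vcpu_lpid = vm_lpid;

            if (threads_per_core == 2)
                    vcpu_lpid |= smp_processor_id() & 1;  /* thread 0 or 1 */

            return vcpu_lpid;
    }

With threads_per_core == 2, a VM whose kvm->arch.lpid is 2 therefore runs with lpid 2 on thread 0 and lpid 3 on thread 1.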
Signed-off-by: Mihai Caraman [agraf: fix spelling] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_booke.h | 5 +++- arch/powerpc/kvm/e500.h | 20 ++++++++++++++++ arch/powerpc/kvm/e500_mmu_host.c | 18 +++++++------- arch/powerpc/kvm/e500mc.c | 46 ++++++++++++++++++++++++++---------- 4 files changed, 65 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index f7aa5cc395c4..630134d17f5d 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -23,7 +23,10 @@ #include #include -/* LPIDs we support with this build -- runtime limit may be lower */ +/* + * Number of available lpids. Only the low-order 6 bits of LPID rgister are + * implemented on e500mc+ cores. + */ #define KVMPPC_NR_LPIDS 64 #define KVMPPC_INST_EHPRIV 0x7c00021c diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index a326178bdea5..72920bed3ac6 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -22,6 +22,7 @@ #include #include #include +#include enum vcpu_ftr { VCPU_FTR_MMU_V2 @@ -289,6 +290,25 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500); #define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe) #define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu) #define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS) + +/* + * These functions should be called with preemption disabled + * and the returned value is valid only in that context + */ +static inline int get_thread_specific_lpid(int vm_lpid) +{ + int vcpu_lpid = vm_lpid; + + if (threads_per_core == 2) + vcpu_lpid |= smp_processor_id() & 1; + + return vcpu_lpid; +} + +static inline int get_lpid(struct kvm_vcpu *vcpu) +{ + return get_thread_specific_lpid(vcpu->kvm->arch.lpid); +} #else unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu, struct kvm_book3e_206_tlb_entry *gtlbe); diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 08f14bb57897..c8795a64e935 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -69,7 +69,8 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) * writing shadow tlb entry to host TLB */ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, - uint32_t mas0) + uint32_t mas0, + uint32_t lpid) { unsigned long flags; @@ -80,7 +81,7 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); #ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_MAS8, stlbe->mas8); + mtspr(SPRN_MAS8, MAS8_TGS | get_thread_specific_lpid(lpid)); #endif asm volatile("isync; tlbwe" : : : "memory"); @@ -129,11 +130,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, if (tlbsel == 0) { mas0 = get_host_mas0(stlbe->mas2); - __write_host_tlbe(stlbe, mas0); + __write_host_tlbe(stlbe, mas0, vcpu_e500->vcpu.kvm->arch.lpid); } else { __write_host_tlbe(stlbe, MAS0_TLBSEL(1) | - MAS0_ESEL(to_htlb1_esel(sesel))); + MAS0_ESEL(to_htlb1_esel(sesel)), + vcpu_e500->vcpu.kvm->arch.lpid); } } @@ -176,7 +178,7 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu) MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; magic.mas8 = 0; - __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); + __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index), 0); preempt_enable(); } #endif @@ -317,10 +319,6 @@ static void kvmppc_e500_setup_stlbe( stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & 
E500_TLB_MAS2_ATTR); stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); - -#ifdef CONFIG_KVM_BOOKE_HV - stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; -#endif } static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, @@ -633,7 +631,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, local_irq_save(flags); mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); - mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(vcpu)); asm volatile("tlbsx 0, %[geaddr]\n" : : [geaddr] "r" (geaddr)); mtspr(SPRN_MAS5, 0); diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 454934990672..bf8f99f6676a 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -48,10 +48,11 @@ void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type) return; } - - tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id; + preempt_disable(); + tag = PPC_DBELL_LPID(get_lpid(vcpu)) | vcpu->vcpu_id; mb(); ppc_msgsnd(dbell_type, 0, tag); + preempt_enable(); } /* gtlbe must not be mapped by more than one host tlb entry */ @@ -60,12 +61,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, { unsigned int tid, ts; gva_t eaddr; - u32 val, lpid; + u32 val; unsigned long flags; ts = get_tlb_ts(gtlbe); tid = get_tlb_tid(gtlbe); - lpid = vcpu_e500->vcpu.kvm->arch.lpid; /* We search the host TLB to invalidate its shadow TLB entry */ val = (tid << 16) | ts; @@ -74,7 +74,7 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, local_irq_save(flags); mtspr(SPRN_MAS6, val); - mtspr(SPRN_MAS5, MAS5_SGS | lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr)); val = mfspr(SPRN_MAS1); @@ -95,7 +95,7 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) unsigned long flags; local_irq_save(flags); - mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid); + mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); asm volatile("tlbilxlpid"); mtspr(SPRN_MAS5, 0); local_irq_restore(flags); @@ -110,6 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } +/* We use two lpids per VM */ static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) @@ -118,10 +119,12 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) kvmppc_booke_vcpu_load(vcpu, cpu); - mtspr(SPRN_LPID, vcpu->kvm->arch.lpid); + mtspr(SPRN_LPID, get_lpid(vcpu)); mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); mtspr(SPRN_GPIR, vcpu->vcpu_id); mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp); + vcpu->arch.eplc = EPC_EGS | (get_lpid(vcpu) << EPC_ELPID_SHIFT); + vcpu->arch.epsc = vcpu->arch.eplc; mtspr(SPRN_EPLC, vcpu->arch.eplc); mtspr(SPRN_EPSC, vcpu->arch.epsc); @@ -141,9 +144,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { + __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; + __get_cpu_var(last_vcpu_of_lpid)[get_lpid(vcpu)] = vcpu; } } @@ -193,8 +196,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM; #endif vcpu->arch.shadow_msrp = 
MSRP_UCLEP | MSRP_PMMP; - vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT); - vcpu->arch.epsc = vcpu->arch.eplc; vcpu->arch.pvr = mfspr(SPRN_PVR); vcpu_e500->svr = mfspr(SPRN_SVR); @@ -354,13 +355,26 @@ static int kvmppc_core_init_vm_e500mc(struct kvm *kvm) if (lpid < 0) return lpid; + /* + * Use two lpids per VM on cores with two threads like e6500. Use + * even numbers to speedup vcpu lpid computation with consecutive lpids + * per VM. vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on. + */ + if (threads_per_core == 2) + lpid <<= 1; + kvm->arch.lpid = lpid; return 0; } static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm) { - kvmppc_free_lpid(kvm->arch.lpid); + int lpid = kvm->arch.lpid; + + if (threads_per_core == 2) + lpid >>= 1; + + kvmppc_free_lpid(lpid); } static struct kvmppc_ops kvm_ops_e500mc = { @@ -388,7 +402,13 @@ static int __init kvmppc_e500mc_init(void) if (r) goto err_out; - kvmppc_init_lpid(64); + /* + * Use two lpids per VM on dual threaded processors like e6500 + * to workarround the lack of tlb write conditional instruction. + * Expose half the number of available hardware lpids to the lpid + * allocator. + */ + kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core); kvmppc_claim_lpid(0); /* host */ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); -- cgit v1.2.3 From d2ca32a2d4f029b90e4b8f67879a9dfead0c85fa Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 1 Sep 2014 12:01:59 +0300 Subject: KVM: PPC: Book3E: Enable e6500 core Now that AltiVec and hardware thread support is in place enable e6500 core. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500mc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index bf8f99f6676a..2fdc8722e324 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -180,6 +180,16 @@ int kvmppc_core_check_processor_compat(void) r = 0; else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0) r = 0; +#ifdef CONFIG_ALTIVEC + /* + * Since guests have the priviledge to enable AltiVec, we need AltiVec + * support in the host to save/restore their context. + * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit + * because it's cleared in the absence of CONFIG_ALTIVEC! + */ + else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0) + r = 0; +#endif else r = -ENOTSUPP; -- cgit v1.2.3 From a59c1d9e609c4bbad9ec3b238221ecf3b9ca091b Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Tue, 9 Sep 2014 22:37:35 +0530 Subject: powerpc/kvm: support to handle sw breakpoint This patch adds kernel side support for software breakpoint. Design is that, by using an illegal instruction, we trap to hypervisor via Emulation Assistance interrupt, where we check for the illegal instruction and accordingly we return to Host or Guest. Patch also adds support for software breakpoint in PR KVM. 
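For reference, the heart of the new HV handling looks like this (simplified from kvmppc_emulate_debug_inst() and the H_EMUL_ASSIST exit path in the diff below); userspace learns the breakpoint opcode through the KVM_REG_PPC_DEBUG_INST one-reg:

    /*
     * On an Emulation Assistance interrupt, fetch the failing instruction
     * and decide whether it is our software breakpoint.
     */
    if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {       /* 0x00dddd00 */
            run->exit_reason = KVM_EXIT_DEBUG;
            run->debug.arch.address = kvmppc_get_pc(vcpu);
            return RESUME_HOST;     /* hand control to the debugger */
    } else {
            /* a genuinely illegal instruction: reflect it to the guest */
            kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
            return RESUME_GUEST;
    }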
Signed-off-by: Madhavan Srinivasan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 6 ++++++ arch/powerpc/kvm/book3s.c | 3 ++- arch/powerpc/kvm/book3s_hv.c | 41 ++++++++++++++++++++++++++++++++++---- arch/powerpc/kvm/book3s_pr.c | 3 +++ arch/powerpc/kvm/emulate.c | 15 ++++++++++++++ 5 files changed, 63 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 73063ef53694..dbd160f16cb0 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -38,6 +38,12 @@ #include #endif +/* + * KVMPPC_INST_SW_BREAKPOINT is debug Instruction + * for supporting software breakpoint. + */ +#define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00 + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index f23b6a553082..27d1b7041746 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -715,7 +715,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - return -EINVAL; + vcpu->guest_debug = dbg->control; + return 0; } void kvmppc_decrementer_func(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 529d10a7a36f..e63587d30b70 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -725,6 +725,30 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) return kvmppc_hcall_impl_hv_realmode(cmd); } +static int kvmppc_emulate_debug_inst(struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + u32 last_inst; + + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != + EMULATE_DONE) { + /* + * Fetch failed, so return to guest and + * try executing it again. + */ + return RESUME_GUEST; + } + + if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + return RESUME_HOST; + } else { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -807,12 +831,18 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, break; /* * This occurs if the guest executes an illegal instruction. - * We just generate a program interrupt to the guest, since - * we don't emulate any guest instructions at this stage. + * If the guest debug is disabled, generate a program interrupt + * to the guest. If guest debug is enabled, we need to check + * whether the instruction is a software breakpoint instruction. + * Accordingly return to Guest or Host. 
*/ case BOOK3S_INTERRUPT_H_EMUL_ASSIST: - kvmppc_core_queue_program(vcpu, SRR1_PROGILL); - r = RESUME_GUEST; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) { + r = kvmppc_emulate_debug_inst(run, vcpu); + } else { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + r = RESUME_GUEST; + } break; /* * This occurs if the guest (kernel or userspace), does something that @@ -924,6 +954,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, long int i; switch (id) { + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); + break; case KVM_REG_PPC_HIOR: *val = get_reg_val(id, 0); break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index faffb27badd9..6d7370890775 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1319,6 +1319,9 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, int r = 0; switch (id) { + case KVM_REG_PPC_DEBUG_INST: + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); + break; case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index e96b50d0bdab..005222b580ea 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -274,6 +274,21 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } break; + case 0: + /* + * Instruction with primary opcode 0. Based on PowerISA + * these are illegal instructions. + */ + if (inst == KVMPPC_INST_SW_BREAKPOINT) { + run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.address = kvmppc_get_pc(vcpu); + emulated = EMULATE_EXIT_USER; + advance = 0; + } else + emulated = EMULATE_FAIL; + + break; + default: emulated = EMULATE_FAIL; } -- cgit v1.2.3 From 033aaa14af0251285226a7dafc11c24a13959bca Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Tue, 9 Sep 2014 22:37:36 +0530 Subject: powerpc/kvm: common sw breakpoint instr across ppc This patch extends the use of illegal instruction as software breakpoint instruction across the ppc platform. Patch extends booke program interrupt code to support software breakpoint. 
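For reference, the booke side added below follows the same pattern (simplified from the booke.c hunks): a program interrupt now also fetches the last instruction when userspace requested software breakpoints, and KVMPPC_INST_SW_BREAKPOINT is bounced to the host as a debug exit rather than being injected back into the guest:

    if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
        (last_inst == KVMPPC_INST_SW_BREAKPOINT)) {
            /* SW breakpoint: return to host so the debugger can act on it */
            r = kvmppc_handle_debug(run, vcpu);
            run->exit_reason = KVM_EXIT_DEBUG;
            kvmppc_account_exit(vcpu, DEBUG_EXITS);
    }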
Signed-off-by: Madhavan Srinivasan [agraf: Fix bookehv] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_booke.h | 2 -- arch/powerpc/kvm/booke.c | 19 ++++++++++++++++++- arch/powerpc/kvm/bookehv_interrupts.S | 4 ++-- 3 files changed, 20 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index 630134d17f5d..3286f0d6a86c 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -33,8 +33,6 @@ #define EHPRIV_OC_SHIFT 11 /* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */ #define EHPRIV_OC_DEBUG 1 -#define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \ - (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT)) static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 97ec5b7fce9c..ed5b0dd58726 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -994,6 +994,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOKE_INTERRUPT_HV_PRIV: emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); break; + case BOOKE_INTERRUPT_PROGRAM: + /* SW breakpoints arrive as illegal instructions on HV */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) + emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); + break; default: break; } @@ -1071,6 +1076,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_PROGRAM: + if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) && + (last_inst == KVMPPC_INST_SW_BREAKPOINT)) { + /* + * We are here because of an SW breakpoint instr, + * so lets return to host to handle. + */ + r = kvmppc_handle_debug(run, vcpu); + run->exit_reason = KVM_EXIT_DEBUG; + kvmppc_account_exit(vcpu, DEBUG_EXITS); + break; + } + if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) { /* * Program traps generated by user-level software must @@ -1647,7 +1664,7 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.tsr); break; case KVM_REG_PPC_DEBUG_INST: - *val = get_reg_val(id, KVMPPC_INST_EHPRIV_DEBUG); + *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); break; case KVM_REG_PPC_VRSAVE: *val = get_reg_val(id, vcpu->arch.vrsave); diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index c8e4da55cb43..81bd8a07aa51 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -238,7 +238,7 @@ kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \ kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \ - SPRN_SRR0, SPRN_SRR1,NEED_ESR + SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU) kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \ SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \ @@ -348,7 +348,7 @@ kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR) -kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU) kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 
0 -- cgit v1.2.3 From 8d0eff6385640a9e6eed0b0c09113794b2bb74e9 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 10 Sep 2014 14:37:29 +0200 Subject: KVM: PPC: Pass enum to kvmppc_get_last_inst The kvmppc_get_last_inst function recently received a facelift that allowed us to pass an enum of the type of instruction we want to read into it rather than an unreadable boolean. Unfortunately, not all callers ended up passing the enum. This wasn't really an issue as "true" and "false" happen to match the two enum values we have, but it's still hard to read. Update all callers of kvmppc_get_last_inst() to follow the new calling convention. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 4 ++-- arch/powerpc/kvm/emulate.c | 2 +- arch/powerpc/kvm/emulate_loadstore.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ed5b0dd58726..9b55dec2d6cc 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -992,12 +992,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOKE_INTERRUPT_DATA_STORAGE: case BOOKE_INTERRUPT_DTLB_MISS: case BOOKE_INTERRUPT_HV_PRIV: - emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); break; case BOOKE_INTERRUPT_PROGRAM: /* SW breakpoints arrive as illegal instructions on HV */ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) - emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); break; default: break; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 005222b580ea..5cc2e7af3a7b 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -219,7 +219,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); - emulated = kvmppc_get_last_inst(vcpu, false, &inst); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); if (emulated != EMULATE_DONE) return emulated; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 0de4ffa175a9..6d3c0ee1d744 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -58,7 +58,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); - emulated = kvmppc_get_last_inst(vcpu, false, &inst); + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); if (emulated != EMULATE_DONE) return emulated; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ecf057551a2f..c1f8f53cd312 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -294,7 +294,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 last_inst; - kvmppc_get_last_inst(vcpu, false, &last_inst); + kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); /* XXX Deliver Program interrupt to guest. 
*/ pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); r = RESUME_HOST; -- cgit v1.2.3 From dd598091de4aabbc8bd7290a04f364e443c03455 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 16 Sep 2014 15:10:03 +0300 Subject: KVM: x86: Warn if guest virtual address space is not 48-bits The KVM emulator code assumes that the guest virtual address space (in 64-bit) is 48-bits wide. Fail the KVM_SET_CPUID and KVM_SET_CPUID2 ioctl if userspace tries to create a guest that does not obey this restriction. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 21 ++++++++++++++------- arch/x86/kvm/cpuid.h | 2 +- 2 files changed, 15 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index f4bad87ef256..e28d798f7277 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -53,14 +53,14 @@ u64 kvm_supported_xcr0(void) return xcr0; } -void kvm_update_cpuid(struct kvm_vcpu *vcpu) +int kvm_update_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; struct kvm_lapic *apic = vcpu->arch.apic; best = kvm_find_cpuid_entry(vcpu, 1, 0); if (!best) - return; + return 0; /* Update OSXSAVE bit */ if (cpu_has_xsave && best->function == 0x1) { @@ -88,7 +88,17 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu) xstate_required_size(vcpu->arch.xcr0); } + /* + * The existing code assumes virtual address is 48-bit in the canonical + * address checks; exit if it is ever changed. + */ + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best && ((best->eax & 0xff00) >> 8) != 48 && + ((best->eax & 0xff00) >> 8) != 0) + return -EINVAL; + kvm_pmu_cpuid_update(vcpu); + return 0; } static int is_efer_nx(void) @@ -151,10 +161,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, } vcpu->arch.cpuid_nent = cpuid->nent; cpuid_fix_nx_cap(vcpu); - r = 0; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); - kvm_update_cpuid(vcpu); + r = kvm_update_cpuid(vcpu); out_free: vfree(cpuid_entries); @@ -178,9 +187,7 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, vcpu->arch.cpuid_nent = cpuid->nent; kvm_apic_set_version(vcpu); kvm_x86_ops->cpuid_update(vcpu); - kvm_update_cpuid(vcpu); - return 0; - + r = kvm_update_cpuid(vcpu); out: return r; } diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 43b33e301e68..4452eedfaedd 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -3,7 +3,7 @@ #include "x86.h" -void kvm_update_cpuid(struct kvm_vcpu *vcpu); +int kvm_update_cpuid(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, -- cgit v1.2.3 From 1f755a827538226fff38aad128c4a6836bd89b48 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 16 Sep 2014 13:37:40 +0200 Subject: kvm: Make init_rmode_tss() return 0 on success. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In init_rmode_tss(), there two variables indicating the return value, r and ret, and it return 0 on error, 1 on success. The function is only called by vmx_set_tss_addr(), and ret is redundant. This patch removes the redundant variable, by making init_rmode_tss() return 0 on success, -errno on failure. 
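For clarity, the only caller changes from mapping the old 1/0 convention onto -ENOMEM to simply propagating the new return value (see the vmx_set_tss_addr() hunk below):

    /* before: 1 = success, 0 = failure */
    kvm->arch.tss_addr = addr;
    if (!init_rmode_tss(kvm))
            return -ENOMEM;
    return 0;

    /* after: 0 = success, -errno = failure */
    kvm->arch.tss_addr = addr;
    return init_rmode_tss(kvm);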
Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6ffd643d1a64..e8f08e91592d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3930,7 +3930,7 @@ static int init_rmode_tss(struct kvm *kvm) { gfn_t fn; u16 data = 0; - int r, idx, ret = 0; + int idx, r; idx = srcu_read_lock(&kvm->srcu); fn = kvm->arch.tss_addr >> PAGE_SHIFT; @@ -3952,13 +3952,9 @@ static int init_rmode_tss(struct kvm *kvm) r = kvm_write_guest_page(kvm, fn, &data, RMODE_TSS_SIZE - 2 * PAGE_SIZE - 1, sizeof(u8)); - if (r < 0) - goto out; - - ret = 1; out: srcu_read_unlock(&kvm->srcu, idx); - return ret; + return r; } static int init_rmode_identity_map(struct kvm *kvm) @@ -4751,10 +4747,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) if (ret) return ret; kvm->arch.tss_addr = addr; - if (!init_rmode_tss(kvm)) - return -ENOMEM; - - return 0; + return init_rmode_tss(kvm); } static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) -- cgit v1.2.3 From a2b9e6c1a35afcc0973acb72e591c714e78885ff Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 17 Sep 2014 02:50:50 +0300 Subject: KVM: x86: Don't report guest userspace emulation error to userspace Commit fc3a9157d314 ("KVM: X86: Don't report L2 emulation failures to user-space") disabled the reporting of L2 (nested guest) emulation failures to userspace due to race-condition between a vmexit and the instruction emulator. The same rational applies also to userspace applications that are permitted by the guest OS to access MMIO area or perform PIO. This patch extends the current behavior - of injecting a #UD instead of reporting it to userspace - also for guest userspace code. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2d7f65daa8d0..e46da50e2cec 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5000,7 +5000,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); - if (!is_guest_mode(vcpu)) { + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; -- cgit v1.2.3 From bc6134942dbbf31c25e9bd7c876be5da81c9e1ce Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 18 Sep 2014 18:24:57 -0300 Subject: KVM: nested VMX: disable perf cpuid reporting Initilization of L2 guest with -cpu host, on L1 guest with -cpu host triggers: (qemu) KVM: entry failed, hardware error 0x7 ... nested_vmx_run: VMCS MSR_{LOAD,STORE} unsupported Nested VMX MSR load/store support is not sufficient to allow perf for L2 guest. Until properly fixed, trap CPUID and disable function 0xA. Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 6 ++++++ arch/x86/kvm/vmx.c | 2 ++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e28d798f7277..976e3a57f9ea 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -774,6 +774,12 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) if (!best) best = check_cpuid_limit(vcpu, function, index); + /* + * Perfmon not yet supported for L2 guest. 
+ */ + if (is_guest_mode(vcpu) && function == 0xa) + best = NULL; + if (best) { *eax = best->eax; *ebx = best->ebx; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e8f08e91592d..2ab504791190 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6986,6 +6986,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_TASK_SWITCH: return 1; case EXIT_REASON_CPUID: + if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) + return 0; return 1; case EXIT_REASON_HLT: return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); -- cgit v1.2.3 From a70656b63a82d639b0cec54861bf8faf16ad74e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 18 Sep 2014 12:38:36 -0400 Subject: KVM: x86: count actual tlb flushes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - we count KVM_REQ_TLB_FLUSH requests, not actual flushes (KVM can have multiple requests for one flush) - flushes from kvm_flush_remote_tlbs aren't counted - it's easy to make a direct request by mistake Solve these by postponing the counting to kvm_check_request(). Signed-off-by: Radim Krčmář Signed-off-by: Liang Chen Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 1 - arch/x86/kvm/x86.c | 8 +++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 76398fe15df2..90f0aceea4cc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3446,7 +3446,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) { - ++vcpu->stat.tlb_flush; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e46da50e2cec..65b97d5567fe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6019,6 +6019,12 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_apic_update_tmr(vcpu, tmr); } +static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) +{ + ++vcpu->stat.tlb_flush; + kvm_x86_ops->tlb_flush(vcpu); +} + /* * Returns 1 to let __vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the @@ -6048,7 +6054,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvm_x86_ops->tlb_flush(vcpu); + kvm_vcpu_flush_tlb(vcpu); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; -- cgit v1.2.3 From 77c3913b74212a86027d311f5e81625736816620 Mon Sep 17 00:00:00 2001 From: Liang Chen Date: Thu, 18 Sep 2014 12:38:37 -0400 Subject: KVM: x86: directly use kvm_make_request again A one-line wrapper around kvm_make_request is not particularly useful. Replace kvm_mmu_flush_tlb() with kvm_make_request(). 
Signed-off-by: Liang Chen Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 - arch/x86/kvm/mmu.c | 16 +++++----------- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 2 +- 4 files changed, 7 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 028df8dc538e..eeeb573fcf6f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -914,7 +914,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu); int fx_init(struct kvm_vcpu *vcpu); -void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 90f0aceea4cc..045d592af1dd 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1743,7 +1743,7 @@ static int __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return 1; } - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } @@ -1796,7 +1796,7 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); if (flush) - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } struct mmu_page_path { @@ -2530,7 +2530,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, true, host_writable)) { if (write_fault) *emulate = 1; - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } if (unlikely(is_mmio_spte(*sptep) && emulate)) @@ -3444,12 +3444,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, context->nx = false; } -void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) -{ - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); -} -EXPORT_SYMBOL_GPL(kvm_mmu_flush_tlb); - void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) { mmu_free_roots(vcpu); @@ -3964,7 +3958,7 @@ static void mmu_pte_write_flush_tlb(struct kvm_vcpu *vcpu, bool zap_page, if (remote_flush) kvm_flush_remote_tlbs(vcpu->kvm); else if (local_flush) - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, @@ -4225,7 +4219,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { vcpu->arch.mmu.invlpg(vcpu, gva); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2ab504791190..ec389a6dc82a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6711,7 +6711,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) switch (type) { case VMX_EPT_EXTENT_GLOBAL: kvm_mmu_sync_roots(vcpu); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); nested_vmx_succeed(vcpu); break; default: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 65b97d5567fe..47678d4e229a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -729,7 +729,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { kvm_mmu_sync_roots(vcpu); - kvm_mmu_flush_tlb(vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); return 0; } -- cgit v1.2.3 From 040c8dc8a5afa7364bb8bb5b1b76c30007d6be14 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 18 Sep 2014 22:39:43 +0300 Subject: KVM: x86: emulating descriptor load misses long-mode case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit In 64-bit mode a #GP should be delivered to the guest "if the code segment descriptor pointed to by the selector in the 64-bit gate doesn't have the L-bit set and the D-bit clear." - Intel SDM "Interrupt 13—General Protection Exception (#GP)". This patch fixes the behavior of CS loading emulation code. Although the comment says that segment loading is not supported in long mode, this function is executed in long mode, so the fix is necassary. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 20d91873d831..a46207a05835 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1504,6 +1504,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (rpl > cpl || dpl != cpl) goto exception; } + /* in long-mode d/b must be clear if l is set */ + if (seg_desc.d && seg_desc.l) { + u64 efer = 0; + + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + if (efer & EFER_LMA) + goto exception; + } + /* CS(RPL) <- CPL */ selector = (selector & 0xfffc) | cpl; break; -- cgit v1.2.3 From 4566654bb9be9e8864df417bb72ceee5136b6a6a Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 18 Sep 2014 22:39:44 +0300 Subject: KVM: vmx: Inject #GP on invalid PAT CR Guest which sets the PAT CR to invalid value should get a #GP. Currently, if vmx supports loading PAT CR during entry, then the value is not checked. This patch makes the required check in that case. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 ++ arch/x86/kvm/x86.c | 5 +++-- arch/x86/kvm/x86.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ec389a6dc82a..06746c0a3c70 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2626,6 +2626,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) + return 1; vmcs_write64(GUEST_IA32_PAT, data); vcpu->arch.pat = data; break; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 47678d4e229a..56129dc9f0ba 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1726,7 +1726,7 @@ static bool valid_mtrr_type(unsigned t) return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */ } -static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) +bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) { int i; u64 mask; @@ -1769,12 +1769,13 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data) return true; } +EXPORT_SYMBOL_GPL(kvm_mtrr_valid); static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) { u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; - if (!mtrr_valid(vcpu, msr, data)) + if (!kvm_mtrr_valid(vcpu, msr, data)) return 1; if (msr == MSR_MTRRdefType) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 985fb2c006fa..7cb9c45a5fe0 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -159,6 +159,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception); +bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); + #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ | XSTATE_BNDREGS | XSTATE_BNDCSR) extern u64 host_xcr0; -- cgit v1.2.3 From 
b4619660635732bd2da376bb8f31f94d0f15fc98 Mon Sep 17 00:00:00 2001 From: Tiejun Chen Date: Mon, 22 Sep 2014 10:31:38 +0800 Subject: kvm: x86: fix two typos in comment s/drity/dirty and s/vmsc01/vmcs01 Signed-off-by: Tiejun Chen Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/vmx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 045d592af1dd..bdd1acbab84d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1174,7 +1174,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) * Write-protect on the specified @sptep, @pt_protect indicates whether * spte write-protection is caused by protecting shadow page table. * - * Note: write protection is difference between drity logging and spte + * Note: write protection is difference between dirty logging and spte * protection: * - for dirty logging, the spte can be set to writable at anytime if * its dirty bitmap is properly set. diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 06746c0a3c70..bfe3022e60f1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8005,7 +8005,7 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) /* * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it - * with L0's requirements for its guest (a.k.a. vmsc01), so we can run the L2 + * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 * guest in a way that will both be appropriate to L1's requests, and our * needs. In addition to modifying the active vmcs (which is vmcs02), this * function also has additional necessary side-effects, like setting various -- cgit v1.2.3 From d5262739cbb8a6414ba521a79bf0549717459a09 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Tue, 23 Sep 2014 10:01:57 +0300 Subject: KVM: x86: Remove debug assertion of non-PAE reserved bits Commit 346874c9507a ("KVM: x86: Fix CR3 reserved bits") removed non-PAE reserved bits which were not according to Intel SDM. However, residue was left in a debug assertion (CR3_NONPAE_RESERVED_BITS). Remove it. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/paging_tmpl.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 0ab6c65a2821..806d58e3c320 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -298,8 +298,7 @@ retry_walk: } #endif walker->max_level = walker->level; - ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || - (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); + ASSERT(!is_long_mode(vcpu) && is_pae(vcpu)); accessed_dirty = PT_GUEST_ACCESSED_MASK; pt_access = pte_access = ACC_ALL; -- cgit v1.2.3 From 81760dccf8d1fe5b128b58736fe3f56a566133cb Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Tue, 23 Sep 2014 10:44:35 +0800 Subject: kvm: x86: use macros to compute bank MSRs Avoid open coded calculations for bank MSRs by using well-defined macros that hide the index of higher bank MSRs. No semantic changes. 
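A standalone sketch (not part of the patch; the 0x400 base and four-MSRs-per-bank layout are shown for illustration) of how a MSR_IA32_MCx_CTL()-style macro replaces the open-coded upper bound on the bank MSR range:

#include <stdio.h>

#define MSR_IA32_MC0_CTL        0x400
#define MSR_IA32_MCx_CTL(x)     (MSR_IA32_MC0_CTL + 4 * (x))    /* 4 MSRs per bank */

static int is_mce_bank_msr(unsigned msr, unsigned bank_num)
{
        /* replaces the open-coded "msr < MSR_IA32_MC0_CTL + 4 * bank_num" */
        return msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num);
}

int main(void)
{
        unsigned banks = 32;

        printf("0x%x in range: %d\n", MSR_IA32_MCx_CTL(31) + 3,
               is_mce_bank_msr(MSR_IA32_MCx_CTL(31) + 3, banks));       /* last bank: 1 */
        printf("0x%x in range: %d\n", MSR_IA32_MCx_CTL(32),
               is_mce_bank_msr(MSR_IA32_MCx_CTL(32), banks));           /* one past: 0 */
        return 0;
}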
Signed-off-by: Chen Yucong Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 56129dc9f0ba..6152aa0da7dd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1826,7 +1826,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; default: if (msr >= MSR_IA32_MC0_CTL && - msr < MSR_IA32_MC0_CTL + 4 * bank_num) { + msr < MSR_IA32_MCx_CTL(bank_num)) { u32 offset = msr - MSR_IA32_MC0_CTL; /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to @@ -2185,7 +2185,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: - case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: + case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: return set_msr_mce(vcpu, msr, data); /* Performance counters are not protected by a CPUID bit, @@ -2351,7 +2351,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) break; default: if (msr >= MSR_IA32_MC0_CTL && - msr < MSR_IA32_MC0_CTL + 4 * bank_num) { + msr < MSR_IA32_MCx_CTL(bank_num)) { u32 offset = msr - MSR_IA32_MC0_CTL; data = vcpu->arch.mce_banks[offset]; break; @@ -2532,7 +2532,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_MCG_CAP: case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: - case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: + case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: return get_msr_mce(vcpu, msr, pdata); case MSR_K7_CLK_CTL: /* -- cgit v1.2.3 From c1118b3602c2329671ad5ec8bdf8e374323d6343 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 22 Sep 2014 13:17:48 +0200 Subject: x86: kvm: use alternatives for VMCALL vs. VMMCALL if kernel text is read-only On x86_64, kernel text mappings are mapped read-only with CONFIG_DEBUG_RODATA. In that case, KVM will fail to patch VMCALL instructions to VMMCALL as required on AMD processors. The failure mode is currently a divide-by-zero exception, which obviously is a KVM bug that has to be fixed. However, picking the right instruction between VMCALL and VMMCALL will be faster and will help if you cannot upgrade the hypervisor. Reported-by: Chris Webb Tested-by: Chris Webb Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: x86@kernel.org Acked-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/cpufeature.h | 1 + arch/x86/include/asm/kvm_para.h | 10 ++++++++-- arch/x86/kernel/cpu/amd.c | 7 +++++++ 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bb9b258d60e7..2075e6c34c78 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -202,6 +202,7 @@ #define X86_FEATURE_DECODEASSISTS ( 8*32+12) /* AMD Decode Assists support */ #define X86_FEATURE_PAUSEFILTER ( 8*32+13) /* AMD filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD ( 8*32+14) /* AMD pause filter threshold */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c7678e43465b..e62cf897f781 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -2,6 +2,7 @@ #define _ASM_X86_KVM_PARA_H #include +#include #include extern void kvmclock_init(void); @@ -16,10 +17,15 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -/* This instruction is vmcall. On non-VT architectures, it will generate a - * trap that we will then rewrite to the appropriate instruction. +#ifdef CONFIG_DEBUG_RODATA +#define KVM_HYPERCALL \ + ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) +#else +/* On AMD processors, vmcall will generate a trap that we will + * then rewrite to the appropriate instruction. */ #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" +#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 60e5497681f5..813d29d00a17 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -525,6 +525,13 @@ static void early_init_amd(struct cpuinfo_x86 *c) } #endif + /* + * This is only needed to tell the kernel whether to use VMCALL + * and VMMCALL. VMMCALL is never executed except under virt, so + * we can set it unconditionally. + */ + set_cpu_cap(c, X86_FEATURE_VMMCALL); + /* F16h erratum 793, CVE-2013-6885 */ if (c->x86 == 0x16 && c->x86_model <= 0xf) msr_set_bit(MSR_AMD64_LS_CFG, 15); -- cgit v1.2.3 From 8a9522d2fe6a1b643d3aef5ab7f097f73c601e7a Mon Sep 17 00:00:00 2001 From: Andres Lagar-Cavilla Date: Tue, 23 Sep 2014 12:34:54 -0700 Subject: kvm/x86/mmu: Pass gfn and level to rmapp callback. Callbacks don't have to do extra computation to learn what the caller (lvm_handle_hva_range()) knows very well. Useful for debugging/tracing/printk/future. 
Signed-off-by: Andres Lagar-Cavilla Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index bdd1acbab84d..47d534066325 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1262,7 +1262,8 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1270,7 +1271,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, while ((sptep = rmap_get_first(*rmapp, &iter))) { BUG_ON(!(*sptep & PT_PRESENT_MASK)); - rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", sptep, *sptep); + rmap_printk("kvm_rmap_unmap_hva: spte %p %llx gfn %llx (%d)\n", + sptep, *sptep, gfn, level); drop_spte(kvm, sptep); need_tlb_flush = 1; @@ -1280,7 +1282,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1294,7 +1297,8 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { BUG_ON(!is_shadow_present_pte(*sptep)); - rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", sptep, *sptep); + rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", + sptep, *sptep, gfn, level); need_flush = 1; @@ -1328,6 +1332,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, int (*handler)(struct kvm *kvm, unsigned long *rmapp, struct kvm_memory_slot *slot, + gfn_t gfn, + int level, unsigned long data)) { int j; @@ -1357,6 +1363,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { unsigned long idx, idx_end; unsigned long *rmapp; + gfn_t gfn = gfn_start; /* * {idx(page_j) | page_j intersects with @@ -1367,8 +1374,10 @@ static int kvm_handle_hva_range(struct kvm *kvm, rmapp = __gfn_to_rmap(gfn_start, j, memslot); - for (; idx <= idx_end; ++idx) - ret |= handler(kvm, rmapp++, memslot, data); + for (; idx <= idx_end; + ++idx, gfn += (1UL << KVM_HPAGE_GFN_SHIFT(j))) + ret |= handler(kvm, rmapp++, memslot, + gfn, j, data); } } @@ -1379,6 +1388,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, unsigned long data, int (*handler)(struct kvm *kvm, unsigned long *rmapp, struct kvm_memory_slot *slot, + gfn_t gfn, int level, unsigned long data)) { return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); @@ -1400,7 +1410,8 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) } static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) { u64 *sptep; struct rmap_iterator uninitialized_var(iter); @@ -1415,7 +1426,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, * out actively used pages or breaking up actively used hugepages. 
*/ if (!shadow_accessed_mask) { - young = kvm_unmap_rmapp(kvm, rmapp, slot, data); + young = kvm_unmap_rmapp(kvm, rmapp, slot, gfn, level, data); goto out; } @@ -1430,13 +1441,13 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, } } out: - /* @data has hva passed to kvm_age_hva(). */ - trace_kvm_age_page(data, slot, young); + trace_kvm_age_page(gfn, level, slot, young); return young; } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - struct kvm_memory_slot *slot, unsigned long data) + struct kvm_memory_slot *slot, gfn_t gfn, + int level, unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1474,13 +1485,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); + kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); kvm_flush_remote_tlbs(vcpu->kvm); } int kvm_age_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp); + return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -- cgit v1.2.3 From 57128468080a8b6ea452223036d3e417f748af55 Mon Sep 17 00:00:00 2001 From: Andres Lagar-Cavilla Date: Mon, 22 Sep 2014 14:54:42 -0700 Subject: kvm: Fix page ageing bugs 1. We were calling clear_flush_young_notify in unmap_one, but we are within an mmu notifier invalidate range scope. The spte exists no more (due to range_start) and the accessed bit info has already been propagated (due to kvm_pfn_set_accessed). Simply call clear_flush_young. 2. We clear_flush_young on a primary MMU PMD, but this may be mapped as a collection of PTEs by the secondary MMU (e.g. during log-dirty). This required expanding the interface of the clear_flush_young mmu notifier, so a lot of code has been trivially touched. 3. In the absence of shadow_accessed_mask (e.g. EPT A bit), we emulate the access bit by blowing the spte. This requires proper synchronizing with MMU notifier consumers, like every other removal of spte's does. 
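A toy standalone model (not the kernel code; the fields are invented stand-ins) of point 3: ageing either clears a hardware accessed bit in place, or, when no such bit exists, removes the mapping and bumps a sequence counter so concurrent MMU-notifier consumers notice that sptes were blown away and retry:

#include <stdio.h>

struct kvm_like {
        unsigned long mmu_notifier_seq; /* bumped whenever sptes are blown away */
        int spte_present;
        int spte_accessed;              /* only meaningful with an A bit */
        int has_accessed_bit;           /* models shadow_accessed_mask != 0 */
};

static int age_range(struct kvm_like *kvm)
{
        if (!kvm->has_accessed_bit) {
                /*
                 * No hardware A bit: emulate ageing by dropping the mapping.
                 * The next access faults and re-establishes it. Bump the
                 * sequence counter so racing faults see the removal.
                 */
                kvm->mmu_notifier_seq++;
                if (!kvm->spte_present)
                        return 0;
                kvm->spte_present = 0;
                return 1;               /* page was young */
        }
        /* A bit available: just test-and-clear it, mapping stays intact */
        if (kvm->spte_accessed) {
                kvm->spte_accessed = 0;
                return 1;
        }
        return 0;
}

int main(void)
{
        struct kvm_like kvm = { 0, 1, 1, 0 };

        printf("young=%d seq=%lu present=%d\n",
               age_range(&kvm), kvm.mmu_notifier_seq, kvm.spte_present);
        return 0;
}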
Signed-off-by: Andres Lagar-Cavilla Acked-by: Rik van Riel Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 3 ++- arch/arm64/include/asm/kvm_host.h | 3 ++- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s.c | 4 ++-- arch/powerpc/kvm/book3s.h | 3 ++- arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 ++-- arch/powerpc/kvm/book3s_pr.c | 3 ++- arch/powerpc/kvm/e500_mmu_host.c | 2 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 38 ++++++++++++++++++++++--------------- 11 files changed, 39 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 032a8538318a..8c3f7eb62b54 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -170,7 +170,8 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, + unsigned long end) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index be9970a59497..a3c671b3acc9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -180,7 +180,8 @@ int kvm_unmap_hva_range(struct kvm *kvm, void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva) +static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, + unsigned long end) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 604000882352..d329bc5543a2 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -56,7 +56,7 @@ extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index fb86a2299d8a..d4a92d7cea6a 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -243,7 +243,7 @@ struct kvmppc_ops { int (*unmap_hva)(struct kvm *kvm, unsigned long hva); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); - int (*age_hva)(struct kvm *kvm, unsigned long hva); + int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); int (*test_age_hva)(struct kvm *kvm, unsigned long hva); void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte); void (*mmu_destroy)(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index dd03f6b299ba..c16cfbfeb781 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -851,9 +851,9 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long 
start, unsigned long end) { - return kvm->arch.kvm_ops->age_hva(kvm, hva); + return kvm->arch.kvm_ops->age_hva(kvm, start, end); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 4bf956cf94d6..d2b3ec088b8c 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -17,7 +17,8 @@ extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm, extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end); -extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva); +extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, + unsigned long end); extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 72c20bb16d26..81460c5359c0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1002,11 +1002,11 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, return ret; } -int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva) +int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end) { if (!kvm->arch.using_mmu_notifiers) return 0; - return kvm_handle_hva(kvm, hva, kvm_age_rmapp); + return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp); } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index faffb27badd9..852fcd8951c4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -295,7 +295,8 @@ static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, return 0; } -static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) +static int kvm_age_hva_pr(struct kvm *kvm, unsigned long start, + unsigned long end) { /* XXX could be more clever ;) */ return 0; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 08f14bb57897..b1f3f630315e 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -732,7 +732,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return 0; } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { /* XXX could be more clever ;) */ return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index eeeb573fcf6f..763d273cab1d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1035,7 +1035,7 @@ asmlinkage void kvm_spurious_fault(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); -int kvm_age_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 47d534066325..3201e93ebd07 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1417,18 +1417,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, struct rmap_iterator uninitialized_var(iter); int young 
= 0; - /* - * In case of absence of EPT Access and Dirty Bits supports, - * emulate the accessed bit for EPT, by checking if this page has - * an EPT mapping, and clearing it if it does. On the next access, - * a new EPT mapping will be established. - * This has some overhead, but not as much as the cost of swapping - * out actively used pages or breaking up actively used hugepages. - */ - if (!shadow_accessed_mask) { - young = kvm_unmap_rmapp(kvm, rmapp, slot, gfn, level, data); - goto out; - } + BUG_ON(!shadow_accessed_mask); for (sptep = rmap_get_first(*rmapp, &iter); sptep; sptep = rmap_get_next(&iter)) { @@ -1440,7 +1429,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, (unsigned long *)sptep); } } -out: trace_kvm_age_page(gfn, level, slot, young); return young; } @@ -1489,9 +1477,29 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) kvm_flush_remote_tlbs(vcpu->kvm); } -int kvm_age_hva(struct kvm *kvm, unsigned long hva) +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); + /* + * In case of absence of EPT Access and Dirty Bits supports, + * emulate the accessed bit for EPT, by checking if this page has + * an EPT mapping, and clearing it if it does. On the next access, + * a new EPT mapping will be established. + * This has some overhead, but not as much as the cost of swapping + * out actively used pages or breaking up actively used hugepages. + */ + if (!shadow_accessed_mask) { + /* + * We are holding the kvm->mmu_lock, and we are blowing up + * shadow PTEs. MMU notifier consumers need to be kept at bay. + * This is correct as long as we don't decouple the mmu_lock + * protected regions (like invalidate_range_start|end does). + */ + kvm->mmu_notifier_seq++; + return kvm_handle_hva_range(kvm, start, end, 0, + kvm_unmap_rmapp); + } + + return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -- cgit v1.2.3 From fe71557afbec641fee73711e40602bed37f6f33b Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:57 +0800 Subject: kvm: Add arch specific mmu notifier for page invalidation This will be used to let the guest run while the APIC access page is not pinned. Because subsequent patches will fill in the function for x86, place the (still empty) x86 implementation in the x86.c file instead of adding an inline function in kvm_host.h. 
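A standalone sketch (not the kernel code) of the shape of such a hook: generic notifier code calls an arch hook unconditionally, and architectures that need nothing provide an empty body, leaving x86 free to fill it in later:

#include <stdio.h>

/* arch hook: most architectures provide an empty body; x86 fills it in later */
static void arch_notifier_invalidate_page(unsigned long address)
{
        (void)address;                  /* empty on arches that do not care */
}

/* generic notifier path calls into the arch hook unconditionally */
static void notifier_invalidate_page(unsigned long address)
{
        printf("invalidate hva %#lx\n", address);
        arch_notifier_invalidate_page(address);
}

int main(void)
{
        notifier_invalidate_page(0xfee00000UL);
        return 0;
}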
Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 5 +++++ arch/arm64/include/asm/kvm_host.h | 5 +++++ arch/powerpc/include/asm/kvm_host.h | 5 +++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 5 +++++ 5 files changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8c3f7eb62b54..155497c2b4da 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -181,6 +181,11 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) return 0; } +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a3c671b3acc9..992d9da88119 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -191,6 +191,11 @@ static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) return 0; } +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d329bc5543a2..2cf6c1587d43 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -60,6 +60,11 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 763d273cab1d..022c356e0fed 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1044,6 +1044,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu); +void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address); void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6152aa0da7dd..142569e6f8f9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6026,6 +6026,11 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush(vcpu); } +void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} + /* * Returns 1 to let __vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the -- cgit v1.2.3 From 4256f43f9fab91e1c17b5846a240cf4b66a768a8 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:54 +0800 Subject: kvm: x86: Add request bit to reload APIC access page address Currently, the APIC access page is pinned by KVM for the entire life of the guest. We want to make it migratable in order to make memory hot-unplug available for machines that run KVM. 
This patch prepares to handle this in generic code, through a new request bit (that will be set by the MMU notifier) and a new hook that is called whenever the request bit is processed. Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/x86.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 022c356e0fed..60f9d73c6282 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -736,6 +736,7 @@ struct kvm_x86_ops { void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); @@ -1044,6 +1045,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu); +void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 142569e6f8f9..c1412f5d93db 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6026,6 +6026,18 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush(vcpu); } +void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) +{ + if (!kvm_x86_ops->set_apic_access_page_addr) + return; + + vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm, + APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + kvm_x86_ops->set_apic_access_page_addr(vcpu, + page_to_phys(vcpu->kvm->arch.apic_access_page)); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); + void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { @@ -6091,6 +6103,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_deliver_pmi(vcpu); if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) vcpu_scan_ioapic(vcpu); + if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) + kvm_vcpu_reload_apic_access_page(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { -- cgit v1.2.3 From 38b9917350cb2946e368ba684cfc33d1672f104e Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:54 +0800 Subject: kvm: vmx: Implement set_apic_access_page_addr Currently, the APIC access page is pinned by KVM for the entire life of the guest. We want to make it migratable in order to make memory hot-unplug available for machines that run KVM. This patch prepares to handle this for the case where there is no nested virtualization, or where the nested guest does not have an APIC page of its own. All accesses to kvm->arch.apic_access_page are changed to go through kvm_vcpu_reload_apic_access_page. If the APIC access page is invalidated when the host is running, we update the VMCS in the next guest entry. If it is invalidated when the guest is running, the MMU notifier will force an exit, after which we will handle everything as in the previous case. If it is invalidated when a nested guest is running, the request will update either the VMCS01 or the VMCS02. 
Updating the VMCS01 is done at the next L2->L1 exit, while updating the VMCS02 is done in prepare_vmcs02. Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bfe3022e60f1..881d266eb3ae 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3134,9 +3134,17 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_unrestricted_guest()) enable_unrestricted_guest = 0; - if (!cpu_has_vmx_flexpriority()) + if (!cpu_has_vmx_flexpriority()) { flexpriority_enabled = 0; + /* + * set_apic_access_page_addr() is used to reload apic access + * page upon invalidation. No need to do anything if the + * processor does not have the APIC_ACCESS_ADDR VMCS field. + */ + kvm_x86_ops->set_apic_access_page_addr = NULL; + } + if (!cpu_has_vmx_tpr_shadow()) kvm_x86_ops->update_cr8_intercept = NULL; @@ -4557,9 +4565,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write32(TPR_THRESHOLD, 0); } - if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) - vmcs_write64(APIC_ACCESS_ADDR, - page_to_phys(vmx->vcpu.kvm->arch.apic_access_page)); + kvm_vcpu_reload_apic_access_page(vcpu); if (vmx_vm_has_apicv(vcpu->kvm)) memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); @@ -7198,6 +7204,29 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) vmx_set_msr_bitmap(vcpu); } +static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* + * Currently we do not handle the nested case where L2 has an + * APIC access page of its own; that page is still pinned. + * Hence, we skip the case where the VCPU is in guest mode _and_ + * L1 prepared an APIC access page for L2. + * + * For the case where L1 and L2 share the same APIC access page + * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear + * in the vmcs12), this function will only update either the vmcs01 + * or the vmcs02. If the former, the vmcs02 will be updated by + * prepare_vmcs02. If the latter, the vmcs01 will be updated in + * the next L2->L1 exit. + */ + if (!is_guest_mode(vcpu) || + !nested_cpu_has2(vmx->nested.current_vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + vmcs_write64(APIC_ACCESS_ADDR, hpa); +} + static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) { u16 status; @@ -8140,8 +8169,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmcs_write64(APIC_ACCESS_ADDR, - page_to_phys(vcpu->kvm->arch.apic_access_page)); + kvm_vcpu_reload_apic_access_page(vcpu); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); @@ -8949,6 +8977,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx->nested.virtual_apic_page = NULL; } + /* + * We are now running in L2, mmu_notifier will force to reload the + * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1. 
+ */ + kvm_vcpu_reload_apic_access_page(vcpu); + /* * Exiting from L2 to L1, we're now back to L1 which thinks it just * finished a VMLAUNCH or VMRESUME instruction, so we need to set the @@ -9074,6 +9108,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, + .set_apic_access_page_addr = vmx_set_apic_access_page_addr, .vm_has_apicv = vmx_vm_has_apicv, .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, -- cgit v1.2.3 From c24ae0dcd3e8695efa43e71704d1fc4bc7e29e9b Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 24 Sep 2014 15:57:58 +0800 Subject: kvm: x86: Unpin and remove kvm_arch->apic_access_page In order to make the APIC access page migratable, stop pinning it in memory. And because the APIC access page is not pinned in memory, we can remove kvm_arch->apic_access_page. When we need to write its physical address into vmcs, we use gfn_to_page() to get its page struct, which is needed to call page_to_phys(); the page is then immediately unpinned. Suggested-by: Gleb Natapov Signed-off-by: Tang Chen Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/vmx.c | 9 +++++++-- arch/x86/kvm/x86.c | 22 ++++++++++++++++------ 3 files changed, 24 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 60f9d73c6282..7d603a71ab3a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -574,7 +574,7 @@ struct kvm_arch { struct kvm_apic_map *apic_map; unsigned int tss_addr; - struct page *apic_access_page; + bool apic_access_page_done; gpa_t wall_clock; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 881d266eb3ae..04fa1b8298c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4033,7 +4033,7 @@ static int alloc_apic_access_page(struct kvm *kvm) int r = 0; mutex_lock(&kvm->slots_lock); - if (kvm->arch.apic_access_page) + if (kvm->arch.apic_access_page_done) goto out; kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; kvm_userspace_mem.flags = 0; @@ -4049,7 +4049,12 @@ static int alloc_apic_access_page(struct kvm *kvm) goto out; } - kvm->arch.apic_access_page = page; + /* + * Do not pin the page in memory, so that memory hot-unplug + * is able to migrate it. + */ + put_page(page); + kvm->arch.apic_access_page_done = true; out: mutex_unlock(&kvm->slots_lock); return r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c1412f5d93db..6857257f3810 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6028,19 +6028,31 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { + struct page *page = NULL; + if (!kvm_x86_ops->set_apic_access_page_addr) return; - vcpu->kvm->arch.apic_access_page = gfn_to_page(vcpu->kvm, - APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); - kvm_x86_ops->set_apic_access_page_addr(vcpu, - page_to_phys(vcpu->kvm->arch.apic_access_page)); + page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); + + /* + * Do not pin apic access page in memory, the MMU notifier + * will call us again if it is migrated or swapped out. 
+ */ + put_page(page); } EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page); void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { + /* + * The physical address of apic access page is stored in the VMCS. + * Update it when it becomes invalid. + */ + if (address == gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT)) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); } /* @@ -7297,8 +7309,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(kvm->arch.vpic); kfree(kvm->arch.vioapic); kvm_free_vcpus(kvm); - if (kvm->arch.apic_access_page) - put_page(kvm->arch.apic_access_page); kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } -- cgit v1.2.3 From dbff124e29fa24aff9705b354b5f4648cd96e0bb Mon Sep 17 00:00:00 2001 From: Joel Schopp Date: Wed, 9 Jul 2014 11:17:04 -0500 Subject: arm/arm64: KVM: Fix VTTBR_BADDR_MASK and pgd alloc The current aarch64 calculation for VTTBR_BADDR_MASK masks only 39 bits and not all the bits in the PA range. This is clearly a bug that manifests itself on systems that allocate memory in the higher address space range. [ Modified from Joel's original patch to be based on PHYS_MASK_SHIFT instead of a hard-coded value and to move the alignment check of the allocation to mmu.c. Also added a comment explaining why we hardcode the IPA range and changed the stage-2 pgd allocation to be based on the 40 bit IPA range instead of the maximum possible 48 bit PA range. - Christoffer ] Reviewed-by: Catalin Marinas Signed-off-by: Joel Schopp Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 4 ++-- arch/arm64/include/asm/kvm_arm.h | 13 ++++++++++++- arch/arm64/include/asm/kvm_mmu.h | 5 ++--- 3 files changed, 16 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 40bc3df6d87b..779605122f32 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -410,9 +410,9 @@ static void update_vttbr(struct kvm *kvm) /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm->arch.pgd); + BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; - kvm->arch.vttbr = pgd_phys & VTTBR_BADDR_MASK; - kvm->arch.vttbr |= vmid; + kvm->arch.vttbr = pgd_phys | vmid; spin_unlock(&kvm_vmid_lock); } diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index cc83520459ed..7fd3e27e3ccc 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -122,6 +122,17 @@ #define VTCR_EL2_T0SZ_MASK 0x3f #define VTCR_EL2_T0SZ_40B 24 +/* + * We configure the Stage-2 page tables to always restrict the IPA space to be + * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are + * not known to exist and will break with this configuration. + * + * Note that when using 4K pages, we concatenate two first level page tables + * together. + * + * The magic numbers used for VTTBR_X in this patch can be found in Tables + * D4-23 and D4-25 in ARM DDI 0487A.b. 
+ */ #ifdef CONFIG_ARM64_64K_PAGES /* * Stage2 translation configuration: @@ -149,7 +160,7 @@ #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_BADDR_MASK (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT (48LLU) #define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 737da742b293..a030d163840b 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -59,10 +59,9 @@ #define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) /* - * Align KVM with the kernel's view of physical memory. Should be - * 40bit IPA, with PGD being 8kB aligned in the 4KB page configuration. + * We currently only support a 40bit IPA. */ -#define KVM_PHYS_SHIFT PHYS_MASK_SHIFT +#define KVM_PHYS_SHIFT (40) #define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) -- cgit v1.2.3 From 0496daa5cf99741ce8db82686b4c7446a37feabb Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 26 Sep 2014 12:29:34 +0200 Subject: arm/arm64: KVM: Report correct FSC for unsupported fault types When we catch something that's not a permission fault or a translation fault, we log the unsupported FSC in the kernel log, but we were masking off the bottom bits of the FSC which was not very helpful. Also correctly report the FSC for data and instruction faults rather than telling people it was a DFCS, which doesn't exist in the ARM ARM. Reviewed-by: Peter Maydell Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_emulate.h | 5 +++++ arch/arm/kvm/mmu.c | 8 +++++--- arch/arm64/include/asm/kvm_emulate.h | 5 +++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 69b746955fca..b9db269c6e61 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -148,6 +148,11 @@ static inline bool kvm_vcpu_trap_is_iabt(struct kvm_vcpu *vcpu) } static inline u8 kvm_vcpu_trap_get_fault(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & HSR_FSC; +} + +static inline u8 kvm_vcpu_trap_get_fault_type(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_FSC_TYPE; } diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index bb06f76a8f89..eea03069161b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -882,10 +882,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. 
fault or write fault */ - fault_status = kvm_vcpu_trap_get_fault(vcpu); + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { - kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n", - kvm_vcpu_trap_get_class(vcpu), fault_status); + kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", + kvm_vcpu_trap_get_class(vcpu), + (unsigned long)kvm_vcpu_trap_get_fault(vcpu), + (unsigned long)kvm_vcpu_get_hsr(vcpu)); return -EFAULT; } diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fdc3e21abd8d..5674a55b5518 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -173,6 +173,11 @@ static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) } static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC; +} + +static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; } -- cgit v1.2.3 From cec26bc3c125b5dd12a02f04133cd91eae3f1622 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 29 Sep 2014 13:32:38 +0530 Subject: KVM: PPC: BOOK3S: HV: CMA: Reserve cma region only in hypervisor mode We use cma reserved area for creating guest hash page table. Don't do the reservation in non-hypervisor mode. This avoids unnecessary CMA reservation when booting with limited memory configs like fadump and kdump. Signed-off-by: Aneesh Kumar K.V Reviewed-by: Alexander Graf Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_hv_builtin.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index b9615ba5b083..4fdc27c80f4c 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -163,6 +163,12 @@ void __init kvm_cma_reserve(void) unsigned long align_size; struct memblock_region *reg; phys_addr_t selected_size = 0; + + /* + * We need CMA reservation only when we are in HV mode + */ + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return; /* * We cannot use memblock_phys_mem_size() here, because * memblock_analyze() has not been called yet. -- cgit v1.2.3 From 7be81a46695d2088f848653f2bbec06bb178adce Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 19 Sep 2014 15:55:20 +0200 Subject: KVM: s390/facilities: allow TOD-CLOCK steering facility bit There is nothing to do for KVM to support TOD-CLOCK steering. Signed-off-by: Christian Borntraeger Reviewed-by: David Hildenbrand --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 56a411c0245a..0d5aa88e238b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1786,7 +1786,7 @@ static int __init kvm_s390_init(void) return -ENOMEM; } memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16); - vfacilities[0] &= 0xff82fff3f4fc2000UL; + vfacilities[0] &= 0xff82fffbf47c2000UL; vfacilities[1] &= 0x005c000000000000UL; return 0; } -- cgit v1.2.3 From ce2e4f0b75a567d25375b52476662c724304e476 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 11 Jul 2014 10:00:43 +0200 Subject: KVM: s390: count vcpu wakeups in stat.halt_wakeup This patch introduces the halt_wakeup counter used by common code and uses it to count vcpu wakeups done in s390 arch specific code. 
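A standalone sketch (not part of the patch; the offsetof-based table is a simplified model of the VCPU_STAT() debugfs machinery) of adding a counter that is bumped at the wakeup site and exported by name:

#include <stdio.h>
#include <stddef.h>

struct vcpu_stat {
        unsigned int exit_instruction;
        unsigned int halt_wakeup;       /* new counter */
};

struct stats_entry {
        const char *name;
        size_t offset;                  /* models the VCPU_STAT() offsetof trick */
};

static const struct stats_entry debugfs_entries[] = {
        { "exit_instruction", offsetof(struct vcpu_stat, exit_instruction) },
        { "halt_wakeup",      offsetof(struct vcpu_stat, halt_wakeup) },
};

static void vcpu_wakeup(struct vcpu_stat *stat)
{
        stat->halt_wakeup++;            /* bumped where the vcpu is actually woken */
}

int main(void)
{
        struct vcpu_stat stat = { 0, 0 };
        unsigned int i;

        vcpu_wakeup(&stat);
        for (i = 0; i < sizeof(debugfs_entries) / sizeof(debugfs_entries[0]); i++)
                printf("%s: %u\n", debugfs_entries[i].name,
                       *(unsigned int *)((char *)&stat + debugfs_entries[i].offset));
        return 0;
}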
Acked-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/interrupt.c | 1 + arch/s390/kvm/kvm-s390.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 1a6f6fd8bd34..2175f911a73a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -192,6 +192,7 @@ struct kvm_vcpu_stat { u32 exit_stop_request; u32 exit_validity; u32 exit_instruction; + u32 halt_wakeup; u32 instruction_lctl; u32 instruction_lctlg; u32 instruction_stctl; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 4cad00adef93..a39838457f01 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -626,6 +626,7 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) */ vcpu->preempted = true; wake_up_interruptible(&vcpu->wq); + vcpu->stat.halt_wakeup++; } } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0d5aa88e238b..55aade49b6d1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_instruction", VCPU_STAT(exit_instruction) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, { "instruction_stctl", VCPU_STAT(instruction_stctl) }, -- cgit v1.2.3 From f439ed27f8b8b90d243ae15acb193d37f96eebe0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 2 Oct 2014 13:53:24 +0200 Subject: kvm: do not handle APIC access page if in-kernel irqchip is not in use This fixes the following OOPS: loaded kvm module (v3.17-rc1-168-gcec26bc) BUG: unable to handle kernel paging request at fffffffffffffffe IP: [] put_page+0x9/0x30 PGD 1e15067 PUD 1e17067 PMD 0 Oops: 0000 [#1] PREEMPT SMP [] ? kvm_vcpu_reload_apic_access_page+0x5d/0x70 [kvm] [] vmx_vcpu_reset+0x21b/0x470 [kvm_intel] [] ? kvm_pmu_reset+0x76/0xb0 [kvm] [] kvm_vcpu_reset+0x15a/0x1b0 [kvm] [] kvm_arch_vcpu_setup+0x2c/0x50 [kvm] [] kvm_vm_ioctl+0x200/0x780 [kvm] [] do_vfs_ioctl+0x2d0/0x4b0 [] ? __mmdrop+0x69/0xb0 [] SyS_ioctl+0x81/0xa0 [] ? __audit_syscall_exit+0x1f6/0x2a0 [] system_call_fastpath+0x16/0x1b Code: c6 78 ce a3 81 4c 89 e7 e8 d9 80 ff ff 0f 0b 4c 89 e7 e8 8f f6 ff ff e9 fa fe ff ff 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 <48> f7 07 00 c0 00 00 55 48 89 e5 75 1e 8b 47 1c 85 c0 74 27 f0 RIP [] put_page+0x5/0x50 when not using the in-kernel irqchip ("-machine kernel_irqchip=off" with QEMU). The fix is to make the same check in kvm_vcpu_reload_apic_access_page that we already have in vmx.c's vm_need_virtualize_apic_accesses(). Reported-by: Jan Kiszka Tested-by: Jan Kiszka Fixes: 4256f43f9fab91e1c17b5846a240cf4b66a768a8 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6857257f3810..5430e4b0af29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6030,6 +6030,9 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { struct page *page = NULL; + if (!irqchip_in_kernel(vcpu->kvm)) + return; + if (!kvm_x86_ops->set_apic_access_page_addr) return; -- cgit v1.2.3
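A standalone sketch (not the kernel code; the flags and the page lookup are toy stand-ins) of the shape of this fix: check that an in-kernel irqchip exists before looking up and releasing the APIC access page, so the error path can never hand a bad pointer to put_page():

#include <stdio.h>

static int irqchip_in_kernel_flag;      /* models irqchip_in_kernel(kvm) */
static int have_set_hook;               /* models the set_apic_access_page_addr hook */
static char apic_page_backing;          /* stand-in for the real page struct */

static void *lookup_apic_page(void)
{
        /* with no in-kernel irqchip there is no APIC memslot: lookup fails */
        return irqchip_in_kernel_flag ? (void *)&apic_page_backing : NULL;
}

static void reload_apic_access_page(void)
{
        void *page;

        if (!irqchip_in_kernel_flag)    /* the fix: bail out before the lookup */
                return;
        if (!have_set_hook)             /* pre-existing flexpriority-style check */
                return;

        page = lookup_apic_page();
        printf("reloading APIC access page backed at %p\n", page);
        /* releasing the page is now reached only with a valid pointer */
}

int main(void)
{
        reload_apic_access_page();      /* userspace irqchip: harmless no-op */
        irqchip_in_kernel_flag = have_set_hook = 1;
        reload_apic_access_page();      /* in-kernel irqchip: performs the reload */
        return 0;
}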